├── .gitattributes
├── .github
├── FUNDING.yml
└── workflows
│ └── swift.yml
├── .gitignore
├── .swiftpm
└── xcode
│ └── package.xcworkspace
│ └── contents.xcworkspacedata
├── .travis.yml
├── CHANGELOG.md
├── Example
├── .gitignore
├── Example.xcodeproj
│ ├── project.pbxproj
│ └── project.xcworkspace
│ │ └── contents.xcworkspacedata
├── Example.xcworkspace
│ └── contents.xcworkspacedata
├── Example
│ ├── AppDelegate.swift
│ ├── Assets.xcassets
│ │ └── AppIcon.appiconset
│ │ │ └── Contents.json
│ ├── Base.lproj
│ │ ├── LaunchScreen.storyboard
│ │ └── Main.storyboard
│ ├── Info.plist
│ ├── QueryViewController.swift
│ └── ViewController.swift
├── Podfile
├── img1.png
└── img2.png
├── LICENSE
├── Package.swift
├── README.md
├── Resources
├── Info.plist
├── InfoMac.plist
├── InfoWatchOS.plist
└── InfotvOS.plist
├── Sources
├── ArrayExt.swift
├── Attribute.swift
├── Attributes.swift
├── BinarySearch.swift
├── BooleanAttribute.swift
├── CharacterExt.swift
├── CharacterReader.swift
├── Cleaner.swift
├── Collector.swift
├── CombiningEvaluator.swift
├── Comment.swift
├── Connection.swift
├── CssSelector.swift
├── DataNode.swift
├── DataUtil.swift
├── Document.swift
├── DocumentType.swift
├── Element.swift
├── Elements.swift
├── Entities.swift
├── Evaluator.swift
├── Exception.swift
├── FormElement.swift
├── HtmlTreeBuilder.swift
├── HtmlTreeBuilderState.swift
├── HttpStatusException.swift
├── Mutex.swift
├── Node.swift
├── NodeTraversor.swift
├── NodeVisitor.swift
├── OrderedSet.swift
├── ParseError.swift
├── ParseErrorList.swift
├── ParseSettings.swift
├── Parser.swift
├── ParsingStrings.swift
├── Pattern.swift
├── QueryParser.swift
├── SerializationException.swift
├── SimpleDictionary.swift
├── StreamReader.swift
├── String.swift
├── StringBuilder.swift
├── StringUtil.swift
├── StructuralEvaluator.swift
├── SwiftSoup.h
├── SwiftSoup.swift
├── Tag.swift
├── TextNode.swift
├── Token.swift
├── TokenQueue.swift
├── Tokeniser.swift
├── TokeniserState.swift
├── TreeBuilder.swift
├── UTF8Arrays.swift
├── UnfairLock.swift
├── UnicodeScalar.swift
├── Validate.swift
├── Whitelist.swift
├── XmlDeclaration.swift
└── XmlTreeBuilder.swift
├── SwiftSoup.podspec
├── SwiftSoup.xcodeproj
├── project.pbxproj
├── project.xcworkspace
│ ├── contents.xcworkspacedata
│ └── xcshareddata
│ │ └── IDEWorkspaceChecks.plist
└── xcshareddata
│ ├── xcbaselines
│ └── 8CE4181E1DAA54A900240B42.xcbaseline
│ │ ├── 9F80FB9B-4E37-45A8-BFE8-9AF36737A6F3.plist
│ │ ├── F9553B46-8F24-4C2B-8A1E-8CC5535D12E1.plist
│ │ └── Info.plist
│ └── xcschemes
│ ├── PerformanceTest.xcscheme
│ ├── SwiftSoup-Package.xcscheme
│ ├── SwiftSoup-iOS.xcscheme
│ ├── SwiftSoup-macOS.xcscheme
│ ├── SwiftSoup-tvOS.xcscheme
│ └── SwiftSoup-watchOS.xcscheme
├── Tests-macOS
├── Info.plist
├── ParserBenchmark.swift
└── corpus
│ ├── Amazon.html
│ ├── GitHub.html
│ ├── Google.html
│ ├── Reuters.html
│ ├── Wikipedia.html
│ └── Wirecutter.html
├── Tests
├── Info.plist
├── LinuxMain.swift
└── SwiftSoupTests
│ ├── AttributeParseTest.swift
│ ├── AttributeTest.swift
│ ├── AttributesTest.swift
│ ├── BuildEntities.swift
│ ├── CharacterReaderTest.swift
│ ├── CleanerTest.swift
│ ├── CssTest.swift
│ ├── DocumentTest.swift
│ ├── DocumentTypeTest.swift
│ ├── ElementTest.swift
│ ├── ElementsTest.swift
│ ├── EntitiesTest.swift
│ ├── FormElementTest.swift
│ ├── HtmlParserTest.swift
│ ├── NodeTest.swift
│ ├── NodeTraversorTest.swift
│ ├── ParseSettingsTest.swift
│ ├── QueryParserTest.swift
│ ├── SelectorTest.swift
│ ├── StringUtilTest.swift
│ ├── TagTest.swift
│ ├── TextNodeTest.swift
│ ├── TextUtil.swift
│ ├── TokenQueueTest.swift
│ └── XmlTreeBuilderTest.swift
├── _config.yml
└── swiftsoup.png
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.swift linguist-detectable=true
2 | *.js linguist-detectable=false
3 | *.html linguist-detectable=false
4 | *.xml linguist-detectable=false
5 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: [scinfu] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
13 |
--------------------------------------------------------------------------------
/.github/workflows/swift.yml:
--------------------------------------------------------------------------------
1 | name: Swift
2 |
3 |
4 |
5 | on:
6 | push:
7 | branches: [ "master" ]
8 | pull_request:
9 | branches: [ "master" ]
10 |
11 | jobs:
12 | build:
13 | name: Swift ${{ matrix.swift }} on ${{ matrix.os }}
14 | strategy:
15 | matrix:
16 | os: [ubuntu-22.04, macos-latest]
17 | swift: ["5", "5.9"]
18 |
19 | runs-on: ${{ matrix.os }}
20 |
21 | steps:
22 | - uses: swift-actions/setup-swift@v2.2.0
23 | with:
24 | swift-version: ${{ matrix.swift }}
25 | - uses: actions/checkout@v4
26 | - name: Build
27 | run: swift build -v
28 | - name: Run tests
29 | run: swift test -v
30 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # OS X
2 | .DS_Store
3 |
4 | # Swift Package Manager
5 | .build
6 |
7 | # Xcode user settings
8 | xcuserdata/
9 |
--------------------------------------------------------------------------------
/.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | # Travis CI
2 |
3 | # https://swift.org/download/#releases
4 | # whitelist (branches that should be built)
5 | branches:
6 | only:
7 | - master
8 | - develop
9 |
10 | # the matrix of builds should cover each combination of Swift version
11 | # and platform that is supported. The version of Swift used is specified
12 | # by .swift-version, unless SWIFT_SNAPSHOT is specified.
13 | matrix:
14 | include:
15 | # Continue to test one permutation on Xenial (16.04)
16 | - os: linux
17 | dist: xenial
18 | sudo: required
19 | services: docker
20 | env: DOCKER_IMAGE=swift:5.0.3-xenial SWIFT_SNAPSHOT=5.0.3 SWIFT_TEST_ARGS="--parallel"
21 | - os: linux
22 | dist: xenial
23 | sudo: required
24 | services: docker
25 | env: DOCKER_IMAGE=swift:5.1.5-xenial SWIFT_SNAPSHOT=5.1.5 SWIFT_TEST_ARGS="--parallel"
26 | - os: osx
27 | osx_image: xcode10.2
28 | sudo: required
29 | env: SWIFT_SNAPSHOT=5.0.1 SWIFT_TEST_ARGS="--parallel"
30 | - os: osx
31 | osx_image: xcode11.3
32 | sudo: required
33 | env: SWIFT_SNAPSHOT=5.1.3 SWIFT_TEST_ARGS="--parallel"
34 | - os: osx
35 | osx_image: xcode13.3
36 | sudo: required
37 | env: SWIFT_SNAPSHOT=$SWIFT_DEVELOPMENT_SNAPSHOT SWIFT_TEST_ARGS="--parallel"
38 |
39 | before_install:
40 | - git clone https://github.com/IBM-Swift/Package-Builder.git
41 |
42 | script:
43 | - ./Package-Builder/build-package.sh -projectDir $TRAVIS_BUILD_DIR
44 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Change Log
2 |
3 | All notable changes to this project will be documented in this file.
4 |
5 | ## [2.3.2](https://github.com/scinfu/SwiftSoup/tree/2.3.2)
6 | * Renamed Selector Class to CssSelector
7 |
8 | ## [1.7.4](https://github.com/scinfu/SwiftSoup/tree/1.7.4)
9 | * Removed some warnings
10 | * Swift 4.2
11 |
12 | ## [1.7.1](https://github.com/scinfu/SwiftSoup/tree/1.7.1)
13 | * Backward compatibility for Swift < 4.1
14 |
15 | ## [1.7.0](https://github.com/scinfu/SwiftSoup/tree/1.7.0)
16 | * Removed StringBuilder from Element.cssSelector
17 | * Lint Code
18 | * Swift 4.1
19 |
20 | ## [1.6.5](https://github.com/scinfu/SwiftSoup/tree/1.6.5)
21 | * Removed StringBuilder from Element.cssSelector
22 | * Lint Code
23 |
24 |
25 | ## [1.6.4](https://github.com/scinfu/SwiftSoup/tree/1.6.4)
26 | * Add newer simulators to targeted devices to build with Carthage [tvOS]
27 |
28 | ## [1.6.3](https://github.com/scinfu/SwiftSoup/tree/1.6.3)
29 |
30 | * Add newer tvOS simulators to targeted devices to build with Carthage.
31 | * Add newer watchOS simulators to targeted devices to build with Carthage.
32 |
--------------------------------------------------------------------------------
/Example/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | # Created by https://www.gitignore.io/api/xcode
3 |
4 | ### Xcode ###
5 | # Xcode
6 | #
7 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore
8 |
9 | ## User settings
10 | xcuserdata/
11 |
12 | ## compatibility with Xcode 8 and earlier (ignoring not required starting Xcode 9)
13 | *.xcscmblueprint
14 | *.xccheckout
15 |
16 | ## compatibility with Xcode 3 and earlier (ignoring not required starting Xcode 4)
17 | build/
18 | DerivedData/
19 | *.moved-aside
20 | *.pbxuser
21 | !default.pbxuser
22 | *.mode1v3
23 | !default.mode1v3
24 | *.mode2v3
25 | !default.mode2v3
26 | *.perspectivev3
27 | !default.perspectivev3
28 |
29 | ### Xcode Patch ###
30 | *.xcodeproj/*
31 | !*.xcodeproj/project.pbxproj
32 | !*.xcodeproj/xcshareddata/
33 | !*.xcworkspace/contents.xcworkspacedata
34 | /*.gcno
35 |
36 | # End of https://www.gitignore.io/api/xcode
37 | Pods
38 | Podfile.lock
--------------------------------------------------------------------------------
/Example/Example.xcodeproj/project.xcworkspace/contents.xcworkspacedata:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/Example/Example.xcworkspace/contents.xcworkspacedata:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/Example/Example/AppDelegate.swift:
--------------------------------------------------------------------------------
1 | //
2 | // AppDelegate.swift
3 | // Example
4 | //
5 | // Created by Nabil on 05/10/17.
6 | // Copyright © 2017 Nabil. All rights reserved.
7 | //
8 |
9 | import UIKit
10 | /// Application delegate for the Example app; the lifecycle callbacks below contain no custom logic.
11 | @UIApplicationMain
12 | class AppDelegate: UIResponder, UIApplicationDelegate {
13 |
14 | var window: UIWindow?
15 |
16 | func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool {
17 | // Override point for customization after application launch.
18 | return true
19 | }
20 |
21 | func applicationWillResignActive(_ application: UIApplication) {
22 | // Sent when the application is about to move from active to inactive state. This can occur for certain types of temporary interruptions (such as an incoming phone call or SMS message) or when the user quits the application and it begins the transition to the background state.
23 | // Use this method to pause ongoing tasks, disable timers, and invalidate graphics rendering callbacks. Games should use this method to pause the game.
24 | }
25 |
26 | func applicationDidEnterBackground(_ application: UIApplication) {
27 | // Use this method to release shared resources, save user data, invalidate timers, and store enough application state information to restore your application to its current state in case it is terminated later.
28 | // If your application supports background execution, this method is called instead of applicationWillTerminate: when the user quits.
29 | }
30 |
31 | func applicationWillEnterForeground(_ application: UIApplication) {
32 | // Called as part of the transition from the background to the active state; here you can undo many of the changes made on entering the background.
33 | }
34 |
35 | func applicationDidBecomeActive(_ application: UIApplication) {
36 | // Restart any tasks that were paused (or not yet started) while the application was inactive. If the application was previously in the background, optionally refresh the user interface.
37 | }
38 |
39 | func applicationWillTerminate(_ application: UIApplication) {
40 | // Called when the application is about to terminate. Save data if appropriate. See also applicationDidEnterBackground:.
41 | }
42 |
43 | }
44 |
--------------------------------------------------------------------------------
/Example/Example/Assets.xcassets/AppIcon.appiconset/Contents.json:
--------------------------------------------------------------------------------
1 | {
2 | "images" : [
3 | {
4 | "idiom" : "iphone",
5 | "size" : "20x20",
6 | "scale" : "2x"
7 | },
8 | {
9 | "idiom" : "iphone",
10 | "size" : "20x20",
11 | "scale" : "3x"
12 | },
13 | {
14 | "idiom" : "iphone",
15 | "size" : "29x29",
16 | "scale" : "2x"
17 | },
18 | {
19 | "idiom" : "iphone",
20 | "size" : "29x29",
21 | "scale" : "3x"
22 | },
23 | {
24 | "idiom" : "iphone",
25 | "size" : "40x40",
26 | "scale" : "2x"
27 | },
28 | {
29 | "idiom" : "iphone",
30 | "size" : "40x40",
31 | "scale" : "3x"
32 | },
33 | {
34 | "idiom" : "iphone",
35 | "size" : "60x60",
36 | "scale" : "2x"
37 | },
38 | {
39 | "idiom" : "iphone",
40 | "size" : "60x60",
41 | "scale" : "3x"
42 | },
43 | {
44 | "idiom" : "ipad",
45 | "size" : "20x20",
46 | "scale" : "1x"
47 | },
48 | {
49 | "idiom" : "ipad",
50 | "size" : "20x20",
51 | "scale" : "2x"
52 | },
53 | {
54 | "idiom" : "ipad",
55 | "size" : "29x29",
56 | "scale" : "1x"
57 | },
58 | {
59 | "idiom" : "ipad",
60 | "size" : "29x29",
61 | "scale" : "2x"
62 | },
63 | {
64 | "idiom" : "ipad",
65 | "size" : "40x40",
66 | "scale" : "1x"
67 | },
68 | {
69 | "idiom" : "ipad",
70 | "size" : "40x40",
71 | "scale" : "2x"
72 | },
73 | {
74 | "idiom" : "ipad",
75 | "size" : "76x76",
76 | "scale" : "1x"
77 | },
78 | {
79 | "idiom" : "ipad",
80 | "size" : "76x76",
81 | "scale" : "2x"
82 | },
83 | {
84 | "idiom" : "ipad",
85 | "size" : "83.5x83.5",
86 | "scale" : "2x"
87 | },
88 | {
89 | "idiom" : "ios-marketing",
90 | "size" : "1024x1024",
91 | "scale" : "1x"
92 | }
93 | ],
94 | "info" : {
95 | "version" : 1,
96 | "author" : "xcode"
97 | }
98 | }
--------------------------------------------------------------------------------
/Example/Example/Base.lproj/LaunchScreen.storyboard:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/Example/Example/Info.plist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | CFBundleDevelopmentRegion
6 | $(DEVELOPMENT_LANGUAGE)
7 | CFBundleExecutable
8 | $(EXECUTABLE_NAME)
9 | CFBundleIdentifier
10 | $(PRODUCT_BUNDLE_IDENTIFIER)
11 | CFBundleInfoDictionaryVersion
12 | 6.0
13 | CFBundleName
14 | $(PRODUCT_NAME)
15 | CFBundlePackageType
16 | APPL
17 | CFBundleShortVersionString
18 | 1.0
19 | CFBundleVersion
20 | 1
21 | LSRequiresIPhoneOS
22 |
23 | NSAppTransportSecurity
24 |
25 | NSAllowsArbitraryLoads
26 |
27 |
28 | UILaunchStoryboardName
29 | LaunchScreen
30 | UIMainStoryboardFile
31 | Main
32 | UIRequiredDeviceCapabilities
33 |
34 | armv7
35 |
36 | UISupportedInterfaceOrientations
37 |
38 | UIInterfaceOrientationPortrait
39 | UIInterfaceOrientationLandscapeLeft
40 | UIInterfaceOrientationLandscapeRight
41 |
42 | UISupportedInterfaceOrientations~ipad
43 |
44 | UIInterfaceOrientationPortrait
45 | UIInterfaceOrientationPortraitUpsideDown
46 | UIInterfaceOrientationLandscapeLeft
47 | UIInterfaceOrientationLandscapeRight
48 |
49 |
50 |
51 |
--------------------------------------------------------------------------------
/Example/Example/QueryViewController.swift:
--------------------------------------------------------------------------------
1 | //
2 | // QueryViewController.swift
3 | // Example
4 | //
5 | // Created by Nabil on 02/03/18.
6 | // Copyright © 2018 Nabil. All rights reserved.
7 | //
8 |
9 | import UIKit
10 | /// Table view cell with three labels: a selector pattern, an example of it, and a description.
11 | class QueryViewControllerCell: UITableViewCell {
12 | @IBOutlet weak var selector: UILabel!
13 | @IBOutlet weak var example: UILabel!
14 | @IBOutlet weak var descriptionLabel: UILabel!
15 |
16 | }
17 | /// Lists example CSS selector queries in a table and reports the tapped one through `completionHandler`.
18 | class QueryViewController: UIViewController {
19 |
20 |     typealias Item = (selector: String, example: String, description: String)
21 |
22 |     // Example items: selector syntax, a concrete example of it, and what the example matches.
23 |     let items: [Item] = [
24 |         Item(selector: "*", example: "*", description: "any element"),
25 |         Item(selector: "#id", example: "#pageFooter", description: "elements with attribute ID of \"pageFooter\""),
26 |         Item(selector: ".class", example: ".login_form_label_field", description: "Selects all elements with class=\"login_form_label_field\""),
27 |         Item(selector: "element", example: "p", description: "Selects all <p> elements"),
28 |         Item(selector: "element", example: "div", description: "Selects all <div> elements"),
29 |         Item(selector: "element,element", example: "div, p", description: "Selects all <div> elements and all <p> elements"),
30 |         Item(selector: "element element", example: "div p", description: "Selects all <p> elements inside <div> elements"),
31 |         Item(selector: "element>element", example: "div > p", description: "Selects all <p> elements where the parent is a <div> element"),
32 |         Item(selector: "[attribute]", example: "[title]", description: "Selects all elements with a \"title\" attribute"),
33 |         Item(selector: "[^attrPrefix]", example: "[^cell]", description: "elements with an attribute name starting with \"cell\". Use to find elements with HTML5 datasets"),
34 |         Item(selector: "[attribute=value]", example: "[id=pageTitle]", description: "Selects all elements with id=\"pageTitle\""),
35 |         Item(selector: "[attribute^=value]", example: "a[href^=https]", description: "Selects every <a> element whose href attribute value begins with \"https\""),
36 |         Item(selector: "[attribute$=value]", example: "a[href$=.com/]", description: "Selects every element whose href attribute value ends with \".com/\""),
37 |         Item(selector: "[attribute*=value]", example: "a[href*=login]", description: "Selects every element whose href attribute value contains the substring \"login\""),
38 |         Item(selector: "[attr~=regex]", example: "img[src~=[gif]]", description: "img elements with an attribute named \"src\", and value matching the regular expression")
39 |     ]
40 |     /// Invoked with the item of the row the user taps; does nothing until a caller replaces it.
41 |     var completionHandler: (Item) -> Void = { _ in }
42 |     @IBOutlet weak var tableView: UITableView!
43 |
44 |     override func viewDidLoad() {
45 |         super.viewDidLoad()
46 |         self.title = ""
47 |
48 |         self.tableView.rowHeight = UITableView.automaticDimension
49 |         self.tableView.estimatedRowHeight = UITableView.automaticDimension
50 |     }
51 |
52 | }
53 | // MARK: - UITableViewDataSource
54 | extension QueryViewController: UITableViewDataSource {
55 |     func numberOfSections(in tableView: UITableView) -> Int {
56 |         return 1
57 |     }
58 |
59 |     public func tableView(_ tableView: UITableView, numberOfRowsInSection section: Int) -> Int {
60 |         return items.count
61 |     }
62 |
63 |     public func tableView(_ tableView: UITableView, cellForRowAt indexPath: IndexPath) -> UITableViewCell {
64 |         let cell = tableView.dequeueReusableCell(withIdentifier: "QueryViewControllerCell", for: indexPath) as! QueryViewControllerCell
65 |         let item = items[indexPath.row]
66 |         cell.selector.text = item.selector
67 |         cell.example.text = item.example
68 |         cell.descriptionLabel.text = item.description
69 |
70 |         // Alternate two light greys so neighbouring rows are distinguishable.
71 |         let evenRowColor = UIColor(red: 245.0/255, green: 245.0/255, blue: 245.0/255, alpha: 1)
72 |         let oddRowColor = UIColor(red: 240.0/255, green: 240.0/255, blue: 240.0/255, alpha: 1)
73 |         cell.backgroundColor = indexPath.row.isMultiple(of: 2) ? evenRowColor : oddRowColor
74 |         return cell
75 |     }
76 | }
77 | // MARK: - UITableViewDelegate
78 | extension QueryViewController: UITableViewDelegate {
79 |     func tableView(_ tableView: UITableView, didSelectRowAt indexPath: IndexPath) {
80 |         let chosen = items[indexPath.row] // the example item of the tapped row
81 |         completionHandler(chosen)
82 |     }
83 | }
84 |
--------------------------------------------------------------------------------
/Example/Example/ViewController.swift:
--------------------------------------------------------------------------------
1 | //
2 | // ViewController.swift
3 | // Example
4 | //
5 | // Created by Nabil on 05/10/17.
6 | // Copyright © 2017 Nabil. All rights reserved.
7 | //
8 |
9 | import UIKit
10 | import SwiftSoup
11 | /// Example screen: downloads a page, parses it with SwiftSoup and lists the elements matching a CSS query.
12 | class ViewController: UIViewController {
13 |     typealias Item = (text: String, html: String)
14 |
15 |     @IBOutlet weak var tableView: UITableView!
16 |     @IBOutlet var urlTextField: UITextField!
17 |     @IBOutlet var cssTextField: UITextField!
18 |
19 |     /// Most recently parsed document; starts out empty.
20 |     var document: Document = Document.init("")
21 |     /// Elements matched by the last query; the table view's data.
22 |     var items: [Item] = []
23 |
24 |     override func viewDidLoad() {
25 |         super.viewDidLoad()
26 |         self.title = "SwiftSoup Example"
27 |
28 |         self.tableView.rowHeight = UITableView.automaticDimension
29 |         self.tableView.estimatedRowHeight = UITableView.automaticDimension
30 |
31 |         urlTextField.text = "http://www.facebook.com"
32 |         cssTextField.text = "div"
33 |
34 |         // start first request
35 |         downloadHTML()
36 |     }
37 |
38 |     /// Downloads the page named in `urlTextField`, parses it and re-runs the CSS query.
39 |     /// The blocking fetch and the parse run on a background queue so the main thread is not
40 |     /// stalled by the network; the result (or the error alert) is delivered on the main queue.
41 |     func downloadHTML() {
42 |         let urlString = urlTextField.text ?? ""
43 |         guard let url = URL(string: urlString) else {
44 |             UIAlertController.showAlert("Error: \(urlString) doesn't seem to be a valid URL", self)
45 |             return
46 |         }
47 |
48 |         DispatchQueue.global(qos: .userInitiated).async { [weak self] in
49 |             let outcome = Result { try SwiftSoup.parse(String(contentsOf: url)) }
50 |             DispatchQueue.main.async {
51 |                 guard let self = self else { return }
52 |                 switch outcome {
53 |                 case .success(let parsed):
54 |                     self.document = parsed
55 |                     self.parse()
56 |                 case .failure(let error):
57 |                     UIAlertController.showAlert("Error: \(error)", self)
58 |                 }
59 |             }
60 |         }
61 |     }
62 |
63 |     /// Runs the CSS query from `cssTextField` against `document` and reloads the table.
64 |     func parse() {
65 |         do {
66 |             // empty old items
67 |             items = []
68 |             // find the elements matching the CSS selector
69 |             let elements: Elements = try document.select(cssTextField.text ?? "")
70 |             // transform them into local objects (Item)
71 |             for element in elements {
72 |                 let text = try element.text()
73 |                 let html = try element.outerHtml()
74 |                 items.append(Item(text: text, html: html))
75 |             }
76 |         } catch let error {
77 |             UIAlertController.showAlert("Error: \(error)", self)
78 |         }
79 |
80 |         tableView.reloadData()
81 |     }
82 |
83 |     /// Shows the selector picker; the chosen example becomes the new query and is run immediately.
84 |     @IBAction func chooseQuery(_ sender: Any) {
85 |         guard let viewController = storyboard?.instantiateViewController(
86 |             withIdentifier: "QueryViewController") as? QueryViewController else {
87 |             return
88 |         }
89 |         viewController.completionHandler = { [weak self] result in
90 |             guard let self = self else { return }
91 |             self.navigationController?.popViewController(animated: true)
92 |             self.cssTextField.text = result.example
93 |             self.parse()
94 |         }
95 |         self.show(viewController, sender: self)
96 |     }
97 | }
98 | // MARK: - UITableViewDataSource
99 | extension ViewController: UITableViewDataSource {
100 |     func numberOfSections(in tableView: UITableView) -> Int {
101 |         return 1
102 |     }
103 |
104 |     public func tableView(_ tableView: UITableView, numberOfRowsInSection section: Int) -> Int {
105 |         return items.count
106 |     }
107 |
108 |     public func tableView(_ tableView: UITableView, cellForRowAt indexPath: IndexPath) -> UITableViewCell {
109 |         let lightStripe = UIColor(red: 245.0/255, green: 245.0/255, blue: 245.0/255, alpha: 1)
110 |         let darkStripe = UIColor(red: 240.0/255, green: 240.0/255, blue: 240.0/255, alpha: 1)
111 |         // Reuse a queued cell when one exists; otherwise build and style a fresh subtitle cell.
112 |         let cell: UITableViewCell
113 |         if let recycled = tableView.dequeueReusableCell(withIdentifier: "cell") {
114 |             cell = recycled
115 |         } else {
116 |             cell = UITableViewCell(style: .subtitle, reuseIdentifier: "cell")
117 |             cell.textLabel?.numberOfLines = 2
118 |             cell.detailTextLabel?.numberOfLines = 6
119 |             cell.textLabel?.textColor = UIColor(red: 1.0/255, green: 174.0/255, blue: 66.0/255, alpha: 1)
120 |             cell.detailTextLabel?.textColor = UIColor(red: 55.0/255, green: 67.0/255, blue: 55.0/255, alpha: 1)
121 |             cell.backgroundColor = lightStripe
122 |         }
123 |
124 |         let item = items[indexPath.row]
125 |         cell.textLabel?.text = item.text
126 |         cell.detailTextLabel?.text = item.html
127 |         // Alternate the two greys by row parity.
128 |         cell.backgroundColor = indexPath.row.isMultiple(of: 2) ? lightStripe : darkStripe
129 |         return cell
130 |     }
131 | }
131 | // MARK: - UITableViewDelegate (conformance only; no delegate methods are implemented)
132 | extension ViewController: UITableViewDelegate {
133 | }
134 | // MARK: - UITextFieldDelegate
135 | extension ViewController: UITextFieldDelegate {
136 |     /// Resigns first responder on the field when Return is pressed and returns `false`.
137 |     public func textFieldShouldReturn(_ textField: UITextField) -> Bool {
138 |         textField.resignFirstResponder()
139 |         return false
140 |     }
141 |
142 |     /// Re-downloads the page when editing ends in the URL field and re-runs the query
143 |     /// when editing ends in the CSS field.
144 |     public func textFieldDidEndEditing(_ textField: UITextField) {
145 |         let editedURL = textField == urlTextField
146 |         let editedQuery = textField == cssTextField
147 |
148 |         if editedURL { downloadHTML() }
149 |         if editedQuery { parse() }
150 |     }
151 | }
152 | /// Convenience for presenting a one-button alert titled "Alert" from any view controller.
153 | extension UIAlertController {
154 |     static public func showAlert(_ message: String, _ controller: UIViewController) {
155 |         let alertController = UIAlertController(title: "Alert", message: message, preferredStyle: .alert)
156 |         alertController.addAction(UIAlertAction(title: "OK", style: .default, handler: nil))
157 |         controller.present(alertController, animated: true, completion: nil)
158 |     }
159 | }
160 |
--------------------------------------------------------------------------------
/Example/Podfile:
--------------------------------------------------------------------------------
1 | use_frameworks!
2 |
3 | target 'Example' do
4 | #pod 'SwiftSoup'
5 | pod 'SwiftSoup', :path => '../'
6 | end
7 |
--------------------------------------------------------------------------------
/Example/img1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scinfu/SwiftSoup/aa85ee96017a730031bafe411cde24a08a17a9c9/Example/img1.png
--------------------------------------------------------------------------------
/Example/img2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/scinfu/SwiftSoup/aa85ee96017a730031bafe411cde24a08a17a9c9/Example/img2.png
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License
2 |
3 | Copyright (c) 2009-2025 Jonathan Hedley
4 | Swift port copyright (c) 2016-2025 Nabil Chatbi
5 |
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 |
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 |
--------------------------------------------------------------------------------
/Package.swift:
--------------------------------------------------------------------------------
1 | // swift-tools-version:5.9
2 |
3 | import PackageDescription
4 | // Manifest: one library product, SwiftSoup, built from the Sources directory, plus the SwiftSoupTests test target.
5 | let package = Package(
6 | name: "SwiftSoup",
7 | platforms: [.macOS(.v10_15), .iOS(.v13), .watchOS(.v6)],
8 | products: [
9 | .library(name: "SwiftSoup", targets: ["SwiftSoup"])
10 | ],
11 | targets: [
12 | .target(
13 | name: "SwiftSoup",
14 | path: "Sources"),
15 | .testTarget(
16 | name: "SwiftSoupTests",
17 | dependencies: ["SwiftSoup"])
18 | ]
19 | )
20 |
--------------------------------------------------------------------------------
/Resources/Info.plist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | CFBundleDevelopmentRegion
6 | en
7 | CFBundleExecutable
8 | $(EXECUTABLE_NAME)
9 | CFBundleIdentifier
10 | $(PRODUCT_BUNDLE_IDENTIFIER)
11 | CFBundleInfoDictionaryVersion
12 | 6.0
13 | CFBundleName
14 | $(PRODUCT_NAME)
15 | CFBundlePackageType
16 | FMWK
17 | CFBundleShortVersionString
18 | 1.1.1
19 | CFBundleVersion
20 | $(CURRENT_PROJECT_VERSION)
21 | NSHumanReadableCopyright
22 | Copyright © 2016 Nabil Chatbi (scinfu). All rights reserved
23 | NSPrincipalClass
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/Resources/InfoMac.plist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | CFBundleDevelopmentRegion
6 | en
7 | CFBundleExecutable
8 | $(EXECUTABLE_NAME)
9 | CFBundleIdentifier
10 | $(PRODUCT_BUNDLE_IDENTIFIER)
11 | CFBundleInfoDictionaryVersion
12 | 6.0
13 | CFBundleName
14 | $(PRODUCT_NAME)
15 | CFBundlePackageType
16 | FMWK
17 | CFBundleShortVersionString
18 | 1.5.8
19 | CFBundleVersion
20 | $(CURRENT_PROJECT_VERSION)
21 | NSHumanReadableCopyright
22 | Copyright © 2016 Nabil Chatbi (scinfu). All rights reserved
23 | NSPrincipalClass
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/Resources/InfoWatchOS.plist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | CFBundleDevelopmentRegion
6 | en
7 | CFBundleExecutable
8 | $(EXECUTABLE_NAME)
9 | CFBundleIdentifier
10 | $(PRODUCT_BUNDLE_IDENTIFIER)
11 | CFBundleInfoDictionaryVersion
12 | 6.0
13 | CFBundleName
14 | $(PRODUCT_NAME)
15 | CFBundlePackageType
16 | FMWK
17 | CFBundleShortVersionString
18 | 1.1.1
19 | CFBundleVersion
20 | $(CURRENT_PROJECT_VERSION)
21 | NSHumanReadableCopyright
22 | Copyright © 2016 Nabil Chatbi (scinfu). All rights reserved
23 | NSPrincipalClass
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/Resources/InfotvOS.plist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | CFBundleDevelopmentRegion
6 | en
7 | CFBundleExecutable
8 | $(EXECUTABLE_NAME)
9 | CFBundleIdentifier
10 | $(PRODUCT_BUNDLE_IDENTIFIER)
11 | CFBundleInfoDictionaryVersion
12 | 6.0
13 | CFBundleName
14 | $(PRODUCT_NAME)
15 | CFBundlePackageType
16 | FMWK
17 | CFBundleShortVersionString
18 | 1.1.1
19 | CFBundleVersion
20 | $(CURRENT_PROJECT_VERSION)
21 | NSHumanReadableCopyright
22 | Copyright © 2016 Nabil Chatbi (scinfu). All rights reserved
23 | NSPrincipalClass
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/Sources/ArrayExt.swift:
--------------------------------------------------------------------------------
1 | //
2 | // ArrayExt.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 05/10/16.
6 | //
7 |
8 | import Foundation
9 |
10 | extension Array where Element: Equatable {
11 | func lastIndexOf(_ e: Element) -> Int {
12 | for pos in (0.. String {
41 | return String(decoding: getKeyUTF8(), as: UTF8.self)
42 | }
43 |
/// Returns the attribute key as its stored UTF-8 bytes.
open func getKeyUTF8() -> [UInt8] {
    key
}
47 |
/**
 Set the attribute key; case is preserved.

 The key is trimmed *before* it is validated, so a key that becomes empty once
 trimmed (e.g. whitespace only) is rejected instead of being stored as an empty key.

 @param key the new key; must not be empty after trimming
 @throws whatever `Validate.notEmpty` throws when the trimmed key is empty
 */
open func setKey(key: [UInt8]) throws {
    let trimmedKey = key.trim()
    // Validate the value that is actually stored, not the raw input.
    try Validate.notEmpty(string: trimmedKey)
    self.key = trimmedKey
}
56 |
/// `String` convenience overload: forwards the key's UTF-8 bytes to `setKey(key: [UInt8])`.
open func setKey(key: String) throws {
    let utf8Key = key.utf8Array
    try setKey(key: utf8Key)
}
60 |
/**
 Get the attribute value, decoded from its UTF-8 bytes.
 @return the attribute value
 */
open func getValue() -> String {
    let bytes = getValueUTF8()
    return String(decoding: bytes, as: UTF8.self)
}

/// Returns the attribute value as its stored UTF-8 bytes.
open func getValueUTF8() -> [UInt8] {
    value
}
72 |
/**
 Set the attribute value.
 @param value the new attribute value
 @return the value that was stored before this call
 */
@discardableResult
open func setValue(value: [UInt8]) -> [UInt8] {
    // The return expression is evaluated first; the defer then installs the new value.
    defer { self.value = value }
    return self.value
}
83 |
/**
 Get the HTML representation of this attribute; e.g. {@code href="index.html"}.
 Rendering uses the output settings of a newly created `Document`.
 @return HTML
 */
public func html() -> String {
    let settings = Document([]).outputSettings()
    let builder = StringBuilder()
    html(accum: builder, out: settings)
    return builder.toString()
}
93 |
/// Appends this attribute to `accum`: always the key, followed by `="value"`
/// (value escaped through `Entities.escape`) unless the attribute should be collapsed.
@inlinable
public func html(accum: StringBuilder, out: OutputSettings) {
    accum.append(key)
    guard !shouldCollapseAttribute(out: out) else {
        return
    }
    accum.append(UTF8Arrays.attributeEqualsQuoteMark)
    Entities.escape(&accum.buffer, value, out, true, false, false)
    accum.append(UTF8Arrays.quoteMark)
}
103 |
/**
 Get the string representation of this attribute; identical to {@link #html()}.
 @return string
 */
open func toString() -> String {
    html()
}
111 |
/**
 * Create a new Attribute from an unencoded key and a HTML attribute encoded value.
 * The value is unescaped with `Entities.unescape(string:strict:)` in strict mode before
 * the attribute is built.
 * @param unencodedKey assumes the key is not encoded, as can be only run of simple \w chars.
 * @param encodedValue HTML attribute encoded value
 * @return attribute
 */
public static func createFromEncoded(unencodedKey: [UInt8], encodedValue: [UInt8]) throws -> Attribute {
    return try Attribute(
        key: unencodedKey,
        value: Entities.unescape(string: encodedValue, strict: true)
    )
}
122 |
/// True when the key begins with `Attributes.dataPrefix` and has at least one byte after it.
public func isDataAttribute() -> Bool {
    let prefix = Attributes.dataPrefix
    guard key.starts(with: prefix) else {
        return false
    }
    return key.count > prefix.count
}
126 |
/**
 * Collapsible if the value is empty or equals the key (ignoring case), the output
 * syntax is HTML, and the attribute is a boolean attribute.
 *
 * @param out Outputsettings
 * @return Returns whether collapsible or not
 */
public final func shouldCollapseAttribute(out: OutputSettings) -> Bool {
    guard value.isEmpty || value.equalsIgnoreCase(string: key) else {
        return false
    }
    guard out.syntax() == OutputSettings.Syntax.html else {
        return false
    }
    return isBooleanAttribute()
}
138 |
/// True when the lowercased key is a member of `Attribute.booleanAttributes`.
public func isBooleanAttribute() -> Bool {
    let normalizedKey = key.lowercased()[...]
    return Attribute.booleanAttributes.contains(normalizedKey)
}
142 |
/// Combines the hashes of `key` and `value` as `31 * hash(key) + hash(value)`.
///
/// `hashValue` may be any `Int`, and Swift's `*` / `+` trap on overflow, so the
/// combination uses the wrapping operators `&*` / `&+` to avoid a runtime crash.
/// @return combined hash of key and value
public func hashCode() -> Int {
    let keyHash = key.hashValue
    return 31 &* keyHash &+ value.hashValue
}
148 |
/// Returns a new `Attribute` built from this attribute's key and value.
///
/// If construction throws an `Exception.Error`, its message is printed; any other
/// error is ignored. In both cases the function falls through to the fallback below.
public func clone() -> Attribute {
    do {
        let duplicate = try Attribute(key: key, value: value)
        return duplicate
    } catch Exception.Error(_, let msg) {
        print(msg)
    } catch {
        // Other errors are ignored; fall through.
    }
    // NOTE(review): only reached when re-creating the attribute threw; `try!` traps
    // if the initializer also rejects an empty key — confirm against the initializer.
    return try! Attribute(key: [], value: [])
}
159 | }
160 |
extension Attribute: Equatable {
    /// Two attributes are equal when both their values and their keys are equal.
    static public func == (lhs: Attribute, rhs: Attribute) -> Bool {
        guard lhs.value == rhs.value else {
            return false
        }
        return lhs.key == rhs.key
    }
}
167 |
--------------------------------------------------------------------------------
/Sources/BinarySearch.swift:
--------------------------------------------------------------------------------
1 | //
2 | // BinarySearch.swift
3 | // SwiftSoup-iOS
4 | //
5 | // Created by Garth Snyder on 2/28/19.
6 | //
7 | // Adapted from https://stackoverflow.com/questions/31904396/swift-binary-search-for-standard-array
8 | //
9 |
10 | import Foundation
11 |
12 | extension Collection {
13 |
/// Partition-point binary search over an ordered Collection.
///
/// The result is undefined when the collection is not partitioned by `predicate`
/// (i.e. not properly sorted).
///
/// Only O(log N) for RandomAccessCollections; other Collections may offset
/// indexes in O(K) (Strings, for example).
///
/// - Note: When searching (rather than locating an insertion point), always
///   check the element at the returned index for a genuine match, and check
///   that the index is not `endIndex`. A missing element still yields the
///   index at which it would be inserted.
///
/// - Parameter predicate: Reports the ordering of a given Element relative
///   to the desired Element. Typically, this is <.
///
/// - Returns: Index N such that the predicate is true for all elements up to
///   but not including N, and is false for all elements N and beyond
func binarySearch(predicate: (Element) -> Bool) -> Index {
    var lowerBound = startIndex
    // Number of elements still under consideration, starting at lowerBound.
    var remaining = distance(from: startIndex, to: endIndex)
    while remaining > 0 {
        let half = remaining / 2
        let probe = index(lowerBound, offsetBy: half)
        if predicate(self[probe]) {
            // Everything up to and including probe satisfies the predicate.
            lowerBound = index(after: probe)
            remaining -= half + 1
        } else {
            remaining = half
        }
    }
    return lowerBound
}
48 |
/// Binary search lookup for ordered Collections using a KeyPath
/// relative to Element.
///
/// Behavior is undefined if the collection is not properly sorted.
///
/// This is only O(logN) for RandomAccessCollections; Collections in
/// general may implement offsetting of indexes as an O(K) operation. (E.g.,
/// Strings are like this).
///
/// - Parameter value: The key value to look for.
/// - Parameter keyPath: KeyPath that extracts the Element value on which
///   the Collection is presorted. Must be Comparable and Equatable.
///   Ordering is presumed to be <, however that is defined for the type.
///
/// - Returns: The index of a matching element, or nil if not found. If
///   the return value is non-nil, it is always a valid index.
func indexOfElement<T>(withValue value: T, atKeyPath keyPath: KeyPath<Element, T>) -> Index? where T: Comparable & Equatable {
    // First index whose key is not less than `value`.
    let candidate = binarySearch { $0[keyPath: keyPath] < value }
    // The candidate may be past the end, or hold a larger key: both mean "not found".
    guard candidate < endIndex, self[candidate][keyPath: keyPath] == value else {
        return nil
    }
    return candidate
}
78 |
/// Returns the element found by `indexOfElement(withValue:atKeyPath:)` for the
/// given key value, or nil when that lookup finds no match.
///
/// - Parameter value: The key value to look for.
/// - Parameter keyPath: KeyPath extracting the key on which the Collection is presorted.
func element<T>(withValue value: T, atKeyPath keyPath: KeyPath<Element, T>) -> Element? where T: Comparable & Equatable {
    return indexOfElement(withValue: value, atKeyPath: keyPath).map { self[$0] }
}
85 |
86 | func elements(withValue value: T, atKeyPath keyPath: KeyPath) -> [Element] where T: Comparable & Equatable {
87 | guard let start = indexOfElement(withValue: value, atKeyPath: keyPath) else { return [] }
88 | var end = index(after: start)
89 | while end < endIndex && self[end][keyPath: keyPath] == value {
90 | end = index(after: end)
91 | }
92 | return Array(self[start.. Bool {
24 | return true
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/Sources/CharacterExt.swift:
--------------------------------------------------------------------------------
1 | //
2 | // CharacterExt.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 08/10/16.
6 | //
7 |
8 | import Foundation
9 |
extension Character {

    public static let space: Character = " "
    public static let BackslashT: Character = "\t"
    public static let BackslashN: Character = "\n"
    public static let BackslashF: Character = Character(UnicodeScalar(12))
    public static let BackslashR: Character = "\r"
    public static let BackshashRBackslashN: Character = "\r\n"

    //http://www.unicode.org/glossary/#supplementary_code_point
    public static let MIN_SUPPLEMENTARY_CODE_POINT: UInt32 = 0x010000

    /// True for the space character, \t, \n, \f, \r and the combined \r\n character.
    var isWhitespace: Bool {
        let whitespace: [Character] = [
            Character.space,
            Character.BackslashT,
            Character.BackslashN,
            Character.BackslashF,
            Character.BackslashR,
            Character.BackshashRBackslashN
        ]
        return whitespace.contains(self)
    }

    /// `true` if `self` normalized is a single Unicode scalar in `CharacterSet.decimalDigits`.
    var isDigit: Bool {
        return isMemberOfCharacterSet(.decimalDigits)
    }

    /// The first character of the lowercased string form of `self`.
    var lowercase: Character {
        let lowered = String(self).lowercased()
        return lowered[lowered.startIndex]
    }

    /// Tests membership of `self` in a character set.
    ///
    /// - parameter set: The `CharacterSet` used to test for membership.
    /// - returns: `true` if `self`, after canonical precomposition, consists of exactly
    ///   one Unicode scalar and that scalar is a member of `set`.
    func isMemberOfCharacterSet(_ set: CharacterSet) -> Bool {
        let scalars = String(self).precomposedStringWithCanonicalMapping.unicodeScalars
        guard scalars.count == 1, let onlyScalar = scalars.first else {
            return false
        }
        return set.contains(onlyScalar)
    }

    /// Builds a Character from a Unicode scalar value; traps if `value` is not a valid scalar.
    static func convertFromIntegerLiteral(value: IntegerLiteralType) -> Character {
        return Character(UnicodeScalar(value)!)
    }

    /// Static form of `isLetter()`.
    static func isLetter(_ char: Character) -> Bool {
        return char.isLetter()
    }

    /// `true` if `self` is a member of `CharacterSet.letters`.
    func isLetter() -> Bool {
        return isMemberOfCharacterSet(.letters)
    }

    /// Static form of `isLetterOrDigit()`.
    static func isLetterOrDigit(_ char: Character) -> Bool {
        return char.isLetterOrDigit()
    }

    /// `true` if `self` is a letter or a decimal digit.
    func isLetterOrDigit() -> Bool {
        return isLetter() || isDigit
    }
}
81 |
--------------------------------------------------------------------------------
/Sources/Cleaner.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Cleaner.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 15/10/16.
6 | //
7 |
8 | import Foundation
9 |
open class Cleaner {
    fileprivate let headWhitelist: Whitelist?
    fileprivate let bodyWhitelist: Whitelist

    /// Create a new cleaner, that sanitizes documents' `<head>` and `<body>` using the supplied whitelist.
    /// - Parameters:
    ///   - headWhitelist: Whitelist to clean the head with; when `nil` the head is not copied
    ///   - bodyWhitelist: Whitelist to clean the body with
    public init(headWhitelist: Whitelist?, bodyWhitelist: Whitelist) {
        self.headWhitelist = headWhitelist
        self.bodyWhitelist = bodyWhitelist
    }

    /// Create a new cleaner, that sanitizes documents' `<body>` using the supplied whitelist.
    /// - Parameter whitelist: Whitelist to clean the body with
    convenience init(_ whitelist: Whitelist) {
        self.init(headWhitelist: nil, bodyWhitelist: whitelist)
    }

    /// Creates a new, clean document, from the original dirty document, containing only elements allowed by the whitelist.
    /// The original document is not modified. The dirty document's `<body>` is always used; its `<head>` is used
    /// only when a head whitelist was supplied.
    /// - Parameter dirtyDocument: Untrusted base document to clean.
    /// - Returns: A cleaned document.
    public func clean(_ dirtyDocument: Document) throws -> Document {
        let clean = Document.createShell(dirtyDocument.getBaseUri())
        // frameset documents won't have a head. the clean doc will have empty head.
        if let headWhitelist, let dirtyHead = dirtyDocument.head(), let cleanHead = clean.head() {
            try copySafeNodes(dirtyHead, cleanHead, whitelist: headWhitelist)
        }
        // frameset documents won't have a body. the clean doc will have empty body.
        if let dirtyBody = dirtyDocument.body(), let cleanBody = clean.body() {
            try copySafeNodes(dirtyBody, cleanBody, whitelist: bodyWhitelist)
        }
        return clean
    }

    /// Determines if the input document is valid, against the whitelist. It is considered valid if all the tags and attributes
    /// in the input HTML body are allowed by the whitelist.
    ///
    /// This method can be used as a validator for user input forms. An invalid document will still be cleaned successfully
    /// using the ``clean(_:)`` method. If using as a validator, it is recommended to still clean the document
    /// to ensure enforced attributes are set correctly, and that the output is tidied.
    ///
    /// A document without a body (e.g. a frameset document) has nothing that can be validated against the
    /// body whitelist; it is reported as not valid instead of crashing on a force-unwrap.
    /// - Parameter dirtyDocument: document to test
    /// - Returns: true if no tags or attributes need to be removed; false if they do, or if there is no body to check
    public func isValid(_ dirtyDocument: Document) throws -> Bool {
        let clean = Document.createShell(dirtyDocument.getBaseUri())
        guard let dirtyBody = dirtyDocument.body(), let cleanBody = clean.body() else {
            return false
        }
        let numDiscarded = try copySafeNodes(dirtyBody, cleanBody, whitelist: bodyWhitelist)
        return numDiscarded == 0
    }

    /// Traverses `source` with a `CleaningVisitor` that appends the whitelisted copies to `dest`.
    /// - Returns: the number of nodes and attributes the visitor discarded
    @discardableResult
    fileprivate func copySafeNodes(_ source: Element, _ dest: Element, whitelist: Whitelist) throws -> Int {
        let cleaningVisitor = Cleaner.CleaningVisitor(source, dest, whitelist)
        try NodeTraversor(cleaningVisitor).traverse(source)
        return cleaningVisitor.numDiscarded
    }
}
65 |
extension Cleaner {
    /// Node visitor that copies whitelisted nodes from a source tree into a destination
    /// tree and counts everything it leaves out.
    fileprivate final class CleaningVisitor: NodeVisitor {
        /// Number of nodes and attributes that were not copied.
        private(set) var numDiscarded = 0

        private let root: Element
        private var destination: Element? // current element to append nodes to

        private let whitelist: Whitelist

        public init(_ root: Element, _ destination: Element, _ whitelist: Whitelist) {
            self.root = root
            self.destination = destination
            self.whitelist = whitelist
        }

        /// Called when a node is entered: copies it to the current destination if it is safe.
        public func head(_ source: Node, _ depth: Int) throws {
            switch source {
            case let sourceEl as Element:
                if whitelist.isSafeTag(sourceEl.tagNameUTF8()) {
                    // safe, clone and copy safe attrs
                    let meta = try createSafeElement(sourceEl)
                    try destination?.appendChild(meta.el)

                    numDiscarded += meta.numAttribsDiscarded
                    // descend: children of the source element go under the new copy
                    destination = meta.el
                } else if source != root {
                    // not a safe tag, so don't add. don't count root against discarded.
                    numDiscarded += 1
                }
            case let sourceText as TextNode:
                let destText = TextNode(sourceText.getWholeTextUTF8(), source.getBaseUriUTF8())
                try destination?.appendChild(destText)
            case let sourceData as DataNode:
                // data is only kept when its parent element is itself a safe tag
                if let dataParent = sourceData.parent(), whitelist.isSafeTag(dataParent.nodeNameUTF8()) {
                    let destData = DataNode(sourceData.getWholeDataUTF8(), source.getBaseUriUTF8())
                    try destination?.appendChild(destData)
                } else {
                    numDiscarded += 1
                }
            default:
                // else, we don't care about comments, xml proc instructions, etc
                numDiscarded += 1
            }
        }

        /// Called when a node is left: steps the destination back up after a safe element.
        public func tail(_ source: Node, _ depth: Int) throws {
            guard let element = source as? Element, whitelist.isSafeTag(element.nodeNameUTF8()) else {
                return
            }
            // would have descended, so pop destination stack
            destination = destination?.parent()
        }

        /// Builds a copy of `sourceEl` carrying only whitelisted attributes plus the
        /// whitelist's enforced attributes, and reports how many attributes were dropped.
        private func createSafeElement(_ sourceEl: Element) throws -> ElementMeta {
            let tagName = sourceEl.tagName()
            let safeAttrs = Attributes()
            var droppedAttribs = 0

            if let sourceAttrs = sourceEl.getAttributes() {
                for attribute in sourceAttrs {
                    guard try whitelist.isSafeAttribute(tagName, sourceEl, attribute) else {
                        droppedAttribs += 1
                        continue
                    }
                    safeAttrs.put(attribute: attribute)
                }
            }
            safeAttrs.addAll(incoming: try whitelist.getEnforcedAttributes(tagName))

            let copy = try Element(Tag.valueOf(tagName.utf8Array), sourceEl.getBaseUriUTF8(), safeAttrs)
            return ElementMeta(copy, droppedAttribs)
        }
    }
}
139 |
extension Cleaner {
    /// Pairs a newly created safe element with the number of source attributes
    /// that were not copied onto it.
    fileprivate struct ElementMeta {
        /// The cleaned copy of the source element.
        let el: Element
        /// How many attributes of the source element were discarded.
        let numAttribsDiscarded: Int

        init(_ el: Element, _ numAttribsDiscarded: Int) {
            (self.el, self.numAttribsDiscarded) = (el, numAttribsDiscarded)
        }
    }
}
151 |
--------------------------------------------------------------------------------
/Sources/Collector.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Collector.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 22/10/16.
6 | //
7 |
8 | import Foundation
9 |
/**
 * Collects a list of elements that match the supplied criteria.
 */
open class Collector {

    // The initializer is private; the class only offers static API.
    private init() {}

    /**
     Build a list of elements, by visiting root and every descendant of root, and testing it against the evaluator.
     @param eval Evaluator to test elements against
     @param root root of tree to descend
     @return list of matches; empty if none
     */
    public static func collect (_ eval: Evaluator, _ root: Element) throws -> Elements {
        let matches = Elements()
        let visitor = Accumulator(root, matches, eval)
        try NodeTraversor(visitor).traverse(root)
        return matches
    }

}
32 |
/// Node visitor that adds every visited element matching `eval` to `elements`.
private final class Accumulator: NodeVisitor {
    private let root: Element
    private let elements: Elements
    private let eval: Evaluator

    init(_ root: Element, _ elements: Elements, _ eval: Evaluator) {
        self.root = root
        self.elements = elements
        self.eval = eval
    }

    /// Tests the entered node; non-element nodes are skipped.
    @inlinable
    public func head(_ node: Node, _ depth: Int) {
        guard let el = node as? Element else {
            return
        }
        // An error thrown by the evaluator is swallowed and treated as "no match".
        if (try? eval.matches(root, el)) == true {
            elements.add(el)
        }
    }

    /// Nothing to do when leaving a node.
    public func tail(_ node: Node, _ depth: Int) {
        // void
    }
}
60 |
--------------------------------------------------------------------------------
/Sources/CombiningEvaluator.swift:
--------------------------------------------------------------------------------
1 | //
2 | // CombiningEvaluator.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 23/10/16.
6 | //
7 |
8 | import Foundation
9 |
10 | /**
11 | * Base combining (and, or) evaluator.
12 | */
13 | public class CombiningEvaluator: Evaluator {
14 |
/// The evaluators combined by this evaluator; readable publicly, mutable only from within this type.
public private(set) var evaluators: [Evaluator]
/// Cached `evaluators.count`, refreshed by `updateNumEvaluators()`.
var num: Int = 0

/// Creates a combining evaluator with no child evaluators.
public override init() {
    evaluators = []
    super.init()
}

/// Creates a combining evaluator over the given evaluators.
/// @param evaluators the evaluators to combine
public init(_ evaluators: [Evaluator]) {
    self.evaluators = evaluators
    super.init()
    updateNumEvaluators()
}

/// Variadic form of `init(_: [Evaluator])`.
/// @param evaluators the evaluators to combine
public init(_ evaluators: Evaluator...) {
    self.evaluators = evaluators
    super.init()
    updateNumEvaluators()
}
34 |
/// Returns the evaluator at position `num - 1`, or nil when there are no evaluators.
func rightMostEvaluator() -> Evaluator? {
    guard num > 0, evaluators.count > 0 else {
        return nil
    }
    return evaluators[num - 1]
}

/// Overwrites the evaluator at position `num - 1` with `replacement`.
/// NOTE(review): indexes without a bounds check, so this traps when `num` is 0.
func replaceRightMostEvaluator(_ replacement: Evaluator) {
    let lastPosition = num - 1
    evaluators[lastPosition] = replacement
}

/// Refreshes the cached evaluator count.
func updateNumEvaluators() {
    // used so we don't need to bash on size() for every match test
    num = evaluators.count
}
47 |
48 | public final class And: CombiningEvaluator {
/// Creates an And evaluator over the given evaluators.
public override init(_ evaluators: [Evaluator]) {
    super.init(evaluators)
}

/// Variadic form; the collected arguments are forwarded as an array.
public override init(_ evaluators: Evaluator...) {
    let collected: [Evaluator] = evaluators
    super.init(collected)
}
56 |
57 | public override func matches(_ root: Element, _ node: Element) -> Bool {
58 | for index in 0.. String {
71 | let array: [String] = evaluators.map { String($0.toString()) }
72 | return StringUtil.join(array, sep: " ")
73 | }
74 | }
75 |
76 | public final class Or: CombiningEvaluator {
/**
 * Create a new Or evaluator. The initial evaluators are ANDed together and used as the first clause of the OR.
 *
 * The decision to wrap is made on the size of the incoming list. The cached `num`
 * is always 0 right after `super.init()`, so testing it meant the AND wrapping
 * could never happen.
 * @param evaluators initial OR clause (these are wrapped into an AND evaluator when there is more than one).
 */
public override init(_ evaluators: [Evaluator]) {
    super.init()
    if evaluators.count > 1 {
        self.evaluators.append(And(evaluators))
    } else { // 0 or 1
        self.evaluators.append(contentsOf: evaluators)
    }
    updateNumEvaluators()
}
90 |
/// Variadic form of the Or initializer: more than one initial evaluator is wrapped
/// into a single And clause; zero or one is stored as-is.
///
/// The check uses the number of incoming evaluators. The cached `num` is always 0
/// right after `super.init()`, so testing it made the And branch unreachable.
override init(_ evaluators: Evaluator...) {
    super.init()
    if evaluators.count > 1 {
        self.evaluators.append(And(evaluators))
    } else { // 0 or 1
        self.evaluators.append(contentsOf: evaluators)
    }
    updateNumEvaluators()
}
100 |
/// Creates an Or evaluator with no clauses.
override init() {
    super.init()
}

/// Appends another alternative to this Or and refreshes the cached count.
/// @param evaluator the evaluator to add
public func add(_ evaluator: Evaluator) {
    evaluators += [evaluator]
    updateNumEvaluators()
}
109 |
110 | public override func matches(_ root: Element, _ node: Element) -> Bool {
111 | for index in 0.. String {
123 | return ":or\(evaluators.map {String($0.toString())})"
124 | }
125 | }
126 | }
127 |
--------------------------------------------------------------------------------
/Sources/Comment.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Comment.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 22/10/16.
6 | //
7 |
8 | import Foundation
9 |
10 | /**
11 | A comment node.
12 | */
13 | public class Comment: Node {
/// Attribute key under which the comment text is stored.
private static let COMMENT_KEY: [UInt8] = UTF8Arrays.comment

/**
 Create a new comment node.
 @param data The contents of the comment
 @param baseUri base URI
 */
public init(_ data: [UInt8], _ baseUri: [UInt8]) {
    super.init(baseUri)
    // An error while storing the data is ignored.
    _ = try? attributes?.put(Comment.COMMENT_KEY, data)
}
27 |
/// The node name as UTF-8 bytes, derived from `nodeName()`.
public override func nodeNameUTF8() -> [UInt8] {
    let name = nodeName()
    return name.utf8Array
}

/// The fixed node name used for comment nodes.
public override func nodeName() -> String {
    "#comment"
}
35 |
/**
 Get the contents of the comment, decoded from its UTF-8 bytes.
 @return comment content
 */
public func getData() -> String {
    let bytes = getDataUTF8()
    return String(decoding: bytes, as: UTF8.self)
}

/// The comment contents as UTF-8 bytes, read from this node's attributes.
public func getDataUTF8() -> [UInt8] {
    let store = attributes!
    return store.get(key: Comment.COMMENT_KEY)
}
47 |
/// Writes this comment as `<!--` + data + `-->`, indenting first when pretty-printing is enabled.
/// (The comment delimiters are restored here; they had been stripped from the append chain.)
override func outerHtmlHead(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) {
    if out.prettyPrint() {
        indent(accum, depth, out)
    }
    accum
        .append("<!--")
        .append(getData())
        .append("-->")
}

/// Comments have no closing markup, so nothing is written on the way out.
override func outerHtmlTail(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) {}
59 |
/// Creates a new Comment with this comment's data and base URI, then delegates to `copy(clone:)`.
public override func copy(with zone: NSZone? = nil) -> Any {
    let data = attributes!.get(key: Comment.COMMENT_KEY)
    let duplicate = Comment(data, baseUri!)
    return copy(clone: duplicate)
}

/// Same as `copy(with:)`, but passes `parent` through to `copy(clone:parent:)`.
public override func copy(parent: Node?) -> Node {
    let data = attributes!.get(key: Comment.COMMENT_KEY)
    let duplicate = Comment(data, baseUri!)
    return copy(clone: duplicate, parent: parent)
}

/// Defers entirely to the superclass implementation.
public override func copy(clone: Node, parent: Node?) -> Node {
    let copied = super.copy(clone: clone, parent: parent)
    return copied
}
73 | }
74 |
--------------------------------------------------------------------------------
/Sources/Connection.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Connection.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 29/09/16.
6 | //
7 |
8 | import Foundation
9 | //TODO:
10 |
--------------------------------------------------------------------------------
/Sources/DataNode.swift:
--------------------------------------------------------------------------------
1 | //
2 | // DataNode.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 29/09/16.
6 | //
7 |
8 | import Foundation
9 |
/**
 A data node, for contents of style, script tags etc, where contents should not show in text().
 */
open class DataNode: Node {
    /// Attribute key under which the node's data is stored.
    private static let DATA_KEY = "data".utf8Array

    /**
     Create a new DataNode.
     @param data data contents
     @param baseUri base URI
     */
    public init(_ data: [UInt8], _ baseUri: [UInt8]) {
        super.init(baseUri)
        // An error while storing the data is ignored.
        _ = try? attributes?.put(DataNode.DATA_KEY, data)
    }

    /// The node name as UTF-8 bytes, derived from `nodeName()`.
    open override func nodeNameUTF8() -> [UInt8] {
        let name = nodeName()
        return name.utf8Array
    }

    /// The fixed node name used for data nodes.
    open override func nodeName() -> String {
        "#data"
    }

    /**
     Get the data contents of this node. Will be unescaped and with original new lines, space etc.
     @return data
     */
    open func getWholeData() -> String {
        let bytes = getWholeDataUTF8()
        return String(decoding: bytes, as: UTF8.self)
    }

    /// The data contents as UTF-8 bytes, read from this node's attributes.
    open func getWholeDataUTF8() -> [UInt8] {
        let store = attributes!
        return store.get(key: DataNode.DATA_KEY)
    }

    /**
     * Set the data contents of this node.
     * @param data unencoded data
     * @return this node, for chaining
     */
    @discardableResult
    open func setWholeData(_ data: String) -> DataNode {
        // An error while storing the data is ignored.
        _ = try? attributes?.put(DataNode.DATA_KEY, data.utf8Array)
        return self
    }

    override func outerHtmlHead(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) throws {
        // data is not escaped in return from data nodes, so " in script, style is plain
        let rawData = getWholeData()
        accum.append(rawData)
    }

    /// Data nodes have no closing markup, so nothing is written on the way out.
    override func outerHtmlTail(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) {}

    /**
     Create a new DataNode from HTML encoded data.
     @param encodedData encoded data
     @param baseUri base URI
     @return new DataNode
     */
    public static func createFromEncoded(_ encodedData: String, _ baseUri: String) throws -> DataNode {
        let decoded = try Entities.unescape(encodedData.utf8Array)
        return DataNode(decoded, baseUri.utf8Array)
    }

    /// Creates a new DataNode with this node's data and base URI, then delegates to `copy(clone:)`.
    public override func copy(with zone: NSZone? = nil) -> Any {
        let data = attributes!.get(key: DataNode.DATA_KEY)
        let duplicate = DataNode(data, baseUri!)
        return copy(clone: duplicate)
    }

    /// Same as `copy(with:)`, but passes `parent` through to `copy(clone:parent:)`.
    public override func copy(parent: Node?) -> Node {
        let data = attributes!.get(key: DataNode.DATA_KEY)
        let duplicate = DataNode(data, baseUri!)
        return copy(clone: duplicate, parent: parent)
    }

    /// Defers entirely to the superclass implementation.
    public override func copy(clone: Node, parent: Node?) -> Node {
        let copied = super.copy(clone: clone, parent: parent)
        return copied
    }
}
93 |
--------------------------------------------------------------------------------
/Sources/DataUtil.swift:
--------------------------------------------------------------------------------
1 | //
2 | // DataUtil.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 02/10/16.
6 | //
7 |
8 | import Foundation
9 |
/**
 * Internal static utilities for handling data.
 */
class DataUtil {

    /// Case-insensitive pattern capturing the value that follows `charset=`.
    static let charsetPattern: String = "(?i)\\bcharset=\\s*(?:\"|')?([^\\s,;\"']*)"
    /// used if not found in header or meta charset
    static let defaultCharset: String = "UTF-8"
    /// ~130K.
    static let bufferSize: Int = 0x20000
    /// Code point of the Unicode byte order mark.
    static let UNICODE_BOM: Int = 0xFEFF
    /// Character pool for MIME boundaries.
    static let mimeBoundaryChars: String = "-_1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    /// Length used for MIME boundaries.
    static let boundaryLength: Int = 32

}
24 |
--------------------------------------------------------------------------------
/Sources/DocumentType.swift:
--------------------------------------------------------------------------------
1 | //
2 | // DocumentType.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 29/09/16.
6 | //
7 |
8 | import Foundation
9 |
10 | /**
11 | * A {@code } node.
12 | */
13 | public class DocumentType: Node {
/// Value stored under `PUB_SYS_KEY` for a PUBLIC doctype.
static let PUBLIC_KEY: [UInt8] = "PUBLIC".utf8Array
/// Value stored under `PUB_SYS_KEY` for a SYSTEM doctype.
static let SYSTEM_KEY: [UInt8] = "SYSTEM".utf8Array
// Attribute keys under which the doctype's parts are stored.
private static let NAME: [UInt8] = "name".utf8Array
private static let PUB_SYS_KEY: [UInt8] = "pubSysKey".utf8Array // PUBLIC or SYSTEM
private static let PUBLIC_ID: [UInt8] = "publicId".utf8Array
private static let SYSTEM_ID: [UInt8] = "systemId".utf8Array
// todo: quirk mode from publicId and systemId
21 |
/**
 * Create a new doctype element from `String` arguments; each argument is converted
 * to UTF-8 bytes and forwarded to the byte-array initializer.
 * @param name the doctype's name
 * @param publicId the doctype's public ID
 * @param systemId the doctype's system ID
 * @param baseUri the doctype's base URI
 */
public convenience init(_ name: String, _ publicId: String, _ systemId: String, _ baseUri: String) {
    self.init(
        name.utf8Array,
        publicId.utf8Array,
        systemId.utf8Array,
        baseUri.utf8Array
    )
}
32 |
/// Create a new doctype element from UTF-8 byte arrays.
///
/// When the public ID is non-blank, the PUBLIC/SYSTEM key is set to `PUBLIC_KEY`.
/// The first attribute write that throws stops the remaining writes; the error is ignored.
/// @param name the doctype's name
/// @param publicId the doctype's public ID
/// @param systemId the doctype's system ID
/// @param baseUri the doctype's base URI
public init(_ name: [UInt8], _ publicId: [UInt8], _ systemId: [UInt8], _ baseUri: [UInt8]) {
    super.init(baseUri)
    do {
        try attr(DocumentType.NAME, name)
        try attr(DocumentType.PUBLIC_ID, publicId)
        let hasPublicId = has(DocumentType.PUBLIC_ID)
        if hasPublicId {
            try attr(DocumentType.PUB_SYS_KEY, DocumentType.PUBLIC_KEY)
        }
        try attr(DocumentType.SYSTEM_ID, systemId)
    } catch {
        // ignored: the node keeps whatever attributes were written before the failure
    }
}
44 |
/**
 * Create a new doctype element with an explicit PUBLIC/SYSTEM key.
 * The first attribute write that throws stops the remaining writes; the error is ignored.
 * @param name the doctype's name
 * @param pubSysKey the PUBLIC or SYSTEM key; not stored when nil
 * @param publicId the doctype's public ID
 * @param systemId the doctype's system ID
 * @param baseUri the doctype's base URI
 */
public init(_ name: [UInt8], _ pubSysKey: [UInt8]?, _ publicId: [UInt8], _ systemId: [UInt8], _ baseUri: [UInt8]) {
    super.init(baseUri)
    do {
        try attr(DocumentType.NAME, name)
        if let pubSysKey {
            try attr(DocumentType.PUB_SYS_KEY, pubSysKey)
        }
        try attr(DocumentType.PUBLIC_ID, publicId)
        try attr(DocumentType.SYSTEM_ID, systemId)
    } catch {
        // ignored: the node keeps whatever attributes were written before the failure
    }
}
63 |
/// The node name as UTF-8 bytes, derived from `nodeName()`.
public override func nodeNameUTF8() -> [UInt8] {
    let name = nodeName()
    return name.utf8Array
}

/// The fixed node name used for doctype nodes.
public override func nodeName() -> String {
    "#doctype"
}
71 |
72 | override func outerHtmlHead(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) {
73 | if (out.syntax() == OutputSettings.Syntax.html && !has(DocumentType.PUBLIC_ID) && !has(DocumentType.SYSTEM_ID)) {
74 | // looks like a html5 doctype, go lowercase for aesthetics
75 | accum.append("")
105 | }
106 |
/// Doctype nodes have no closing markup, so nothing is written on the way out.
override func outerHtmlTail(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) {}

/// True when the given attribute can be read and its decoded value is not blank.
/// A read that throws counts as "not present".
private func has(_ attribute: [UInt8]) -> Bool {
    guard let raw = try? attr(attribute) else {
        return false
    }
    let text = String(decoding: raw, as: UTF8.self)
    return !StringUtil.isBlank(text)
}
115 |
/// Creates a new DocumentType from this node's name, public ID, system ID and base URI,
/// then delegates to `copy(clone:)`.
public override func copy(with zone: NSZone? = nil) -> Any {
    let name = attributes!.get(key: DocumentType.NAME)
    let publicId = attributes!.get(key: DocumentType.PUBLIC_ID)
    let systemId = attributes!.get(key: DocumentType.SYSTEM_ID)
    let duplicate = DocumentType(name, publicId, systemId, baseUri!)
    return copy(clone: duplicate)
}

/// Same as `copy(with:)`, but passes `parent` through to `copy(clone:parent:)`.
public override func copy(parent: Node?) -> Node {
    let name = attributes!.get(key: DocumentType.NAME)
    let publicId = attributes!.get(key: DocumentType.PUBLIC_ID)
    let systemId = attributes!.get(key: DocumentType.SYSTEM_ID)
    let duplicate = DocumentType(name, publicId, systemId, baseUri!)
    return copy(clone: duplicate, parent: parent)
}

/// Defers entirely to the superclass implementation.
public override func copy(clone: Node, parent: Node?) -> Node {
    let copied = super.copy(clone: clone, parent: parent)
    return copied
}
135 |
136 | }
137 |
--------------------------------------------------------------------------------
/Sources/Exception.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Exception.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 02/10/16.
6 | //
7 |
8 | import Foundation
9 |
/// Categories of error carried by `Exception.Error`.
public enum ExceptionType {
    case IllegalArgumentException, IOException, XmlDeclaration
    case MalformedURLException, CloneNotSupportedException, SelectorParseException
}
18 |
/// The error type thrown by this library: a category plus a human-readable message.
public enum Exception: Swift.Error {
    /// - Parameters:
    ///   - type: Category of the failure.
    ///   - Message: Description of what went wrong.
    case Error(type: ExceptionType, Message: String)
}
22 |
--------------------------------------------------------------------------------
/Sources/FormElement.swift:
--------------------------------------------------------------------------------
1 | //
2 | // FormElement.swift
3 | // SwifSoup
4 | //
5 | // Created by Nabil Chatbi on 29/09/16.
6 | //
7 |
8 | import Foundation
9 |
/// An HTML form element that keeps track of the form controls registered with it
/// through `addElement(_:)`.
///
/// Form submission and form-data extraction are not implemented; the reference
/// (Java) implementations are retained below as commented-out TODOs.
public class FormElement: Element {
    /// Controls registered with this form.
    private let _elements: Elements = Elements()

    /// Creates a new, standalone form element.
    ///
    /// - Parameters:
    ///   - tag: Tag of this element.
    ///   - baseUri: The base URI.
    ///   - attributes: Initial attributes.
    public override init(_ tag: Tag, _ baseUri: [UInt8], _ attributes: Attributes) {
        super.init(tag, baseUri, attributes)
    }

    /// Creates a new, standalone form element without initial attributes.
    ///
    /// - Parameters:
    ///   - tag: Tag of this element.
    ///   - baseUri: The base URI.
    public override init(_ tag: Tag, _ baseUri: [UInt8]) {
        super.init(tag, baseUri)
    }

    /// The form controls associated with this form.
    public func elements() -> Elements {
        _elements
    }

    /// Registers a form control with this form.
    ///
    /// - Parameter element: Form control to add.
    /// - Returns: This form element, for chaining.
    @discardableResult
    public func addElement(_ element: Element) -> FormElement {
        _elements.add(element)
        return self
    }

    // TODO: not yet ported.
    // Prepare to submit this form. A Connection object is created with the request set up from the
    // form values. You can then set up other options (like user-agent, timeout, cookies), then
    // execute it.
    // Returns a connection prepared from the values of this form.
    // Throws IllegalArgumentException if the form's absolute action URL cannot be determined. Make
    // sure you pass the document's base URI when parsing.
    //
    //    public func submit()throws->Connection {
    //        let action: String = hasAttr("action") ? try absUrl("action") : try baseUri()
    //        Validate.notEmpty(action, "Could not determine a form action URL for submit. Ensure you set a base URI when parsing.")
    //        Connection.Method method = attr("method").toUpperCase().equals("POST") ?
    //            Connection.Method.POST : Connection.Method.GET
    //
    //        return Jsoup.connect(action)
    //            .data(formData())
    //            .method(method)
    //    }

    // TODO: not yet ported.
    // Get the data that this form submits. The returned list is a copy of the data, and changes to
    // the contents of the list will not be reflected in the DOM.
    // Returns a list of key vals.
    //
    //    public List formData() {
    //        ArrayList data = new ArrayList();
    //
    //        // iterate the form control elements and accumulate their values
    //        for (Element el: elements) {
    //            if (!el.tag().isFormSubmittable()) continue; // contents are form listable, superset of submitable
    //            if (el.hasAttr("disabled")) continue; // skip disabled form inputs
    //            String name = el.attr("name");
    //            if (name.length() == 0) continue;
    //            String type = el.attr("type");
    //
    //            if ("select".equals(el.tagName())) {
    //                Elements options = el.select("option[selected]");
    //                boolean set = false;
    //                for (Element option: options) {
    //                    data.add(HttpConnection.KeyVal.create(name, option.val()));
    //                    set = true;
    //                }
    //                if (!set) {
    //                    Element option = el.select("option").first();
    //                    if (option != null)
    //                        data.add(HttpConnection.KeyVal.create(name, option.val()));
    //                }
    //            } else if ("checkbox".equalsIgnoreCase(type) || "radio".equalsIgnoreCase(type)) {
    //                // only add checkbox or radio if they have the checked attribute
    //                if (el.hasAttr("checked")) {
    //                    final String val = el.val().length() > 0 ? el.val() : "on";
    //                    data.add(HttpConnection.KeyVal.create(name, val));
    //                }
    //            } else {
    //                data.add(HttpConnection.KeyVal.create(name, el.val()));
    //            }
    //        }
    //        return data;
    //    }

    /// Creates a fresh `FormElement` from this element's tag, base URI and attributes
    /// and passes it to `copy(clone:)`. Traps if `baseUri` or `attributes` is nil.
    public override func copy(with zone: NSZone? = nil) -> Any {
        return copy(clone: makeEmptyDuplicate())
    }

    /// Same as `copy(with:)`, but the duplicate is passed to `copy(clone:parent:)`
    /// together with `parent`.
    public override func copy(parent: Node?) -> Node {
        return copy(clone: makeEmptyDuplicate(), parent: parent)
    }

    /// Adds every control registered on this form to `clone` (which must be a
    /// `FormElement`; the cast traps otherwise), then defers to the superclass.
    public override func copy(clone: Node, parent: Node?) -> Node {
        let formClone = clone as! FormElement
        _elements.array().forEach { control in
            formClone._elements.add(control)
        }
        return super.copy(clone: formClone, parent: parent)
    }

    /// Builds the bare duplicate shared by both public copy entry points.
    private func makeEmptyDuplicate() -> FormElement {
        return FormElement(_tag, baseUri!, attributes!)
    }
}
135 |
--------------------------------------------------------------------------------
/Sources/HttpStatusException.swift:
--------------------------------------------------------------------------------
1 | //
2 | // HttpStatusException.swift
3 | // SwifSoup
4 | //
5 | // Created by Nabil Chatbi on 29/09/16.
6 | //
7 |
8 | import Foundation
9 | //TODO:
10 |
--------------------------------------------------------------------------------
/Sources/Mutex.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Mutex.swift
3 | // SwiftSoup
4 | //
5 | // Created by xukun on 2022/3/31.
6 | //
7 |
8 | import Foundation
9 |
10 | #if os(Windows)
11 | import WinSDK
12 | #endif
13 |
/// A minimal non-recursive lock conforming to `NSLocking`.
///
/// The underlying OS primitive lives in manually allocated memory so that the
/// pointer handed to the C API is stable for the lifetime of the object. Passing
/// `&storedProperty` only yields a pointer that is valid for the duration of that
/// single call, which is not a sound basis for a lock that is used across calls.
final class Mutex: NSLocking {
#if os(Windows)
    private let mutex: UnsafeMutablePointer<CRITICAL_SECTION>

    init() {
        mutex = UnsafeMutablePointer<CRITICAL_SECTION>.allocate(capacity: 1)
        mutex.initialize(to: CRITICAL_SECTION())
        InitializeCriticalSection(mutex)
    }

    deinit {
        DeleteCriticalSection(mutex)
        mutex.deinitialize(count: 1)
        mutex.deallocate()
    }

    /// Blocks until the lock is acquired.
    func lock() {
        EnterCriticalSection(mutex)
    }

    /// Releases the lock.
    func unlock() {
        LeaveCriticalSection(mutex)
    }
#else
    private let mutex: UnsafeMutablePointer<pthread_mutex_t>

    init() {
        mutex = UnsafeMutablePointer<pthread_mutex_t>.allocate(capacity: 1)
        mutex.initialize(to: pthread_mutex_t())
        pthread_mutex_init(mutex, nil)
    }

    deinit {
        pthread_mutex_destroy(mutex)
        mutex.deinitialize(count: 1)
        mutex.deallocate()
    }

    /// Blocks until the lock is acquired.
    func lock() {
        pthread_mutex_lock(mutex)
    }

    /// Releases the lock.
    func unlock() {
        pthread_mutex_unlock(mutex)
    }
#endif
}
53 |
--------------------------------------------------------------------------------
/Sources/NodeTraversor.swift:
--------------------------------------------------------------------------------
1 | //
2 | // NodeTraversor.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 17/10/16.
6 | //
7 |
8 | import Foundation
9 |
/// Walks a node tree depth-first, reporting every node to a `NodeVisitor`.
open class NodeTraversor {
    private let visitor: NodeVisitor

    /// Creates a new traversor.
    ///
    /// - Parameter visitor: Receives `head`/`tail` callbacks for each visited node.
    public init(_ visitor: NodeVisitor) {
        self.visitor = visitor
    }

    /// Starts a depth-first traversal of `root` and all of its descendants.
    ///
    /// `head` is called when a node is reached; `tail` is called once the walk
    /// leaves it. Passing `nil` does nothing.
    ///
    /// - Parameter root: The node at which the traversal starts.
    /// - Throws: Whatever the visitor's callbacks throw.
    open func traverse(_ root: Node?) throws {
        var cursor: Node? = root
        var level = 0

        while let entered = cursor {
            try visitor.head(entered, level)

            if entered.hasChildNodes() {
                // Descend into the first child before anything else.
                cursor = entered.childNode(0)
                level += 1
                continue
            }

            // Leaf reached: close nodes while climbing until one has a next sibling
            // (or we are back at the starting level).
            var closing: Node = entered
            while !closing.hasNextSibling() && level > 0 {
                let parent = closing.getParentNode()
                try visitor.tail(closing, level)
                level -= 1
                closing = parent!
            }

            // Fetch the sibling before `tail`, which is allowed to remove the node.
            let following = closing.nextSibling()
            try visitor.tail(closing, level)
            if closing === root {
                break
            }
            cursor = following
        }
    }
}
51 |
--------------------------------------------------------------------------------
/Sources/NodeVisitor.swift:
--------------------------------------------------------------------------------
1 | //
2 | // NodeVisitor.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 16/10/16.
6 | //
7 |
8 | import Foundation
9 |
/// Node visitor interface. Provide an implementation to `NodeTraversor` to iterate through nodes.
///
/// The protocol has two callbacks, `head` and `tail`. `head` is called when a node is first
/// seen, and `tail` when all of the node's children have been visited. For example, `head` can
/// be used to create a start tag for a node, and `tail` to create the end tag.
public protocol NodeVisitor {
    /// Called when a node is first visited. `head` cannot safely call `node.remove()`.
    ///
    /// - Parameters:
    ///   - node: The node being visited.
    ///   - depth: Depth of the node relative to the root node: the root has depth 0,
    ///     a child of the root has depth 1.
    func head(_ node: Node, _ depth: Int) throws

    /// Called when a node is last visited, after all of its descendants have been visited.
    /// `tail` can safely call `node.remove()`.
    ///
    /// - Parameters:
    ///   - node: The node being visited.
    ///   - depth: Depth of the node relative to the root node: the root has depth 0,
    ///     a child of the root has depth 1.
    func tail(_ node: Node, _ depth: Int) throws
}
37 |
--------------------------------------------------------------------------------
/Sources/ParseError.swift:
--------------------------------------------------------------------------------
1 | //
2 | // ParseError.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 19/10/16.
6 | //
7 |
8 | import Foundation
9 |
/// Records an error in the input HTML that occurred during either tokenisation
/// or tree building.
open class ParseError {
    private let pos: Int
    private let errorMsg: String

    /// - Parameters:
    ///   - pos: Offset of the error within the input.
    ///   - errorMsg: Description of the error.
    init(_ pos: Int, _ errorMsg: String) {
        self.errorMsg = errorMsg
        self.pos = pos
    }

    /// The error message.
    open func getErrorMessage() -> String {
        errorMsg
    }

    /// The offset of the error within the input.
    open func getPosition() -> Int {
        pos
    }

    /// Renders the error as `"<position>: <message>"`.
    open func toString() -> String {
        "\(pos): \(errorMsg)"
    }
}
42 |
--------------------------------------------------------------------------------
/Sources/ParseErrorList.swift:
--------------------------------------------------------------------------------
1 | //
2 | // ParseErrorList.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 19/10/16.
6 | //
7 |
8 | import Foundation
9 |
/// A list of `ParseError`s with an advisory upper bound on how many should be recorded.
///
/// The bound is not enforced by `add`; callers are expected to consult
/// `canAddError()` first.
public class ParseErrorList {
    private static let INITIAL_CAPACITY: Int = 16
    private let maxSize: Int
    private let initialCapacity: Int
    private var array: [ParseError] = []

    /// - Parameters:
    ///   - initialCapacity: Number of slots to reserve up front.
    ///   - maxSize: Maximum number of errors that should be tracked.
    init(_ initialCapacity: Int, _ maxSize: Int) {
        self.maxSize = maxSize
        self.initialCapacity = initialCapacity
        // Start EMPTY. Pre-filling the list with `maxSize` placeholders made
        // `canAddError()` permanently false and pushed real errors past the placeholders.
        array.reserveCapacity(max(0, min(initialCapacity, maxSize)))
    }

    /// `true` while fewer than `maxSize` errors have been recorded.
    func canAddError() -> Bool {
        return array.count < maxSize
    }

    /// The maximum number of errors this list is meant to hold.
    func getMaxSize() -> Int {
        return maxSize
    }

    /// A list with a maximum size of zero, i.e. one that never accepts errors.
    static func noTracking() -> ParseErrorList {
        return ParseErrorList(0, 0)
    }

    /// A list that tracks up to `maxSize` errors.
    static func tracking(_ maxSize: Int) -> ParseErrorList {
        return ParseErrorList(INITIAL_CAPACITY, maxSize)
    }

    //    // you need to provide the Equatable functionality
    //    static func ==(leftFoo: Foo, rightFoo: Foo) -> Bool {
    //        return ObjectIdentifier(leftFoo) == ObjectIdentifier(rightFoo)
    //    }

    /// Appends an error to the end of the list.
    open func add(_ e: ParseError) {
        array.append(e)
    }

    /// Inserts an error at `index` (which must be within `0...count`).
    open func add(_ index: Int, _ element: ParseError) {
        array.insert(element, at: index)
    }

}
52 |
--------------------------------------------------------------------------------
/Sources/ParseSettings.swift:
--------------------------------------------------------------------------------
1 | //
2 | // ParseSettings.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 14/10/16.
6 | //
7 |
8 | import Foundation
9 |
/// Controls whether tag and attribute names keep their case during parsing.
open class ParseSettings {
    /// HTML default settings: both tag and attribute names are lower-cased during parsing.
    public static let htmlDefault: ParseSettings = ParseSettings(false, false)

    /// Preserve both tag and attribute case.
    public static let preserveCase: ParseSettings = ParseSettings(true, true)

    private let preserveTagCase: Bool
    private let preserveAttributeCase: Bool

    /// Defines parse settings.
    ///
    /// - Parameters:
    ///   - tag: Preserve tag case?
    ///   - attribute: Preserve attribute name case?
    public init(_ tag: Bool, _ attribute: Bool) {
        preserveTagCase = tag
        preserveAttributeCase = attribute
    }

    /// Trims `name` and lower-cases it unless tag case is preserved.
    open func normalizeTag(_ name: [UInt8]) -> [UInt8] {
        return normalized(name, keepingCase: preserveTagCase)
    }

    /// `String` convenience over `normalizeTag(_: [UInt8])`.
    open func normalizeTag(_ name: String) -> String {
        let bytes = normalizeTag(name.utf8Array)
        return String(decoding: bytes, as: UTF8.self)
    }

    /// Trims `name` and lower-cases it unless attribute case is preserved.
    open func normalizeAttribute(_ name: [UInt8]) -> [UInt8] {
        return normalized(name, keepingCase: preserveAttributeCase)
    }

    /// `String` convenience over `normalizeAttribute(_: [UInt8])`.
    open func normalizeAttribute(_ name: String) -> String {
        let bytes = normalizeAttribute(name.utf8Array)
        return String(decoding: bytes, as: UTF8.self)
    }

    /// Lower-cases all keys of `attributes` in place unless attribute case is
    /// preserved, and returns the same instance.
    open func normalizeAttributes(_ attributes: Attributes) throws -> Attributes {
        guard preserveAttributeCase else {
            attributes.lowercaseAllKeys()
            return attributes
        }
        return attributes
    }

    /// Shared trim-then-maybe-lowercase step for tag and attribute names.
    private func normalized(_ raw: [UInt8], keepingCase: Bool) -> [UInt8] {
        let trimmed = raw.trim()
        return keepingCase ? trimmed : trimmed.lowercased()
    }

}
65 |
--------------------------------------------------------------------------------
/Sources/ParsingStrings.swift:
--------------------------------------------------------------------------------
1 | import Foundation
2 |
/// Sets the bit for byte `b` in a 256-bit mask stored as four 64-bit words.
///
/// The top two bits of `b` select the word; the low six bits select the bit within it.
@inline(__always)
func setBit(in mask: inout (UInt64, UInt64, UInt64, UInt64), forByte b: UInt8) {
    let bit: UInt64 = 1 << UInt64(b & 63)
    switch b >> 6 {
    case 0:
        mask.0 |= bit
    case 1:
        mask.1 |= bit
    case 2:
        mask.2 |= bit
    default:
        mask.3 |= bit
    }
}
14 |
/// Tests whether the bit for byte `b` is set in a 256-bit mask stored as four 64-bit words.
///
/// The top two bits of `b` select the word; the low six bits select the bit within it.
@inline(__always)
public func testBit(_ mask: (UInt64, UInt64, UInt64, UInt64), _ b: UInt8) -> Bool {
    let word: UInt64
    switch b >> 6 {
    case 0:
        word = mask.0
    case 1:
        word = mask.1
    case 2:
        word = mask.2
    default:
        word = mask.3
    }
    let bit: UInt64 = 1 << UInt64(b & 63)
    return word & bit != 0
}
28 |
/// One node of a byte-keyed trie.
final class TrieNode {
    /// Child nodes indexed directly by byte value (256 slots) for the fastest lookup.
    var children = [TrieNode?](repeating: nil, count: 256)

    /// `true` when the path ending at this node spells a complete string.
    var isTerminal = false
}
36 |
37 | public struct ParsingStrings: Hashable, Equatable {
38 | let multiByteChars: [[UInt8]]
39 | let multiByteCharLengths: [Int]
40 | public let multiByteByteLookups: [(UInt64, UInt64, UInt64, UInt64)]
41 | public let multiByteSet: Set>
42 | public let multiByteByteLookupsCount: Int
43 | public var singleByteMask: (UInt64, UInt64, UInt64, UInt64) = (0, 0, 0, 0) // Precomputed set for single-byte lookups
44 | private let precomputedHash: Int
45 | private let root = TrieNode()
46 |
/// Builds the set from strings, using each string's `utf8Array` bytes.
public init(_ strings: [String]) {
    let encoded: [[UInt8]] = strings.map { $0.utf8Array }
    self.init(encoded)
}
50 |
/// Builds the set from raw byte strings and precomputes every lookup structure:
/// the trie, the single-byte mask, one byte mask per position, the slice set and the hash.
public init(_ strings: [[UInt8]]) {
    multiByteChars = strings
    multiByteCharLengths = strings.map { $0.count }
    let longest = multiByteCharLengths.max() ?? 0

    // Trie: one path per non-empty entry, with the final node marked terminal.
    for entry in strings where !entry.isEmpty {
        var cursor = root
        for byte in entry {
            let slot = Int(byte)
            if let existing = cursor.children[slot] {
                cursor = existing
            } else {
                let created = TrieNode()
                cursor.children[slot] = created
                cursor = created
            }
        }
        cursor.isTerminal = true
    }

    // Masks: bit `b` of `lookups[i]` is set when some entry has byte `b` at position `i`.
    var lookups = [(UInt64, UInt64, UInt64, UInt64)](repeating: (0, 0, 0, 0), count: longest)
    for entry in strings {
        if entry.count == 1 {
            setBit(in: &singleByteMask, forByte: entry[0])
        }
        for (position, byte) in entry.enumerated() {
            setBit(in: &lookups[position], forByte: byte)
        }
    }
    multiByteByteLookups = lookups
    multiByteByteLookupsCount = lookups.count

    multiByteSet = Set(strings.map { ArraySlice($0) })
    precomputedHash = Self.computeHash(
        multiByteChars: strings,
        multiByteByteLookups: lookups
    )
}
90 |
/// Builds the set from Unicode scalars, using each scalar's UTF-8 encoding.
public init(_ strings: [UnicodeScalar]) {
    let encoded: [[UInt8]] = strings.map { Array($0.utf8) }
    self.init(encoded)
}
94 |
/// Folds every entry (length first, then its bytes) followed by every
/// per-position mask word into a single hash value.
private static func computeHash(
    multiByteChars: [[UInt8]],
    multiByteByteLookups: [(UInt64, UInt64, UInt64, UInt64)]
) -> Int {
    var hasher = Hasher()
    for entry in multiByteChars {
        hasher.combine(entry.count)
        entry.forEach { hasher.combine($0) }
    }
    for (w0, w1, w2, w3) in multiByteByteLookups {
        hasher.combine(w0)
        hasher.combine(w1)
        hasher.combine(w2)
        hasher.combine(w3)
    }
    return hasher.finalize()
}
114 |
/// Two instances are equal when they hold the same entries in the same order.
public static func ==(lhs: ParsingStrings, rhs: ParsingStrings) -> Bool {
    return lhs.multiByteChars.elementsEqual(rhs.multiByteChars)
}
118 |
/// Feeds the hash precomputed at initialization into `hasher`.
public func hash(into hasher: inout Hasher) {
    precomputedHash.hash(into: &hasher)
}
122 |
/// Array convenience over the `ArraySlice` overload.
@inlinable
public func contains(_ bytes: [UInt8]) -> Bool {
    let whole: ArraySlice<UInt8> = bytes[...]
    return contains(whole)
}
127 |
/// Reports whether `slice` is one of the stored byte strings.
///
/// The per-position masks reject most non-members cheaply: a candidate longer
/// than the longest entry, or with a byte no entry has at that position, cannot
/// be a member. Only survivors pay for the set lookup.
@inlinable
public func contains(_ slice: ArraySlice<UInt8>) -> Bool {
    guard slice.count <= multiByteByteLookupsCount else {
        return false
    }
    for (position, byte) in slice.enumerated() {
        if !testBit(multiByteByteLookups[position], byte) {
            return false
        }
    }
    return multiByteSet.contains(slice)
}
139 |
/// Reports whether `byte` on its own is one of the stored (single-byte) entries,
/// by testing its bit in the precomputed single-byte mask.
@inlinable
public func contains(_ byte: UInt8) -> Bool {
    return testBit(singleByteMask, byte)
}
157 |
158 | @inlinable
159 | public func contains(_ scalar: UnicodeScalar) -> Bool {
160 | // Fast path for ASCII
161 | if scalar.value < 0x80 {
162 | return contains(UInt8(scalar.value))
163 | }
164 |
165 | var buffer = [UInt8](repeating: 0, count: 4)
166 | var length = 0
167 | for b in scalar.utf8 {
168 | buffer[length] = b
169 | length &+= 1
170 | }
171 | let slice = buffer[..) -> Bool {
182 | // Early single-byte check
183 | if slice.count == 1 {
184 | return contains(slice.first!)
185 | }
186 |
187 | var current = root
188 | for b in slice {
189 | guard let child = current.children[Int(b)] else {
190 | return false
191 | }
192 | current = child
193 | }
194 | return current.isTerminal
195 | }
196 | }
197 |
--------------------------------------------------------------------------------
/Sources/Pattern.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Regex.swift
3 | // SwifSoup
4 | //
5 | // Created by Nabil Chatbi on 08/10/16.
6 | //
7 |
8 | import Foundation
9 |
/// A regular-expression source string plus compile flags, evaluated with
/// `NSRegularExpression`.
public struct Pattern {
    /// Flag for `compile(_:_:)`: match letters regardless of case.
    public static let CASE_INSENSITIVE: Int = 0x02

    let pattern: String
    /// Bit set of `Pattern` flags supplied at compile time (0 when none).
    let flags: Int

    init(_ pattern: String, _ flags: Int = 0) {
        self.pattern = pattern
        self.flags = flags
    }

    /// Creates a pattern with no flags.
    static public func compile(_ s: String) -> Pattern {
        return Pattern(s)
    }

    /// Creates a pattern with the given flags (see `CASE_INSENSITIVE`).
    /// Previously the flags were discarded; they are now honoured when matching.
    static public func compile(_ s: String, _ op: Int) -> Pattern {
        return Pattern(s, op)
    }

    /// `NSRegularExpression` options equivalent to `flags`.
    private var regexOptions: NSRegularExpression.Options {
        return (flags & Pattern.CASE_INSENSITIVE) != 0 ? [.caseInsensitive] : []
    }

    /// Throws if the pattern is not a valid regular expression.
    public func validate() throws {
        _ = try NSRegularExpression(pattern: pattern, options: regexOptions)
    }

    /// Finds all matches of this pattern in `text`.
    ///
    /// An invalid pattern is reported via `print` and yields a matcher with no matches.
    public func matcher(in text: String) -> Matcher {
        do {
            let regex = try NSRegularExpression(pattern: pattern, options: regexOptions)
            // NSRegularExpression addresses text in UTF-16 code units.
            let wholeText = NSRange(location: 0, length: text.utf16.count)
            let results = regex.matches(in: text, options: [], range: wholeText)
            return Matcher(results, text)
        } catch let error {
            print("invalid regex: \(error.localizedDescription)")
            return Matcher([], text)
        }
    }

    /// The regular-expression source string.
    public func toString() -> String {
        return pattern
    }
}

/// Cursor over the matches produced by `Pattern.matcher(in:)`.
///
/// Call `find()` to advance to the next match, then `group(_:)` to read it.
public class Matcher {
    let matches: [NSTextCheckingResult]
    let string: String
    /// Index of the current match; -1 until `find()` has been called.
    var index: Int = -1

    /// Total number of matches.
    public var count: Int { return matches.count }

    init(_ m: [NSTextCheckingResult], _ s: String) {
        matches = m
        string = s
    }

    /// Advances to the next match.
    ///
    /// - Returns: `true` if there is a current match after advancing.
    @discardableResult
    public func find() -> Bool {
        index += 1
        return index < matches.count
    }

    /// Returns the text of capture group `i` of the current match.
    ///
    /// - Returns: `nil` when there is no current match (before the first `find()`
    ///   or after it returned `false`), when `i` is not a group of the match, or
    ///   when the group did not participate in the match.
    public func group(_ i: Int) -> String? {
        guard matches.indices.contains(index) else { return nil }
        let current = matches[index]
        guard i >= 0, i < current.numberOfRanges else { return nil }

        let range = current.range(at: i)
        if range.location == NSNotFound { return nil }
        // The range is in UTF-16 units, so slice through NSString rather than by Character.
        return NSString(string: string).substring(with: range)
    }

    /// Returns the whole text of the current match (group 0).
    public func group() -> String? {
        return group(0)
    }
}
84 |
--------------------------------------------------------------------------------
/Sources/SerializationException.swift:
--------------------------------------------------------------------------------
1 | //
2 | // SerializationException.swift
3 | // SwifSoup
4 | //
5 | // Created by Nabil Chatbi on 29/09/16.
6 | //
7 |
8 | import Foundation
9 | //TODO:
10 |
--------------------------------------------------------------------------------
/Sources/SimpleDictionary.swift:
--------------------------------------------------------------------------------
1 | //
2 | // SimpleDictionary.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 30/10/16.
6 | //
7 |
8 | import Foundation
9 |
/// A reference-type wrapper around a Swift dictionary.
public class SimpleDictionary<KeyType: Hashable, ValueType> {

    public typealias DictionaryType = [KeyType: ValueType]

    /// The wrapped dictionary; readable by anyone, mutated only through this class.
    public private(set) var values = DictionaryType()

    public init() {
    }

    /// Number of stored key/value pairs.
    public var count: Int {
        return values.count
    }

    /// Removes the entry for `key`, if any.
    public func remove(_ key: KeyType) {
        values.removeValue(forKey: key)
    }

    /// Reports whether an entry exists for `key`.
    public func contains(_ key: KeyType) -> Bool {
        return values[key] != nil
    }

    /// Stores `value` under `key`, replacing any previous value.
    public func put(_ value: ValueType, forKey key: KeyType) {
        values[key] = value
    }

    /// Returns the value stored under `key`, or `nil` if there is none.
    public func get(_ key: KeyType) -> ValueType? {
        return values[key]
    }

}
39 |
--------------------------------------------------------------------------------
/Sources/StreamReader.swift:
--------------------------------------------------------------------------------
1 | //
2 | // StreamReader.swift
3 | // SwifSoup
4 | //
5 | // Created by Nabil Chatbi on 08/10/16.
6 | //
7 |
8 | import Foundation
9 |
/// Reads a file piece by piece, splitting it on a delimiter (newline by default).
///
/// Data is pulled from the file in chunks of `chunkSize` bytes and buffered until
/// a delimiter is seen.
class StreamReader {

    let encoding: String.Encoding
    let chunkSize: Int
    /// `nil` once `close()` has been called.
    var fileHandle: FileHandle!
    /// The delimiter encoded with `encoding`.
    let delimData: Data
    /// Bytes read from the file that have not been returned yet.
    var buffer: Data
    var atEof: Bool

    /// - Parameters:
    ///   - path: Path of the file to read.
    ///   - delimiter: Separator between lines.
    ///   - encoding: Encoding used for the delimiter and the returned lines.
    ///   - chunkSize: Number of bytes requested per read.
    /// - Returns: `nil` if the file cannot be opened or the delimiter cannot be
    ///   represented in `encoding`.
    init?(path: String, delimiter: String = "\n", encoding: String.Encoding = .utf8,
          chunkSize: Int = 4096) {

        guard let fileHandle = FileHandle(forReadingAtPath: path),
              let delimData = delimiter.data(using: encoding) else {
            return nil
        }
        self.encoding = encoding
        self.chunkSize = chunkSize
        self.fileHandle = fileHandle
        self.delimData = delimData
        self.buffer = Data(capacity: chunkSize)
        self.atEof = false
    }

    deinit {
        self.close()
    }

    /// Returns the next line, or `nil` on EOF.
    ///
    /// `nil` is also returned when the line's bytes cannot be decoded with `encoding`.
    func nextLine() -> String? {
        precondition(fileHandle != nil, "Attempt to read from closed file")

        // Read data chunks from file until a line delimiter is found:
        while !atEof {
            if let range = buffer.range(of: delimData) {
                // Convert complete line (excluding the delimiter) to a string:
                let line = String(data: buffer.subdata(in: 0..<range.lowerBound), encoding: encoding)
                // Remove line (and the delimiter) from the buffer:
                buffer.removeSubrange(0..<range.upperBound)
                return line
            }
            let tmpData = fileHandle.readData(ofLength: chunkSize)
            if tmpData.count > 0 {
                buffer.append(tmpData)
            } else {
                // EOF or read error.
                atEof = true
                if buffer.count > 0 {
                    // Buffer contains last line in file (not terminated by delimiter).
                    let line = String(data: buffer as Data, encoding: encoding)
                    buffer.count = 0
                    return line
                }
            }
        }
        return nil
    }

    /// Start reading from the beginning of file.
    func rewind() {
        precondition(fileHandle != nil, "Attempt to rewind a closed file")
        fileHandle.seek(toFileOffset: 0)
        buffer.count = 0
        atEof = false
    }

    /// Close the underlying file. No reading must be done after calling this method.
    func close() {
        fileHandle?.closeFile()
        fileHandle = nil
    }
}
81 |
/// Allows `for line in reader { ... }`; each iteration step calls `nextLine()`.
extension StreamReader: Sequence {
    func makeIterator() -> AnyIterator<String> {
        return AnyIterator {
            return self.nextLine()
        }
    }
}
89 |
--------------------------------------------------------------------------------
/Sources/StringBuilder.swift:
--------------------------------------------------------------------------------
/// Supports creation of a `String` from pieces, accumulated as raw UTF-8 bytes.
///
/// Based on https://gist.github.com/kristopherjohnson/1fc55e811d944a430289
open class StringBuilder {
    /// The UTF-8 bytes accumulated so far.
    public var buffer: [UInt8] = []

    /// Creates a builder with initial contents.
    ///
    /// - Parameter string: Initial value; defaults to empty.
    public init(string: String? = nil) {
        if let string, !string.isEmpty {
            buffer.append(contentsOf: string.utf8)
        }
        buffer.reserveCapacity(1024)
    }

    /// Creates an empty builder with room reserved for `size` bytes.
    public init(_ size: Int) {
        buffer.reserveCapacity(size)
    }

    /// Returns the accumulated bytes decoded as UTF-8.
    open func toString() -> String {
        return String(decoding: buffer, as: UTF8.self)
    }

    /// The current length in bytes.
    open var xlength: Int {
        return buffer.count
    }

    /// `true` when nothing has been appended.
    open var isEmpty: Bool {
        return buffer.isEmpty
    }

    /// Appends a string.
    ///
    /// - Returns: This builder, for chaining.
    @inline(__always)
    @discardableResult
    open func append(_ string: String) -> StringBuilder {
        buffer.append(contentsOf: string.utf8)
        return self
    }

    /// Appends a single character.
    @inline(__always)
    open func append(_ chr: Character) {
        append(String(chr))
    }

    /// Appends a run of characters.
    @inline(__always)
    open func appendCodePoints(_ chr: [Character]) {
        append(String(chr))
    }

    /// Appends the code point with numeric value `ch`.
    ///
    /// Values that are not Unicode scalar values (surrogates, negatives, anything
    /// above U+10FFFF) are appended as U+FFFD REPLACEMENT CHARACTER instead of trapping.
    @inline(__always)
    open func appendCodePoint(_ ch: Int) {
        let replacement: UnicodeScalar = "\u{FFFD}"
        appendCodePoint(UnicodeScalar(ch) ?? replacement)
    }

    /// Appends the UTF-8 encoding of `ch`.
    @inlinable
    open func appendCodePoint(_ ch: UnicodeScalar) {
        let val = ch.value
        if val < 0x80 {
            // 1-byte ASCII
            buffer.append(UInt8(val))
        } else if val < 0x800 {
            // 2-byte sequence
            buffer.append(contentsOf: [
                UInt8(0xC0 | (val >> 6)),
                UInt8(0x80 | (val & 0x3F))
            ])
        } else if val < 0x10000 {
            // 3-byte sequence
            buffer.append(contentsOf: [
                UInt8(0xE0 | (val >> 12)),
                UInt8(0x80 | ((val >> 6) & 0x3F)),
                UInt8(0x80 | (val & 0x3F))
            ])
        } else {
            // 4-byte sequence
            buffer.append(contentsOf: [
                UInt8(0xF0 | (val >> 18)),
                UInt8(0x80 | ((val >> 12) & 0x3F)),
                UInt8(0x80 | ((val >> 6) & 0x3F)),
                UInt8(0x80 | (val & 0x3F))
            ])
        }
    }

    /// Appends the UTF-8 encoding of each scalar, in order.
    @inlinable
    open func appendCodePoints(_ chr: [UnicodeScalar]) {
        for scalar in chr {
            appendCodePoint(scalar)
        }
    }

    // Disabled: generic append of any value with a `description`.
    //    @discardableResult
    //    open func append<T: CustomStringConvertible>(_ value: T) -> StringBuilder {
    //        append(value.description)
    //        return self
    //    }

    /// Appends raw bytes; they are not validated as UTF-8.
    @inlinable
    @discardableResult
    open func append(_ value: ArraySlice<UInt8>) -> StringBuilder {
        buffer.append(contentsOf: value)
        return self
    }

    /// Appends raw bytes; they are not validated as UTF-8.
    @inlinable
    @discardableResult
    open func append(_ value: [UInt8]) -> StringBuilder {
        buffer.append(contentsOf: value)
        return self
    }

    /// Appends one raw byte.
    @inlinable
    @discardableResult
    open func append(_ value: UInt8) -> StringBuilder {
        buffer.append(value)
        return self
    }

    /// Appends the UTF-8 encoding of a scalar.
    @inlinable
    @discardableResult
    open func append(_ value: UnicodeScalar) -> StringBuilder {
        appendCodePoint(value)
        return self
    }

    /// Appends a string followed by a newline.
    ///
    /// - Returns: This builder, for chaining.
    @discardableResult
    open func appendLine(_ string: String) -> StringBuilder {
        append(string)
        append("\n")
        return self
    }

    /// Appends a value's `description` followed by a newline.
    ///
    /// - Returns: This builder, for chaining.
    @discardableResult
    open func appendLine<T: CustomStringConvertible>(_ value: T) -> StringBuilder {
        append(value.description)
        append("\n")
        return self
    }

    /// Resets the builder to empty, keeping the allocated capacity.
    ///
    /// - Returns: This builder, for chaining.
    @discardableResult
    @inlinable
    open func clear() -> StringBuilder {
        buffer.removeAll(keepingCapacity: true)
        return self
    }
}
192 |
/// Appends a `String` to a `StringBuilder` using operator syntax.
///
/// - Parameters:
///   - lhs: The builder to append to.
///   - rhs: The string to append.
@inlinable
public func += (lhs: StringBuilder, rhs: String) {
    _ = lhs.append(rhs)
}
203 |
/// Appends the `description` of a value to a `StringBuilder` using operator syntax.
///
/// - Parameters:
///   - lhs: The builder to append to.
///   - rhs: The value whose `description` is appended.
@inlinable
public func += <T: CustomStringConvertible>(lhs: StringBuilder, rhs: T) {
    lhs.append(rhs.description)
}
214 |
/// Creates a new `StringBuilder` holding the contents of `lhs` followed by `rhs`.
///
/// The byte buffers are concatenated directly rather than via `String`, so a
/// UTF-8 sequence split across the two builders is preserved instead of being
/// replaced by U+FFFD on each side.
///
/// - Parameters:
///   - lhs: First builder.
///   - rhs: Second builder.
/// - Returns: A new builder; neither operand is modified.
@inlinable
public func +(lhs: StringBuilder, rhs: StringBuilder) -> StringBuilder {
    let combined = StringBuilder(lhs.buffer.count + rhs.buffer.count)
    combined.append(lhs.buffer)
    combined.append(rhs.buffer)
    return combined
}
227 |
--------------------------------------------------------------------------------
/Sources/StructuralEvaluator.swift:
--------------------------------------------------------------------------------
1 | //
2 | // StructuralEvaluator.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 23/10/16.
6 | //
7 |
8 | import Foundation
9 |
/**
 * Base structural evaluator: wraps another evaluator and applies it to elements
 * related to the candidate (descendants, ancestors or preceding siblings).
 *
 * Errors thrown by the wrapped evaluator are deliberately swallowed by the nested
 * evaluators and treated as "no match".
 */
public class StructuralEvaluator: Evaluator {
    /// Evaluator applied to the related elements.
    let evaluator: Evaluator

    public init(_ evaluator: Evaluator) {
        self.evaluator = evaluator
    }

    /// Matches only when the candidate is the very same instance as the root.
    public class Root: Evaluator {
        public override func matches(_ root: Element, _ element: Element) -> Bool {
            return root === element
        }
    }

    /// Matches when any element returned by `getAllElements()` other than the
    /// candidate itself satisfies the wrapped evaluator.
    public class Has: StructuralEvaluator {
        public override init(_ evaluator: Evaluator) {
            super.init(evaluator)
        }

        public override func matches(_ root: Element, _ element: Element) throws -> Bool {
            for candidate in try element.getAllElements().array() where candidate != element {
                do {
                    if try evaluator.matches(root, candidate) {
                        return true
                    }
                } catch {
                    // A failing wrapped evaluator counts as "no match" for this candidate.
                }
            }
            return false
        }

        public override func toString() -> String {
            return ":has(\(evaluator.toString()))"
        }
    }

    /// Matches when the wrapped evaluator does not match; a thrown error yields `false`.
    public class Not: StructuralEvaluator {
        public override init(_ evaluator: Evaluator) {
            super.init(evaluator)
        }

        public override func matches(_ root: Element, _ node: Element) -> Bool {
            do {
                return try !evaluator.matches(root, node)
            } catch {
                return false
            }
        }

        public override func toString() -> String {
            return ":not\(evaluator.toString())"
        }
    }

    /// Matches when any ancestor of the candidate, up to and including `root`,
    /// satisfies the wrapped evaluator.
    public class Parent: StructuralEvaluator {
        public override init(_ evaluator: Evaluator) {
            super.init(evaluator)
        }

        public override func matches(_ root: Element, _ element: Element) -> Bool {
            if root == element {
                return false
            }

            // Walk up the ancestor chain. Binding with `while let` ends the walk when the
            // chain runs out before reaching `root` (candidate not inside `root`); the
            // previous `while (true)` form never terminated in that situation.
            var ancestor = element.parent()
            while let current = ancestor {
                do {
                    if try evaluator.matches(root, current) {
                        return true
                    }
                } catch {
                    // Treated as "no match" for this ancestor.
                }

                if current == root {
                    break
                }
                ancestor = current.parent()
            }
            return false
        }

        public override func toString() -> String {
            return ":parent\(evaluator.toString())"
        }
    }

    /// Matches when the candidate's direct parent satisfies the wrapped evaluator.
    public class ImmediateParent: StructuralEvaluator {
        public override init(_ evaluator: Evaluator) {
            super.init(evaluator)
        }

        public override func matches(_ root: Element, _ element: Element) -> Bool {
            guard root != element, let parent = element.parent() else {
                return false
            }
            do {
                return try evaluator.matches(root, parent)
            } catch {
                return false
            }
        }

        public override func toString() -> String {
            return ":ImmediateParent\(evaluator.toString())"
        }
    }

    /// Matches when any preceding element sibling satisfies the wrapped evaluator.
    public class PreviousSibling: StructuralEvaluator {
        public override init(_ evaluator: Evaluator) {
            super.init(evaluator)
        }

        public override func matches(_ root: Element, _ element: Element) throws -> Bool {
            if root == element {
                return false
            }

            var sibling = try element.previousElementSibling()
            while let current = sibling {
                do {
                    if try evaluator.matches(root, current) {
                        return true
                    }
                } catch {
                    // Treated as "no match" for this sibling.
                }
                sibling = try current.previousElementSibling()
            }
            return false
        }

        public override func toString() -> String {
            return ":prev*\(evaluator.toString())"
        }
    }

    /// Matches when the element sibling directly before the candidate satisfies
    /// the wrapped evaluator.
    class ImmediatePreviousSibling: StructuralEvaluator {
        public override init(_ evaluator: Evaluator) {
            super.init(evaluator)
        }

        public override func matches(_ root: Element, _ element: Element) throws -> Bool {
            if root == element {
                return false
            }

            guard let previous = try element.previousElementSibling() else {
                return false
            }
            do {
                return try evaluator.matches(root, previous)
            } catch {
                return false
            }
        }

        public override func toString() -> String {
            return ":prev\(evaluator.toString())"
        }
    }
}
174 |
--------------------------------------------------------------------------------
/Sources/SwiftSoup.h:
--------------------------------------------------------------------------------
1 | //
2 | // SwiftSoup.h
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 09/10/16.
6 | //
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/Sources/TreeBuilder.swift:
--------------------------------------------------------------------------------
1 | //
2 | // TreeBuilder.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 24/10/16.
6 | //
7 |
8 | import Foundation
9 |
/**
 Base class for tree builders. It owns the reader, the tokeniser, the document being
 built and the stack of open elements, and drives the read/process token loop.
 `defaultSettings()` and `process(_:)` must be overridden by subclasses; the base
 implementations trap with `preconditionFailure`.
 */
public class TreeBuilder {
    public var reader: CharacterReader
    var tokeniser: Tokeniser
    public var doc: Document // current doc we are building into
    public var stack: [Element] // the stack of open elements
    public var baseUri: [UInt8] // current base uri, for creating new elements
    public var currentToken: Token? // currentToken is used only for error tracking.
    public var errors: ParseErrorList // error list passed on to the tokeniser
    public var settings: ParseSettings

    private let start: Token.StartTag = Token.StartTag() // reusable start tag for processStartTag
    private let end: Token.EndTag = Token.EndTag() // reusable end tag for processEndTag

    /// Settings used by default for this builder. Must be overridden.
    public func defaultSettings() -> ParseSettings {preconditionFailure("This method must be overridden")}

    /// Creates a builder with empty placeholder state; `initialiseParse` replaces it before parsing.
    public init() {
        doc = Document([])
        reader = CharacterReader([])
        tokeniser = Tokeniser(reader, nil)
        stack = []
        baseUri = []
        errors = ParseErrorList(0, 0)
        settings = ParseSettings(false, false)
    }

    /**
     Resets the builder for a new parse run.
     - Parameters:
       - input: bytes to parse
       - baseUri: base URI used for the new document and for created elements
       - errors: error list handed to the tokeniser
       - settings: parse settings for this run
     */
    public func initialiseParse(_ input: [UInt8], _ baseUri: [UInt8], _ errors: ParseErrorList, _ settings: ParseSettings) {
        doc = Document(baseUri)
        self.settings = settings
        reader = CharacterReader(input)
        self.errors = errors
        tokeniser = Tokeniser(reader, errors)
        stack = []
        self.baseUri = baseUri
    }

    /// Initialises the builder, runs the token loop and returns the built document.
    func parse(_ input: [UInt8], _ baseUri: [UInt8], _ errors: ParseErrorList, _ settings: ParseSettings) throws -> Document {
        initialiseParse(input, baseUri, errors, settings)
        try runParser()
        return doc
    }

    /// Reads tokens and passes each to `process(_:)` until an EOF token has been processed.
    public func runParser() throws {
        while true {
            let token: Token = try tokeniser.read()
            try process(token)
            token.reset()

            if token.type == Token.TokenType.EOF {
                break
            }
        }
    }

    /// Handles a single token. Must be overridden.
    @discardableResult
    public func process(_ token: Token) throws -> Bool {preconditionFailure("This method must be overridden")}

    /// Processes a synthetic start tag with the given name, reusing the shared
    /// start token unless it is the token currently being processed.
    @discardableResult
    public func processStartTag(_ name: [UInt8]) throws -> Bool {
        if currentToken === start { // don't recycle an in-use token
            return try process(Token.StartTag().name(name))
        }
        return try process(start.reset().name(name))
    }

    /// String convenience for `processStartTag(_: [UInt8])`.
    @discardableResult
    public func processStartTag(_ name: String) throws -> Bool {
        return try processStartTag(name.utf8Array)
    }

    /// Processes a synthetic start tag with the given name and attributes, reusing the
    /// shared start token unless it is the token currently being processed.
    @discardableResult
    public func processStartTag(_ name: [UInt8], _ attrs: Attributes) throws -> Bool {
        if currentToken === start { // don't recycle an in-use token
            return try process(Token.StartTag().nameAttr(name, attrs))
        }
        start.reset()
        start.nameAttr(name, attrs)
        return try process(start)
    }

    /// String convenience for `processStartTag(_: [UInt8], _: Attributes)`.
    @discardableResult
    public func processStartTag(_ name: String, _ attrs: Attributes) throws -> Bool {
        return try processStartTag(name.utf8Array, attrs)
    }

    /// Processes a synthetic end tag with the given name, reusing the shared
    /// end token unless it is the token currently being processed.
    @discardableResult
    public func processEndTag(_ name: [UInt8]) throws -> Bool {
        if currentToken === end { // don't recycle an in-use token
            return try process(Token.EndTag().name(name))
        }

        return try process(end.reset().name(name))
    }

    /// String convenience for `processEndTag(_: [UInt8])`.
    @discardableResult
    public func processEndTag(_ name: String) throws -> Bool {
        return try processEndTag(name.utf8Array)
    }

    /// The element on top of the open-element stack, or nil when the stack is empty.
    public func currentElement() -> Element? {
        return stack.last
    }
}
113 |
--------------------------------------------------------------------------------
/Sources/UnfairLock.swift:
--------------------------------------------------------------------------------
1 | //
2 | // UnfairLock.swift
3 | // SwiftSoup
4 | //
5 | // Created by xukun on 2022/3/31.
6 | //
7 |
8 | import Foundation
9 |
#if os(macOS) || os(iOS) || os(tvOS) || os(watchOS)
/// `NSLocking` wrapper around `os_unfair_lock`.
///
/// The lock value is stored in memory allocated manually at initialisation and
/// released again in `deinit`.
@available(iOS 10.0, macOS 10.12, watchOS 3.0, tvOS 10.0, *)
final class UnfairLock: NSLocking {

    private let unfairLock: UnsafeMutablePointer<os_unfair_lock> = {
        let pointer = UnsafeMutablePointer<os_unfair_lock>.allocate(capacity: 1)
        pointer.initialize(to: os_unfair_lock())
        return pointer
    }()

    deinit {
        unfairLock.deinitialize(count: 1)
        unfairLock.deallocate()
    }

    /// Acquires the lock via `os_unfair_lock_lock`.
    func lock() {
        os_unfair_lock_lock(unfairLock)
    }

    /// Attempts to acquire the lock without waiting.
    /// - Returns: the result of `os_unfair_lock_trylock`.
    func tryLock() -> Bool {
        return os_unfair_lock_trylock(unfairLock)
    }

    /// Releases the lock via `os_unfair_lock_unlock`.
    func unlock() {
        os_unfair_lock_unlock(unfairLock)
    }
}
#endif
38 |
--------------------------------------------------------------------------------
/Sources/UnicodeScalar.swift:
--------------------------------------------------------------------------------
1 | //
2 | // UnicodeScalar.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 14/11/16.
6 | //
7 |
8 | import Foundation
9 |
private let uppercaseSet = CharacterSet.uppercaseLetters
private let lowercaseSet = CharacterSet.lowercaseLetters
private let alphaSet = CharacterSet.letters
private let alphaNumericSet = CharacterSet.alphanumerics
private let symbolSet = CharacterSet.symbols
private let digitSet = CharacterSet.decimalDigits

/// Named scalar constants and simple classification helpers.
extension UnicodeScalar {
    public static let Ampersand: UnicodeScalar = "&"
    public static let LessThan: UnicodeScalar = "<"
    public static let GreaterThan: UnicodeScalar = ">"

    public static let Space: UnicodeScalar = " "
    /// `\f`, U+000C FORM FEED.
    public static let BackslashF: UnicodeScalar = UnicodeScalar(12)
    public static let BackslashT: UnicodeScalar = "\t"
    public static let BackslashN: UnicodeScalar = "\n"
    public static let BackslashR: UnicodeScalar = "\r"
    public static let Slash: UnicodeScalar = "/"

    /// U+000C FORM FEED (the same scalar as `BackslashF`).
    /// Previously this constant held U+000B, which is the vertical tab.
    public static let FormFeed: UnicodeScalar = "\u{000C}"
    /// U+000B LINE TABULATION (vertical tab).
    /// Previously this constant held U+000C, which is the form feed.
    public static let VerticalTab: UnicodeScalar = "\u{000B}"

    /// Whether the given character set contains this scalar.
    func isMemberOfCharacterSet(_ set: CharacterSet) -> Bool {
        return set.contains(self)
    }

    /// True for the space character U+0020 and the control characters \t, \n, \r, \f, \v.
    var isWhitespace: Bool {
        switch self {
        case UnicodeScalar.Space,
             UnicodeScalar.BackslashT,
             UnicodeScalar.BackslashN,
             UnicodeScalar.BackslashR,
             UnicodeScalar.BackslashF,
             UnicodeScalar.FormFeed,
             UnicodeScalar.VerticalTab:
            return true
        default:
            return false
        }
    }

    /// `true` if this scalar is a member of `CharacterSet.uppercaseLetters`.
    var isUppercase: Bool {
        return isMemberOfCharacterSet(uppercaseSet)
    }

    /// `true` if this scalar is a member of `CharacterSet.lowercaseLetters`.
    var isLowercase: Bool {
        return isMemberOfCharacterSet(lowercaseSet)
    }

    /// First scalar of the uppercased string form of this scalar
    /// (so a scalar whose uppercase expands to several scalars yields only the first).
    var uppercase: UnicodeScalar {
        let str = String(self).uppercased()
        // The uppercased form of a one-scalar string is not expected to be empty;
        // fall back to `self` rather than trapping if it ever were.
        return str.unicodeScalars.first ?? self
    }
}
67 |
--------------------------------------------------------------------------------
/Sources/Validate.swift:
--------------------------------------------------------------------------------
1 | //
2 | // Validate.swift
3 | // SwifSoup
4 | //
5 | // Created by Nabil Chatbi on 02/10/16.
6 | //
7 |
8 | import Foundation
9 |
/// Argument-validation helpers. Every failed check throws
/// `Exception.Error` with type `ExceptionType.IllegalArgumentException`.
public struct Validate {

    /**
     Validates that the object is not nil.
     - Parameter obj: object to test
     - Throws: an IllegalArgumentException error when `obj` is nil
     */
    public static func notNull(obj: Any?) throws {
        if obj == nil {
            try fail(msg: "Object must not be null")
        }
    }

    /**
     Validates that the object is not nil.
     - Parameters:
       - obj: object to test
       - msg: message to output if validation fails
     - Throws: an IllegalArgumentException error carrying `msg` when `obj` is nil
     */
    public static func notNull(obj: AnyObject?, msg: String) throws {
        if obj == nil {
            try fail(msg: msg)
        }
    }

    /**
     Validates that the value is true.
     - Parameter val: value to test
     - Throws: an IllegalArgumentException error when `val` is false
     */
    public static func isTrue(val: Bool) throws {
        if !val {
            try fail(msg: "Must be true")
        }
    }

    /**
     Validates that the value is true.
     - Parameters:
       - val: value to test
       - msg: message to output if validation fails
     - Throws: an IllegalArgumentException error carrying `msg` when `val` is false
     */
    public static func isTrue(val: Bool, msg: String) throws {
        if !val {
            try fail(msg: msg)
        }
    }

    /**
     Validates that the value is false.
     - Parameter val: value to test
     - Throws: an IllegalArgumentException error when `val` is true
     */
    public static func isFalse(val: Bool) throws {
        if val {
            try fail(msg: "Must be false")
        }
    }

    /**
     Validates that the value is false.
     - Parameters:
       - val: value to test
       - msg: message to output if validation fails
     - Throws: an IllegalArgumentException error carrying `msg` when `val` is true
     */
    public static func isFalse(val: Bool, msg: String) throws {
        if val {
            try fail(msg: msg)
        }
    }

    /**
     Validates that the array contains no nil elements.
     - Parameter objects: the array to test
     - Throws: an IllegalArgumentException error when any element is nil
     */
    public static func noNullElements(objects: [AnyObject?]) throws {
        try noNullElements(objects: objects, msg: "Array must not contain any null objects")
    }

    /**
     Validates that the array contains no nil elements.
     - Parameters:
       - objects: the array to test
       - msg: message to output if validation fails
     - Throws: an IllegalArgumentException error carrying `msg` when any element is nil
     */
    public static func noNullElements(objects: [AnyObject?], msg: String) throws {
        if objects.contains(where: { $0 == nil }) {
            try fail(msg: msg)
        }
    }

    /**
     Validates that the byte collection is neither nil nor empty.
     - Parameter string: the UTF-8 bytes to test
     - Throws: an IllegalArgumentException error when `string` is nil or empty
     */
    public static func notEmpty<T: Collection>(string: T?) throws where T.Element == UInt8 {
        if string?.isEmpty ?? true {
            try fail(msg: "String must not be empty")
        }
    }

    /// String convenience: validates the string's `utf8Array` with the byte-collection overload.
    public static func notEmpty(string: String?) throws {
        try notEmpty(string: string?.utf8Array)
    }

    /**
     Validates that the byte array is neither nil nor empty.
     - Parameters:
       - string: the UTF-8 bytes to test
       - msg: message to output if validation fails
     - Throws: an IllegalArgumentException error carrying `msg` when `string` is nil or empty
     */
    public static func notEmpty(string: [UInt8]?, msg: String ) throws {
        if string?.isEmpty ?? true {
            try fail(msg: msg)
        }
    }

    /// String convenience: validates the string's `utf8Array` with the byte-array overload.
    public static func notEmpty(string: String?, msg: String) throws {
        try notEmpty(string: string?.utf8Array, msg: msg)
    }

    /**
     Causes a failure.
     - Parameter msg: message to output
     - Throws: always; an IllegalArgumentException error carrying `msg`
     */
    public static func fail(msg: String) throws {
        throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: msg)
    }

    /**
     Helper that always throws an IllegalArgumentException error carrying `msg`.
     - Parameter msg: message to output
     */
    public static func exception(msg: String) throws {
        throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: msg)
    }
}
141 |
--------------------------------------------------------------------------------
/Sources/XmlDeclaration.swift:
--------------------------------------------------------------------------------
1 | //
2 | // XmlDeclaration.swift
3 | // SwifSoup
4 | //
5 | // Created by Nabil Chatbi on 29/09/16.
6 | //
7 |
8 | import Foundation
9 |
10 | /**
11 | An XML Declaration.
12 | */
13 | public class XmlDeclaration: Node {
14 | private let _name: [UInt8]
15 | private let isProcessingInstruction: Bool // [UInt8] {
34 | return nodeName().utf8Array
35 | }
36 |
/// Node name reported for this node type: the fixed string "#declaration".
public override func nodeName() -> String {
    let declarationNodeName = "#declaration"
    return declarationNodeName
}
40 |
/**
 * Get the name of this declaration.
 * @return the stored name bytes decoded as UTF-8.
 */
public func name() -> String {
    let decoded = String(decoding: _name, as: UTF8.self)
    return decoded
}
48 |
/**
 Get the unencoded XML declaration.
 @return the attributes rendered as HTML, trimmed
 */
public func getWholeDeclaration() throws -> String {
    let attrs = attributes!
    return try attrs.html().trim() // attr html starts with a " "
}
56 |
/// Writes the declaration as tag start, marker, name, attributes, marker, tag end.
/// The marker is "!" when `isProcessingInstruction` is true and "?" otherwise.
/// Errors from rendering the attributes are ignored.
override func outerHtmlHead(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) {
    let marker: String = isProcessingInstruction ? "!" : "?"

    accum.append(UTF8Arrays.tagStart)
    accum.append(marker)
    accum.append(_name)
    do {
        try attributes?.html(accum: accum, out: out)
    } catch {}
    accum.append(marker)
    accum.append(UTF8Arrays.tagEnd)
}

/// A declaration has no closing part, so nothing is written here.
override func outerHtmlTail(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) {}
71 |
/// Copies this declaration into a fresh `XmlDeclaration` built from the same
/// name, base URI and processing-instruction flag.
public override func copy(with zone: NSZone? = nil) -> Any {
    return copy(clone: XmlDeclaration(_name, baseUri!, isProcessingInstruction))
}

/// Same as `copy(with:)`, passing the given parent on to the copy routine.
public override func copy(parent: Node?) -> Node {
    return copy(clone: XmlDeclaration(_name, baseUri!, isProcessingInstruction), parent: parent)
}

/// Forwards to the superclass copy routine unchanged.
public override func copy(clone: Node, parent: Node?) -> Node {
    let copied = super.copy(clone: clone, parent: parent)
    return copied
}
84 | }
85 |
--------------------------------------------------------------------------------
/Sources/XmlTreeBuilder.swift:
--------------------------------------------------------------------------------
1 | //
2 | // XmlTreeBuilder.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 14/10/16.
6 | //
7 |
8 | import Foundation
9 |
10 | /**
11 | * Use the {@code XmlTreeBuilder} when you want to parse XML without any of the HTML DOM rules being applied to the
12 | * document.
13 | * Usage example: {@code Document xmlDoc = Jsoup.parse(html, baseUrl, Parser.xmlParser())}
14 | *
15 | */
16 | public class XmlTreeBuilder: TreeBuilder {
17 |
/// Creates the builder through the superclass initialiser.
public override init() {
    super.init()
}

/// Default settings for this builder: `ParseSettings.preserveCase`.
public override func defaultSettings() -> ParseSettings {
    return .preserveCase
}
25 |
/// Parses UTF-8 bytes with a non-tracking error list and `ParseSettings.preserveCase`.
public func parse(_ input: [UInt8], _ baseUri: [UInt8]) throws -> Document {
    let document = try parse(input, baseUri, ParseErrorList.noTracking(), ParseSettings.preserveCase)
    return document
}

/// String convenience: converts both arguments with `utf8Array` and parses them
/// with a non-tracking error list and `ParseSettings.preserveCase`.
public func parse(_ input: String, _ baseUri: String) throws -> Document {
    let inputBytes = input.utf8Array
    let baseUriBytes = baseUri.utf8Array
    return try parse(inputBytes, baseUriBytes, ParseErrorList.noTracking(), ParseSettings.preserveCase)
}
33 |
/// Runs the base initialisation, then pushes the document onto the open-element
/// stack and switches its output syntax to XML.
override public func initialiseParse(_ input: [UInt8], _ baseUri: [UInt8], _ errors: ParseErrorList, _ settings: ParseSettings) {
    super.initialiseParse(input, baseUri, errors, settings)

    // place the document onto the stack. differs from HtmlTreeBuilder (not on stack)
    stack.append(doc)

    doc.outputSettings().syntax(syntax: .xml)
}
39 |
/// Dispatches a token by type: start tags, comments, characters and doctypes are
/// inserted, end tags close the matching open element, EOF does nothing.
/// Always returns true.
override public func process(_ token: Token) throws -> Bool {
    // start tag, end tag, doctype, comment, character, eof
    switch token.type {
    case .StartTag:
        try insert(token.asStartTag())
    case .EndTag:
        try popStackToClose(token.asEndTag())
    case .Comment:
        try insert(token.asComment())
    case .Char:
        try insert(token.asCharacter())
    case .Doctype:
        try insert(token.asDoctype())
    case .EOF:
        // could put some normalisation here if desired
        break
    }
    return true
}
65 |
/// Appends the node to the current element; does nothing when there is no current element.
private func insertNode(_ node: Node) throws {
    guard let parent = currentElement() else {
        return
    }
    try parent.appendChild(node)
}
69 |
/// Creates an element for the start tag, appends it to the current element and,
/// unless the tag is self closing, pushes it onto the open-element stack.
/// - Returns: the created element.
@discardableResult
func insert(_ startTag: Token.StartTag) throws -> Element {
    let tag: Tag = try Tag.valueOf(startTag.name(), settings)
    // todo: wonder if for xml parsing, should treat all tags as unknown? because it's not html.
    let element: Element
    if let attributes = startTag._attributes {
        element = try Element(tag, baseUri, settings.normalizeAttributes(attributes))
    } else {
        element = Element(tag, baseUri)
    }
    try insertNode(element)

    guard startTag.isSelfClosing() else {
        stack.append(element)
        return element
    }

    tokeniser.acknowledgeSelfClosingFlag()
    // unknown tag, remember this is self closing for output. see above.
    if !tag.isKnownTag() {
        tag.setSelfClosing()
    }
    return element
}
92 |
/// Inserts a comment node. A bogus comment whose data starts with "!" or "?" is
/// re-parsed as an element so its attributes can be moved onto an `XmlDeclaration`,
/// which is inserted instead.
func insert(_ commentToken: Token.Comment) throws {
    let comment: Comment = Comment(commentToken.getData(), baseUri)
    var nodeToInsert: Node = comment
    // xml declarations are emitted as bogus comments (which is right for html, but not xml)
    // so we do a bit of a hack and parse the data as an element to pull the attributes out
    if commentToken.bogus {
        let data: String = comment.getData()
        if data.count > 1 && (data.startsWith("!") || data.startsWith("?")) {
            let markup = "<" + data.substring(1, data.count - 2) + ">"
            let parsed: Document = try SwiftSoup.parse(markup, String(decoding: baseUri, as: UTF8.self), Parser.xmlParser())
            let el: Element = parsed.child(0)
            nodeToInsert = XmlDeclaration(settings.normalizeTag(el.tagNameUTF8()), comment.getBaseUriUTF8(), data.startsWith("!"))
            nodeToInsert.getAttributes()?.addAll(incoming: el.getAttributes())
        }
    }
    try insertNode(nodeToInsert)
}
108 |
/// Inserts a text node built from the character token's data.
/// The data is force-unwrapped, so a token without data traps.
func insert(_ characterToken: Token.Char) throws {
    try insertNode(TextNode(characterToken.getData()!, baseUri))
}
113 |
/// Inserts a document type node built from the doctype token, with its name
/// normalised through the current settings.
func insert(_ d: Token.Doctype) throws {
    let normalizedName = settings.normalizeTag(d.getName())
    let doctypeNode = DocumentType(normalizedName, d.getPubSysKey(), d.getPublicIdentifier(), d.getSystemIdentifier(), baseUri)
    try insertNode(doctypeNode)
}
124 |
125 | /**
126 | * If the stack contains an element with this tag's name, pop up the stack to remove the first occurrence. If not
127 | * found, skips.
128 | *
129 | * @param endTag
130 | */
131 | private func popStackToClose(_ endTag: Token.EndTag) throws {
132 | let elName: [UInt8] = try endTag.name()
133 | var firstFound: Element? = nil
134 |
135 | for pos in (0.. Array {
156 | initialiseParse(inputFragment, baseUri, errors, settings)
157 | try runParser()
158 | return doc.getChildNodes()
159 | }
160 | }
161 |
--------------------------------------------------------------------------------
/SwiftSoup.podspec:
--------------------------------------------------------------------------------
# CocoaPods specification for the SwiftSoup library.
Pod::Spec.new do |s|
# Pod identity and descriptive text.
s.name = 'SwiftSoup'
s.version = '2.7.6'
s.summary = 'Swift HTML Parser / Reader, XML , with best of DOM, CSS, and jquery'
s.description = <<-DESC
SwiftSoup is a Swift library for working with real-world HTML. It provides a very convenient API for extracting and manipulating data, using the best of DOM, CSS, and jquery-like methods.
DESC

# Project metadata; the source tag is derived from s.version above.
s.homepage = 'https://github.com/scinfu/SwiftSoup'
s.license = { :type => 'MIT', :file => 'LICENSE' }
s.author = { 'Nabil Chatbi' => 'scinfu@gmail.com' }
s.source = { :git => 'https://github.com/scinfu/SwiftSoup.git', :tag => s.version.to_s }
s.social_media_url = 'https://twitter.com/scinfu'

# Minimum deployment target per platform.
s.ios.deployment_target = '13.0'
s.osx.deployment_target = '10.15'
s.watchos.deployment_target = '6.0'
s.tvos.deployment_target = '13.0'

# Every Swift file under Sources/ belongs to the pod.
s.source_files = 'Sources/**/*.swift'
s.swift_versions = ['4.0', '4.2', '5.0', '5.1']
end
23 |
--------------------------------------------------------------------------------
/SwiftSoup.xcodeproj/project.xcworkspace/contents.xcworkspacedata:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
--------------------------------------------------------------------------------
/SwiftSoup.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | IDEDidComputeMac32BitWarning
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/SwiftSoup.xcodeproj/xcshareddata/xcbaselines/8CE4181E1DAA54A900240B42.xcbaseline/9F80FB9B-4E37-45A8-BFE8-9AF36737A6F3.plist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | classNames
6 |
7 | SwiftSoupTests
8 |
9 | testPerformanceDiv()
10 |
11 | com.apple.XCTPerformanceMetric_WallClockTime
12 |
13 | baselineAverage
14 | 36.859
15 | baselineIntegrationDisplayName
16 | Local Baseline
17 |
18 |
19 |
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/SwiftSoup.xcodeproj/xcshareddata/xcbaselines/8CE4181E1DAA54A900240B42.xcbaseline/F9553B46-8F24-4C2B-8A1E-8CC5535D12E1.plist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | classNames
6 |
7 | CssTest
8 |
9 | testPerformanceExample()
10 |
11 | com.apple.XCTPerformanceMetric_WallClockTime
12 |
13 | baselineAverage
14 | 1.2831e-06
15 | baselineIntegrationDisplayName
16 | Local Baseline
17 |
18 |
19 |
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/SwiftSoup.xcodeproj/xcshareddata/xcbaselines/8CE4181E1DAA54A900240B42.xcbaseline/Info.plist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | runDestinationsByUUID
6 |
7 | 9F80FB9B-4E37-45A8-BFE8-9AF36737A6F3
8 |
9 | localComputer
10 |
11 | busSpeedInMHz
12 | 100
13 | cpuCount
14 | 1
15 | cpuKind
16 | Intel Core i7
17 | cpuSpeedInMHz
18 | 3100
19 | logicalCPUCoresPerPackage
20 | 4
21 | modelCode
22 | MacBookPro12,1
23 | physicalCPUCoresPerPackage
24 | 2
25 | platformIdentifier
26 | com.apple.platform.macosx
27 |
28 | targetArchitecture
29 | x86_64
30 | targetDevice
31 |
32 | modelCode
33 | iPhone7,2
34 | platformIdentifier
35 | com.apple.platform.iphonesimulator
36 |
37 |
38 | F9553B46-8F24-4C2B-8A1E-8CC5535D12E1
39 |
40 | localComputer
41 |
42 | busSpeedInMHz
43 | 100
44 | cpuCount
45 | 1
46 | cpuKind
47 | Intel Core i7
48 | cpuSpeedInMHz
49 | 3100
50 | logicalCPUCoresPerPackage
51 | 4
52 | modelCode
53 | MacBookPro12,1
54 | physicalCPUCoresPerPackage
55 | 2
56 | platformIdentifier
57 | com.apple.platform.macosx
58 |
59 | targetArchitecture
60 | i386
61 | targetDevice
62 |
63 | modelCode
64 | iPhone5,1
65 | platformIdentifier
66 | com.apple.platform.iphonesimulator
67 |
68 |
69 |
70 |
71 |
72 |
--------------------------------------------------------------------------------
/SwiftSoup.xcodeproj/xcshareddata/xcschemes/PerformanceTest.xcscheme:
--------------------------------------------------------------------------------
1 |
2 |
5 |
8 |
9 |
15 |
21 |
22 |
23 |
29 |
35 |
36 |
37 |
38 |
39 |
44 |
45 |
47 |
53 |
54 |
55 |
56 |
57 |
63 |
64 |
65 |
66 |
67 |
68 |
78 |
79 |
85 |
86 |
87 |
88 |
89 |
90 |
96 |
97 |
103 |
104 |
105 |
106 |
108 |
109 |
112 |
113 |
114 |
--------------------------------------------------------------------------------
/SwiftSoup.xcodeproj/xcshareddata/xcschemes/SwiftSoup-Package.xcscheme:
--------------------------------------------------------------------------------
1 |
2 |
5 |
8 |
9 |
15 |
21 |
22 |
23 |
24 |
25 |
30 |
31 |
33 |
39 |
40 |
41 |
42 |
43 |
53 |
54 |
60 |
61 |
62 |
63 |
69 |
70 |
72 |
73 |
76 |
77 |
78 |
--------------------------------------------------------------------------------
/SwiftSoup.xcodeproj/xcshareddata/xcschemes/SwiftSoup-iOS.xcscheme:
--------------------------------------------------------------------------------
1 |
2 |
5 |
8 |
9 |
15 |
21 |
22 |
23 |
29 |
35 |
36 |
37 |
38 |
39 |
44 |
45 |
51 |
52 |
53 |
54 |
56 |
62 |
63 |
64 |
65 |
66 |
76 |
77 |
83 |
84 |
85 |
86 |
92 |
93 |
99 |
100 |
101 |
102 |
104 |
105 |
108 |
109 |
110 |
--------------------------------------------------------------------------------
/SwiftSoup.xcodeproj/xcshareddata/xcschemes/SwiftSoup-macOS.xcscheme:
--------------------------------------------------------------------------------
1 |
2 |
5 |
8 |
9 |
15 |
21 |
22 |
23 |
24 |
25 |
30 |
31 |
37 |
38 |
39 |
40 |
41 |
42 |
52 |
53 |
59 |
60 |
61 |
62 |
68 |
69 |
71 |
72 |
75 |
76 |
77 |
--------------------------------------------------------------------------------
/SwiftSoup.xcodeproj/xcshareddata/xcschemes/SwiftSoup-tvOS.xcscheme:
--------------------------------------------------------------------------------
1 |
2 |
5 |
8 |
9 |
15 |
21 |
22 |
23 |
24 |
25 |
30 |
31 |
32 |
33 |
43 |
44 |
50 |
51 |
52 |
53 |
59 |
60 |
66 |
67 |
68 |
69 |
71 |
72 |
75 |
76 |
77 |
--------------------------------------------------------------------------------
/SwiftSoup.xcodeproj/xcshareddata/xcschemes/SwiftSoup-watchOS.xcscheme:
--------------------------------------------------------------------------------
1 |
2 |
5 |
8 |
9 |
15 |
21 |
22 |
23 |
24 |
25 |
30 |
31 |
32 |
33 |
43 |
44 |
50 |
51 |
52 |
53 |
59 |
60 |
66 |
67 |
68 |
69 |
71 |
72 |
75 |
76 |
77 |
--------------------------------------------------------------------------------
/Tests-macOS/Info.plist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | CFBundleDevelopmentRegion
6 | $(DEVELOPMENT_LANGUAGE)
7 | CFBundleExecutable
8 | $(EXECUTABLE_NAME)
9 | CFBundleIdentifier
10 | $(PRODUCT_BUNDLE_IDENTIFIER)
11 | CFBundleInfoDictionaryVersion
12 | 6.0
13 | CFBundleName
14 | $(PRODUCT_NAME)
15 | CFBundlePackageType
16 | BNDL
17 | CFBundleShortVersionString
18 | 1.0
19 | CFBundleVersion
20 | 1
21 |
22 |
23 |
--------------------------------------------------------------------------------
/Tests-macOS/ParserBenchmark.swift:
--------------------------------------------------------------------------------
1 | //
2 | // ParserBenchmark.swift
3 | // SwiftSoupTests
4 | //
5 | // Created by garth on 2/26/19.
6 | //
7 |
8 | import XCTest
9 | import SwiftSoup
10 |
/// Measures how long it takes to parse every HTML resource bundled with the test
/// target, each one `Const.repetitions` times.
class ParserBenchmark: XCTestCase {

    enum Const {
        /// Contents of the bundled HTML resources, reloaded in `setUp`.
        static var corpusHTMLData: [String] = []
        /// How many times each document is parsed per measured pass.
        static let repetitions = 5
    }

    override func setUp() {
        super.setUp()
        let bundle = Bundle(for: type(of: self))
        // Report a test failure instead of trapping when the bundle lookup returns nil.
        guard let urls = bundle.urls(forResourcesWithExtension: ".html", subdirectory: nil) else {
            Const.corpusHTMLData = []
            XCTFail("No HTML resources found in the test bundle")
            return
        }
        // Files that cannot be read are skipped.
        Const.corpusHTMLData = urls.compactMap { try? Data(contentsOf: $0) }.map { String(decoding: $0, as: UTF8.self) }
    }

    func testParserPerformance() throws {
        // Accumulates across every invocation of the measured block.
        var count = 0
        measure {
            for htmlDoc in Const.corpusHTMLData {
                for _ in 1...Const.repetitions {
                    do {
                        let _ = try SwiftSoup.parse(htmlDoc)
                        count += 1
                    } catch {
                        XCTFail("Exception while parsing HTML")
                    }
                }
            }
            print("Did \(count) iterations")
        }
    }

}
42 |
--------------------------------------------------------------------------------
/Tests/Info.plist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | CFBundleDevelopmentRegion
6 | en
7 | CFBundleExecutable
8 | $(EXECUTABLE_NAME)
9 | CFBundleIdentifier
10 | $(PRODUCT_BUNDLE_IDENTIFIER)
11 | CFBundleInfoDictionaryVersion
12 | 6.0
13 | CFBundleName
14 | $(PRODUCT_NAME)
15 | CFBundlePackageType
16 | BNDL
17 | CFBundleShortVersionString
18 | 1.0
19 | CFBundleVersion
20 | 1
21 |
22 |
23 |
--------------------------------------------------------------------------------
/Tests/LinuxMain.swift:
--------------------------------------------------------------------------------
1 | //
2 | // LinuxMain.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 20/12/16.
6 | //
7 |
8 | import XCTest
9 | @testable import SwiftSoupTests
10 |
// Every test case class registered for the Linux test runner.
let allTestCases: [XCTestCaseEntry] = [
    testCase(CssTest.allTests),
    testCase(ElementsTest.allTests),
    testCase(QueryParserTest.allTests),
    testCase(SelectorTest.allTests),
    testCase(AttributeParseTest.allTests),
    testCase(CharacterReaderTest.allTests),
    testCase(HtmlParserTest.allTests),
    testCase(ParseSettingsTest.allTests),
    testCase(TagTest.allTests),
    testCase(TokenQueueTest.allTests),
    testCase(XmlTreeBuilderTest.allTests),
    testCase(FormElementTest.allTests),
    testCase(ElementTest.allTests),
    testCase(EntitiesTest.allTests),
    testCase(DocumentTypeTest.allTests),
    testCase(TextNodeTest.allTests),
    testCase(DocumentTest.allTests),
    testCase(AttributesTest.allTests),
    testCase(NodeTest.allTests),
    testCase(AttributeTest.allTests),
    testCase(CleanerTest.allTests),
    testCase(StringUtilTest.allTests)
]

XCTMain(allTestCases)
35 |
--------------------------------------------------------------------------------
/Tests/SwiftSoupTests/AttributeParseTest.swift:
--------------------------------------------------------------------------------
1 | //
2 | // AttributeParseTest.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 10/11/16.
6 | //
7 | /**
8 | Test suite for attribute parser.
9 | */
10 |
11 | import XCTest
12 | import SwiftSoup
13 |
/// Tests for how the HTML parser reads attributes off a start tag.
///
/// NOTE(review): several HTML string literals in this class look like they lost
/// their markup during extraction (empty strings, literals split across lines).
/// Confirm against the repository copy before relying on the literal values.
class AttributeParseTest: XCTestCase {

    /// On Apple platforms, asserts that `allTests` has as many entries as the
    /// number of test cases XCTest reports for this class. Compiles to nothing elsewhere.
    func testLinuxTestSuiteIncludesAllTests() {
        #if os(macOS) || os(iOS) || os(tvOS) || os(watchOS)
        let thisClass = type(of: self)
        let linuxCount = thisClass.allTests.count
        let darwinCount = Int(thisClass.defaultTestSuite.testCaseCount)
        XCTAssertEqual(linuxCount, darwinCount, "\(darwinCount - linuxCount) tests are missing from allTests")
        #endif
    }

    /// Parses the first `a` element and checks its seven attributes by key.
    func testparsesRoughAttributeString() throws {
        // NOTE(review): the literal below is empty and the following comment has lost
        // its examples; both presumably contained markup — verify.
        let html: String = ""
        // should be: , , , , ,

        let el: Element = try SwiftSoup.parse(html).getElementsByTag("a").get(0)
        let attr: Attributes = el.getAttributes()!
        XCTAssertEqual(7, attr.size())
        XCTAssertEqual("123", attr.get(key: "id"))
        XCTAssertEqual("baz = 'bar'", attr.get(key: "class"))
        XCTAssertEqual("border: 2px", attr.get(key: "style"))
        XCTAssertEqual("", attr.get(key: "qux"))
        XCTAssertEqual("", attr.get(key: "zim"))
        XCTAssertEqual("12", attr.get(key: "foo"))
        XCTAssertEqual("18", attr.get(key: "mux"))
    }

    /// Checks that a quoted attribute value keeps an embedded CR/LF pair.
    func testhandlesNewLinesAndReturns() throws {
        let html: String = "One"
        let el: Element = try SwiftSoup.parse(html).select("a").first()!
        XCTAssertEqual(2, el.getAttributes()?.size())
        XCTAssertEqual("bar\r\nqux", try el.attr("foo")) // currently preserves newlines in quoted attributes. todo confirm if should.
        XCTAssertEqual("two", try el.attr("bar"))
    }

    /// Expects the first `a` element to carry no attributes.
    func testparsesEmptyString() throws {
        let html: String = ""
        let el: Element = try SwiftSoup.parse(html).getElementsByTag("a").get(0)
        let attr: Attributes = el.getAttributes()!
        XCTAssertEqual(0, attr.size())
    }

    /// Expects a single attribute whose key is `=empty` and whose value is empty.
    func testcanStartWithEq() throws {
        let html: String = ""
        let el: Element = try SwiftSoup.parse(html).getElementsByTag("a").get(0)
        let attr: Attributes = el.getAttributes()!
        XCTAssertEqual(1, attr.size())
        XCTAssertTrue(attr.hasKey(key: "=empty"))
        XCTAssertEqual("", attr.get(key: "=empty"))
    }

    /// Checks the `href` values read from the selected `a` elements.
    func teststrictAttributeUnescapes() throws {
        let html: String = "One Two"
        let els: Elements = try SwiftSoup.parse(html).select("a")
        XCTAssertEqual("?foo=bar&mid<=true", try els.first()!.attr("href"))
        XCTAssertEqual("?foo=bar"
        // NOTE(review): the embedded numbering jumps from 69 to 80 here. The end of this
        // test, `testmoreAttributeUnescapes` and the start of `testparsesBooleanAttributes`
        // (both listed in `allTests`) appear to be missing from this copy. The statements
        // below presumably belong to `testparsesBooleanAttributes` — restore from the repository.
        let el: Element = try SwiftSoup.parse(html).select("a").first()!

        XCTAssertEqual("123", try el.attr("normal"))
        XCTAssertEqual("", try el.attr("boolean"))
        XCTAssertEqual("", try el.attr("empty"))

        // NOTE(review): the generic argument of `Array` looks stripped — verify.
        let attributes: Array
= el.getAttributes()!.asList()
        XCTAssertEqual(3, attributes.count, "There should be 3 attribute present")

        // Assuming the list order always follows the parsed html
        XCTAssertFalse((attributes[0] as? BooleanAttribute) != nil, "'normal' attribute should not be boolean")
        XCTAssertTrue((attributes[1] as? BooleanAttribute) != nil, "'boolean' attribute should be boolean")
        XCTAssertFalse((attributes[2] as? BooleanAttribute) != nil, "'empty' attribute should not be boolean")

        XCTAssertEqual(html, try el.outerHtml())
    }

    /// Parses the same input with the default parser and with `Parser.xmlParser()`,
    /// checking that an `img[onerror]` selector matches and comparing the serialized output.
    // NOTE(review): the string literals in this test are split across lines, presumably
    // because their markup was stripped — verify against the repository copy.
    func testretainsSlashFromAttributeName() throws {
        let html: String = "
"
        var doc: Document = try SwiftSoup.parse(html)
        XCTAssertTrue(try doc.select("img[onerror]").size() != 0, "SelfClosingStartTag ignores last character")
        XCTAssertEqual("
", try doc.body()!.html())

        doc = try SwiftSoup.parse(html, "", Parser.xmlParser())
        XCTAssertEqual("
", try doc.html())
    }

    /// Name/method pairs consumed by `LinuxMain.swift`; must list every test in this class.
    static var allTests = {
        return [
            ("testLinuxTestSuiteIncludesAllTests", testLinuxTestSuiteIncludesAllTests),
            ("testparsesRoughAttributeString", testparsesRoughAttributeString),
            ("testhandlesNewLinesAndReturns", testhandlesNewLinesAndReturns),
            ("testparsesEmptyString", testparsesEmptyString),
            ("testcanStartWithEq", testcanStartWithEq),
            ("teststrictAttributeUnescapes", teststrictAttributeUnescapes),
            ("testmoreAttributeUnescapes", testmoreAttributeUnescapes),
            ("testparsesBooleanAttributes", testparsesBooleanAttributes),
            ("testretainsSlashFromAttributeName", testretainsSlashFromAttributeName)
        ]
    }()

}
122 |
--------------------------------------------------------------------------------
/Tests/SwiftSoupTests/AttributeTest.swift:
--------------------------------------------------------------------------------
1 | //
2 | // AttributeTest.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 07/10/16.
6 | //
7 |
8 | import XCTest
9 | @testable import SwiftSoup
/// Tests for a single `Attribute` and for case-sensitive removal from `Attributes`.
class AttributeTest: XCTestCase {

    /// On Apple platforms, verifies `allTests` is as long as the suite XCTest discovers.
    func testLinuxTestSuiteIncludesAllTests() {
        #if os(macOS) || os(iOS) || os(tvOS) || os(watchOS)
        let suiteType = type(of: self)
        let registered = suiteType.allTests.count
        let discovered = Int(suiteType.defaultTestSuite.testCaseCount)
        XCTAssertEqual(registered, discovered, "\(discovered - registered) tests are missing from allTests")
        #endif
    }

    /// `html()` renders `key="value"` and `toString()` returns the same text.
    func testHtml() throws {
        let attribute = try Attribute(key: "key", value: "value &")
        let expected = "key=\"value &\""
        XCTAssertEqual(expected, attribute.html())
        XCTAssertEqual(attribute.html(), attribute.toString())
    }

    /// The same string used as key and inside the value is rendered unchanged.
    func testWithSupplementaryCharacterInAttributeKeyAndValue() throws {
        let token = "135361"
        let attribute = try Attribute(key: token, value: "A" + token + "B")
        let expected = token + "=\"A" + token + "B\""
        XCTAssertEqual(expected, attribute.html())
        XCTAssertEqual(attribute.html(), attribute.toString())
    }

    /// Removing by key only removes the entry whose key matches case-sensitively.
    func testRemoveCaseSensitive() throws {
        let attributes: Attributes = Attributes()
        let entries = [
            ("Tot", "a&p"),
            ("tot", "one"),
            ("Hello", "There"),
            ("hello", "There"),
            ("data-name", "Jsoup")
        ]
        for (key, value) in entries {
            try attributes.put(key, value)
        }
        XCTAssertEqual(5, attributes.size())

        for key in ["Tot", "Hello"] {
            try attributes.remove(key: key)
        }
        XCTAssertEqual(3, attributes.size())
        XCTAssertTrue(attributes.hasKey(key: "tot"))
        XCTAssertFalse(attributes.hasKey(key: "Tot"))
    }

    /// Name/method pairs consumed by `LinuxMain.swift`.
    static var allTests = [
        ("testLinuxTestSuiteIncludesAllTests", testLinuxTestSuiteIncludesAllTests),
        ("testHtml", testHtml),
        ("testWithSupplementaryCharacterInAttributeKeyAndValue", testWithSupplementaryCharacterInAttributeKeyAndValue),
        ("testRemoveCaseSensitive", testRemoveCaseSensitive)
    ]

}
60 |
--------------------------------------------------------------------------------
/Tests/SwiftSoupTests/AttributesTest.swift:
--------------------------------------------------------------------------------
1 | //
2 | // AttributesTest.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 29/10/16.
6 | //
7 |
8 | import XCTest
9 | import SwiftSoup
10 |
/// Tests for the `Attributes` collection: lookup, dataset view, serialization and iteration.
class AttributesTest: XCTestCase {

    /// On Apple platforms, asserts that `allTests` has as many entries as the
    /// number of test cases XCTest reports for this class.
    func testLinuxTestSuiteIncludesAllTests() {
        #if os(macOS) || os(iOS) || os(tvOS) || os(watchOS)
        let thisClass = type(of: self)
        let linuxCount = thisClass.allTests.count
        let darwinCount = Int(thisClass.defaultTestSuite.testCaseCount)
        XCTAssertEqual(linuxCount, darwinCount, "\(darwinCount - linuxCount) tests are missing from allTests")
        #endif
    }

    /// Checks case-sensitive and case-insensitive lookup, the `data-` dataset and HTML output.
    func testHtml() {
        let a: Attributes = Attributes()
        do {
            try a.put("Tot", "a&p")
            try a.put("Hello", "There")
            try a.put("data-name", "Jsoup")
        } catch {
            // A failed put used to be swallowed silently; surface it as the root cause.
            XCTFail("Could not populate attributes: \(error)")
        }

        XCTAssertEqual(3, a.size())
        XCTAssertTrue(a.hasKey(key: "Tot"))
        XCTAssertTrue(a.hasKey(key: "Hello"))
        XCTAssertTrue(a.hasKey(key: "data-name"))
        XCTAssertFalse(a.hasKey(key: "tot"))
        XCTAssertTrue(a.hasKeyIgnoreCase(key: "tot"))
        XCTAssertEqual("There", try a.getIgnoreCase(key: "hEllo"))

        XCTAssertEqual(1, a.dataset().count)
        XCTAssertEqual("Jsoup", a.dataset()["name"])
        XCTAssertEqual("", a.get(key: "tot"))
        XCTAssertEqual("a&p", a.get(key: "Tot"))
        XCTAssertEqual("a&p", try a.getIgnoreCase(key: "tot"))

        XCTAssertEqual(" Tot=\"a&p\" Hello=\"There\" data-name=\"Jsoup\"", try a.html())
        XCTAssertEqual(try a.html(), try a.toString())
    }
    // TODO: enable if needed
    //    func testIteratorRemovable() {
    //        let a = Attributes()
    //        do{
    //            try a.put("Tot", "a&p")
    //            try a.put("Hello", "There")
    //            try a.put("data-name", "Jsoup")
    //        }catch{}
    //
    //        var iterator = a.makeIterator()
    //
    //        iterator.next()
    //        iterator.dropFirst()
    //        XCTAssertEqual(2, a.size())
    //    }

    /// Iterating yields the attributes in insertion order with matching keys and values.
    func testIterator() {
        let a: Attributes = Attributes()
        let datas: [[String]] = [["Tot", "raul"], ["Hello", "pismuth"], ["data-name", "Jsoup"]]

        do {
            for atts in datas {
                try a.put(atts[0], atts[1])
            }
        } catch {
            // Report the failure instead of crashing the whole test run with `try!`.
            XCTFail("Could not populate attributes: \(error)")
            return
        }

        let iterator = a.makeIterator()
        XCTAssertNotNil(iterator.next())
        var i = 0
        for attribute in a {
            XCTAssertEqual(datas[i][0], attribute.getKey())
            XCTAssertEqual(datas[i][1], attribute.getValue())
            i += 1
        }
        XCTAssertEqual(datas.count, i)
    }

    /// An empty collection's iterator returns `nil` immediately.
    func testIteratorEmpty() {
        let a = Attributes()

        let iterator = a.makeIterator()
        XCTAssertNil(iterator.next())
    }

    /// Name/method pairs consumed by `LinuxMain.swift`.
    static var allTests = {
        return [
            ("testLinuxTestSuiteIncludesAllTests", testLinuxTestSuiteIncludesAllTests),
            ("testHtml", testHtml),
            ("testIterator", testIterator),
            ("testIteratorEmpty", testIteratorEmpty)
        ]
    }()
}
98 |
--------------------------------------------------------------------------------
/Tests/SwiftSoupTests/BuildEntities.swift:
--------------------------------------------------------------------------------
1 | //
2 | // BuildEntities.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 31/10/16.
6 | //
7 |
8 | import Foundation
9 | //todo:
10 |
--------------------------------------------------------------------------------
/Tests/SwiftSoupTests/DocumentTypeTest.swift:
--------------------------------------------------------------------------------
1 | //
2 | // DocumentTypeTest.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 06/11/16.
6 | //
7 |
8 | import XCTest
9 | import SwiftSoup
10 |
/// Tests for constructing `DocumentType` nodes and serializing them with `outerHtml()`.
class DocumentTypeTest: XCTestCase {

    /// On Apple platforms, asserts that `allTests` has as many entries as the
    /// number of test cases XCTest reports for this class.
    func testLinuxTestSuiteIncludesAllTests() {
        #if os(macOS) || os(iOS) || os(tvOS) || os(watchOS)
        let thisClass = type(of: self)
        let linuxCount = thisClass.allTests.count
        let darwinCount = Int(thisClass.defaultTestSuite.testCaseCount)
        XCTAssertEqual(linuxCount, darwinCount, "\(darwinCount - linuxCount) tests are missing from allTests")
        #endif
    }

    /// Construction with an empty name yields a value.
    func testConstructorValidationOkWithBlankName() {
        let fail: DocumentType? = DocumentType("", "", "", "")
        XCTAssertNotNil(fail)
    }

    /// Construction with a name and empty ids yields a value.
    func testConstructorValidationThrowsExceptionOnNulls() {
        let fail: DocumentType? = DocumentType("html", "", "", "")
        XCTAssertNotNil(fail)
    }

    /// Construction with empty public and system ids yields a value.
    func testConstructorValidationOkWithBlankPublicAndSystemIds() {
        let fail: DocumentType? = DocumentType("html", "", "", "")
        XCTAssertNotNil(fail)
    }

    /// Compares `outerHtml()` for doctypes with different name/public id/system id combinations.
    ///
    /// `try` runs inside the assertion's throwing autoclosure, so a thrown error is
    /// recorded as a failure of this test instead of aborting the run as `try!` did.
    // NOTE(review): every expected string below is empty; they presumably held doctype
    // markup that was stripped from this copy — verify against the repository.
    func testOuterHtmlGeneration() {
        let html5 = DocumentType("html", "", "", "")
        XCTAssertEqual("", try html5.outerHtml())

        let publicDocType = DocumentType("html", "-//IETF//DTD HTML//", "", "")
        XCTAssertEqual("", try publicDocType.outerHtml())

        let systemDocType = DocumentType("html", "", "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd", "")
        XCTAssertEqual("", try systemDocType.outerHtml())

        let combo = DocumentType("notHtml", "--public", "--system", "")
        XCTAssertEqual("", try combo.outerHtml())
    }

    /// Name/method pairs consumed by `LinuxMain.swift`.
    static var allTests = {
        return [
            ("testLinuxTestSuiteIncludesAllTests", testLinuxTestSuiteIncludesAllTests),
            ("testConstructorValidationOkWithBlankName", testConstructorValidationOkWithBlankName),
            ("testConstructorValidationThrowsExceptionOnNulls", testConstructorValidationThrowsExceptionOnNulls),
            ("testConstructorValidationOkWithBlankPublicAndSystemIds", testConstructorValidationOkWithBlankPublicAndSystemIds),
            ("testOuterHtmlGeneration", testOuterHtmlGeneration)
        ]
    }()
}
61 |
--------------------------------------------------------------------------------
/Tests/SwiftSoupTests/EntitiesTest.swift:
--------------------------------------------------------------------------------
1 | //
2 | // EntitiesTest.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 09/10/16.
6 | //
7 |
8 | import Foundation
9 | import XCTest
10 | import SwiftSoup
11 |
/// Tests for `Entities.escape` / `Entities.unescape` and the entity name tables.
///
/// NOTE(review): many literals in this class look like their entity references were
/// decoded (and markup stripped) during extraction — e.g. the escaped and unescaped
/// strings are identical. Verify every literal against the repository copy.
class EntitiesTest: XCTestCase {

    /// On Apple platforms, asserts that `allTests` has as many entries as the
    /// number of test cases XCTest reports for this class.
    func testLinuxTestSuiteIncludesAllTests() {
        #if os(macOS) || os(iOS) || os(tvOS) || os(watchOS)
        let thisClass = type(of: self)
        let linuxCount = thisClass.allTests.count
        let darwinCount = Int(thisClass.defaultTestSuite.testCaseCount)
        XCTAssertEqual(linuxCount, darwinCount, "\(darwinCount - linuxCount) tests are missing from allTests")
        #endif
    }

    /// Escapes one string under several charset/escape-mode combinations, compares each
    /// result with an expected string, then checks that unescaping returns the input.
    func testEscape() throws {
        let text = "Hello &<> Å å π 新 there ¾ © »"

        let escapedAscii = Entities.escape(text, OutputSettings().encoder(String.Encoding.ascii).escapeMode(Entities.EscapeMode.base))
        let escapedAsciiFull = Entities.escape(text, OutputSettings().charset(String.Encoding.ascii).escapeMode(Entities.EscapeMode.extended))
        let escapedAsciiXhtml = Entities.escape(text, OutputSettings().charset(String.Encoding.ascii).escapeMode(Entities.EscapeMode.xhtml))
        let escapedUtfFull = Entities.escape(text, OutputSettings().charset(String.Encoding.utf8).escapeMode(Entities.EscapeMode.extended))
        let escapedUtfFull2 = Entities.escape(text)
        let escapedUtfMin = Entities.escape(text, OutputSettings().charset(String.Encoding.utf8).escapeMode(Entities.EscapeMode.xhtml))

        XCTAssertEqual("Hello &<> Å å π 新 there ¾ © »", escapedAscii)
        XCTAssertEqual("Hello &<> Å å π 新 there ¾ © »", escapedAsciiFull)
        XCTAssertEqual("Hello &<> Å å π 新 there ¾ © »", escapedAsciiXhtml)
        XCTAssertEqual("Hello &<> Å å π 新 there ¾ © »", escapedUtfFull)
        XCTAssertEqual("Hello &<> Å å π 新 there ¾ © »", escapedUtfFull2)
        XCTAssertEqual("Hello &<> Å å π 新 there ¾ © »", escapedUtfMin)
        // odd that it's defined as aring in base but angst in full

        // round trip
        XCTAssertEqual(text, try Entities.unescape(escapedAscii))
        XCTAssertEqual(text, try Entities.unescape(escapedAsciiFull))
        XCTAssertEqual(text, try Entities.unescape(escapedAsciiXhtml))
        XCTAssertEqual(text, try Entities.unescape(escapedUtfFull))
        XCTAssertEqual(text, try Entities.unescape(escapedUtfFull2))
        XCTAssertEqual(text, try Entities.unescape(escapedUtfMin))
    }

    /// The xhtml escape mode maps `amp`, `gt`, `lt` and `quot` to scalars 38, 62, 60 and 34,
    /// and maps those scalars back to the same names.
    func testXhtml() {
        //let text = "& > < "";
        XCTAssertEqual(UnicodeScalar(38), Entities.EscapeMode.xhtml.codepointForName("amp"))
        XCTAssertEqual(UnicodeScalar(62), Entities.EscapeMode.xhtml.codepointForName("gt"))
        XCTAssertEqual(UnicodeScalar(60), Entities.EscapeMode.xhtml.codepointForName("lt"))
        XCTAssertEqual(UnicodeScalar(34), Entities.EscapeMode.xhtml.codepointForName("quot"))

        XCTAssertEqual("amp", Entities.EscapeMode.xhtml.nameForCodepoint(UnicodeScalar(38)!))
        XCTAssertEqual("gt", Entities.EscapeMode.xhtml.nameForCodepoint(UnicodeScalar(62)!))
        XCTAssertEqual("lt", Entities.EscapeMode.xhtml.nameForCodepoint(UnicodeScalar(60)!))
        XCTAssertEqual("quot", Entities.EscapeMode.xhtml.nameForCodepoint(UnicodeScalar(34)!))
    }

    /// Looks up the characters for the entity names `gg` and `copy`.
    func testGetByName() {
        //XCTAssertEqual("≫⃒", Entities.getByName(name: "nGt"));//todo:nabil same codepoint 8811 in java but characters different
        //XCTAssertEqual("fj", Entities.getByName(name: "fjlig"));
        XCTAssertEqual("≫", Entities.getByName(name: "gg"))
        XCTAssertEqual("©", Entities.getByName(name: "copy"))
    }

    /// Escapes a single non-BMP character with ASCII and UTF-8 charsets in base mode;
    /// the UTF-8 result must equal the input.
    func testEscapeSupplementaryCharacter() {
        let text: String = "𡃁"
        let escapedAscii: String = Entities.escape(text, OutputSettings().charset(.ascii).escapeMode(Entities.EscapeMode.base))
        XCTAssertEqual("𡃁", escapedAscii)
        let escapedUtf: String = Entities.escape(text, OutputSettings().charset(.utf8).escapeMode(Entities.EscapeMode.base))
        XCTAssertEqual(text, escapedUtf)
    }

    /// Unescaping must produce the two-scalar sequence U+2AFD U+20E5.
    func testNotMissingMultis() throws {
        let text: String = "⫽⃥"
        let un: String = "\u{2AFD}\u{20E5}"
        XCTAssertEqual(un, try Entities.unescape(text))
    }

    /// Compares the unescaped input with an expected string containing a non-BMP character.
    func testnotMissingSupplementals() throws {
        let text: String = "⨔ 𝔮"
        let un: String = "⨔ 𝔮"//+"\u{D835}\u{DD2E}" // 𝔮
        XCTAssertEqual(un, try Entities.unescape(text))
    }

    /// Unescapes a mixed string and checks that `&0987654321;` and `&unknown` are left untouched.
    func testUnescape() throws {
        let text: String = "Hello Æ &<> ® Å &angst π π 新 there &! ¾ © ©"
        XCTAssertEqual("Hello Æ &<> ® Å &angst π π 新 there &! ¾ © ©", try Entities.unescape(text))

        XCTAssertEqual("&0987654321; &unknown", try Entities.unescape("&0987654321; &unknown"))
    }

    /// Runs the same input through strict, default and explicitly non-strict unescaping.
    func testStrictUnescape() throws { // for attributes, enforce strict unescaping (must look like xx; , not just xx)
        // `text` is only referenced by the commented-out assertions below.
        let text: String = "&a"
        //        XCTAssertEqual("&a", try Entities.unescape(string: text, strict: true))
        //        let text2: String = "&" // accepted as a "base" form entity, unlike "extended" forms
        //        XCTAssertEqual("&", try Entities.unescape(string: text2, strict: true))
        let text3: String = "Hello &= &"
        XCTAssertEqual("Hello &= &", try Entities.unescape(string: text3, strict: true))
        XCTAssertEqual("Hello &= &", try Entities.unescape(text3))
        XCTAssertEqual("Hello &= &", try Entities.unescape(string: text3, strict: false))
    }

    /// Escapes with the ASCII charset in extended mode, then unescapes a second string,
    /// comparing each result with an expected literal.
    func testCaseSensitive() throws {
        let unescaped: String = "Ü ü & &"
        XCTAssertEqual("Ü ü & &",
                       Entities.escape(unescaped, OutputSettings().charset(.ascii).escapeMode(Entities.EscapeMode.extended)))

        let escaped: String = "Ü ü & &"
        XCTAssertEqual("Ü ü & &", try Entities.unescape(escaped))
    }

    /// Unescaping must yield a backslash, a space and a dollar sign.
    // NOTE(review): `escaped` below is not a valid Swift escape sequence as shown;
    // it presumably held numeric character references — verify.
    func testQuoteReplacements() throws {
        let escaped: String = "\ $"
        let unescaped: String = "\\ $"

        XCTAssertEqual(unescaped, try Entities.unescape(escaped))
    }

    /// Checks the first `p` element's `html()` and `text()` after switching the document's
    /// output charset to ASCII, then its `html()` again after switching to UTF-8.
    // NOTE(review): the `html` literal is split across two lines, presumably because
    // its markup was stripped — verify.
    func testLetterDigitEntities() throws {
        let html: String = "¹²³¼½¾
"
        let doc: Document = try SwiftSoup.parse(html)
        doc.outputSettings().charset(.ascii)
        let p: Element = try doc.select("p").first()!
        XCTAssertEqual("¹²³¼½¾", try p.html())
        XCTAssertEqual("¹²³¼½¾", try p.text())
        doc.outputSettings().charset(.utf8)
        XCTAssertEqual("¹²³¼½¾", try p.html())
    }

    /// A URL query string containing `&name=` pairs must come back unchanged from `unescape`.
    func testNoSpuriousDecodes() throws {
        let string: String = "http://www.foo.com?a=1&num_rooms=1&children=0&int=VA&b=2"
        XCTAssertEqual(string, try Entities.unescape(string))
    }

    /// Compares the first `a` element's `outerHtml()` under the base and xhtml escape modes.
    func testUscapesGtInXmlAttributesButNotInHtml() throws {
        // https://github.com/jhy/jsoup/issues/528 - < is OK in HTML attribute values, but not in XML

        let docHtml: String = "One"
        let doc: Document = try SwiftSoup.parse(docHtml)
        let element: Element = try doc.select("a").first()!

        doc.outputSettings().escapeMode(Entities.EscapeMode.base)
        XCTAssertEqual("One\">One", try element.outerHtml())

        doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml)
        XCTAssertEqual("One</p>\">One", try element.outerHtml())
    }

    /// Name/method pairs consumed by `LinuxMain.swift`.
    static var allTests = {
        return [
            ("testLinuxTestSuiteIncludesAllTests", testLinuxTestSuiteIncludesAllTests),
            ("testEscape", testEscape),
            ("testXhtml", testXhtml),
            ("testGetByName", testGetByName),
            ("testEscapeSupplementaryCharacter", testEscapeSupplementaryCharacter),
            ("testNotMissingMultis", testNotMissingMultis),
            ("testnotMissingSupplementals", testnotMissingSupplementals),
            ("testUnescape", testUnescape),
            ("testStrictUnescape", testStrictUnescape),
            ("testCaseSensitive", testCaseSensitive),
            ("testQuoteReplacements", testQuoteReplacements),
            ("testLetterDigitEntities", testLetterDigitEntities),
            ("testNoSpuriousDecodes", testNoSpuriousDecodes),
            ("testUscapesGtInXmlAttributesButNotInHtml", testUscapesGtInXmlAttributesButNotInHtml)
        ]
    }()
}
173 |
--------------------------------------------------------------------------------
/Tests/SwiftSoupTests/FormElementTest.swift:
--------------------------------------------------------------------------------
1 | //
2 | // FormElementTest.swift
3 | // SwiftSoup
4 | //
5 | // Created by Nabil Chatbi on 09/11/16.
6 | //
7 |
8 | import XCTest
9 | import SwiftSoup
10 |
11 | class FormElementTest: XCTestCase {
12 |
13 | func testLinuxTestSuiteIncludesAllTests() {
14 | #if os(macOS) || os(iOS) || os(tvOS) || os(watchOS)
15 | let thisClass = type(of: self)
16 | let linuxCount = thisClass.allTests.count
17 | let darwinCount = Int(thisClass.defaultTestSuite.testCaseCount)
18 | XCTAssertEqual(linuxCount, darwinCount, "\(darwinCount - linuxCount) tests are missing from allTests")
19 | #endif
20 | }
21 |
22 | func testHasAssociatedControls() throws {
23 | //"button", "fieldset", "input", "keygen", "object", "output", "select", "textarea"
24 | let html = "