├── .gitattributes ├── .github ├── FUNDING.yml └── workflows │ └── swift.yml ├── .gitignore ├── .swiftpm └── xcode │ └── package.xcworkspace │ └── contents.xcworkspacedata ├── .travis.yml ├── CHANGELOG.md ├── Example ├── .gitignore ├── Example.xcodeproj │ ├── project.pbxproj │ └── project.xcworkspace │ │ └── contents.xcworkspacedata ├── Example.xcworkspace │ └── contents.xcworkspacedata ├── Example │ ├── AppDelegate.swift │ ├── Assets.xcassets │ │ └── AppIcon.appiconset │ │ │ └── Contents.json │ ├── Base.lproj │ │ ├── LaunchScreen.storyboard │ │ └── Main.storyboard │ ├── Info.plist │ ├── QueryViewController.swift │ └── ViewController.swift ├── Podfile ├── img1.png └── img2.png ├── LICENSE ├── Package.swift ├── README.md ├── Resources ├── Info.plist ├── InfoMac.plist ├── InfoWatchOS.plist └── InfotvOS.plist ├── Sources ├── ArrayExt.swift ├── Attribute.swift ├── Attributes.swift ├── BinarySearch.swift ├── BooleanAttribute.swift ├── CharacterExt.swift ├── CharacterReader.swift ├── Cleaner.swift ├── Collector.swift ├── CombiningEvaluator.swift ├── Comment.swift ├── Connection.swift ├── CssSelector.swift ├── DataNode.swift ├── DataUtil.swift ├── Document.swift ├── DocumentType.swift ├── Element.swift ├── Elements.swift ├── Entities.swift ├── Evaluator.swift ├── Exception.swift ├── FormElement.swift ├── HtmlTreeBuilder.swift ├── HtmlTreeBuilderState.swift ├── HttpStatusException.swift ├── Mutex.swift ├── Node.swift ├── NodeTraversor.swift ├── NodeVisitor.swift ├── OrderedSet.swift ├── ParseError.swift ├── ParseErrorList.swift ├── ParseSettings.swift ├── Parser.swift ├── ParsingStrings.swift ├── Pattern.swift ├── QueryParser.swift ├── SerializationException.swift ├── SimpleDictionary.swift ├── StreamReader.swift ├── String.swift ├── StringBuilder.swift ├── StringUtil.swift ├── StructuralEvaluator.swift ├── SwiftSoup.h ├── SwiftSoup.swift ├── Tag.swift ├── TextNode.swift ├── Token.swift ├── TokenQueue.swift ├── Tokeniser.swift ├── TokeniserState.swift ├── TreeBuilder.swift 
├── UTF8Arrays.swift ├── UnfairLock.swift ├── UnicodeScalar.swift ├── Validate.swift ├── Whitelist.swift ├── XmlDeclaration.swift └── XmlTreeBuilder.swift ├── SwiftSoup.podspec ├── SwiftSoup.xcodeproj ├── project.pbxproj ├── project.xcworkspace │ ├── contents.xcworkspacedata │ └── xcshareddata │ │ └── IDEWorkspaceChecks.plist └── xcshareddata │ ├── xcbaselines │ └── 8CE4181E1DAA54A900240B42.xcbaseline │ │ ├── 9F80FB9B-4E37-45A8-BFE8-9AF36737A6F3.plist │ │ ├── F9553B46-8F24-4C2B-8A1E-8CC5535D12E1.plist │ │ └── Info.plist │ └── xcschemes │ ├── PerformanceTest.xcscheme │ ├── SwiftSoup-Package.xcscheme │ ├── SwiftSoup-iOS.xcscheme │ ├── SwiftSoup-macOS.xcscheme │ ├── SwiftSoup-tvOS.xcscheme │ └── SwiftSoup-watchOS.xcscheme ├── Tests-macOS ├── Info.plist ├── ParserBenchmark.swift └── corpus │ ├── Amazon.html │ ├── GitHub.html │ ├── Google.html │ ├── Reuters.html │ ├── Wikipedia.html │ └── Wirecutter.html ├── Tests ├── Info.plist ├── LinuxMain.swift └── SwiftSoupTests │ ├── AttributeParseTest.swift │ ├── AttributeTest.swift │ ├── AttributesTest.swift │ ├── BuildEntities.swift │ ├── CharacterReaderTest.swift │ ├── CleanerTest.swift │ ├── CssTest.swift │ ├── DocumentTest.swift │ ├── DocumentTypeTest.swift │ ├── ElementTest.swift │ ├── ElementsTest.swift │ ├── EntitiesTest.swift │ ├── FormElementTest.swift │ ├── HtmlParserTest.swift │ ├── NodeTest.swift │ ├── NodeTraversorTest.swift │ ├── ParseSettingsTest.swift │ ├── QueryParserTest.swift │ ├── SelectorTest.swift │ ├── StringUtilTest.swift │ ├── TagTest.swift │ ├── TextNodeTest.swift │ ├── TextUtil.swift │ ├── TokenQueueTest.swift │ └── XmlTreeBuilderTest.swift ├── _config.yml └── swiftsoup.png /.gitattributes: -------------------------------------------------------------------------------- 1 | *.swift linguist-detectable=true 2 | *.js linguist-detectable=false 3 | *.html linguist-detectable=false 4 | *.xml linguist-detectable=false 5 | -------------------------------------------------------------------------------- 
/.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: [scinfu] # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 13 | -------------------------------------------------------------------------------- /.github/workflows/swift.yml: -------------------------------------------------------------------------------- 1 | name: Swift 2 | 3 | 4 | 5 | on: 6 | push: 7 | branches: [ "master" ] 8 | pull_request: 9 | branches: [ "master" ] 10 | 11 | jobs: 12 | build: 13 | name: Swift ${{ matrix.swift }} on ${{ matrix.os }} 14 | strategy: 15 | matrix: 16 | os: [ubuntu-22.04, macos-latest] 17 | swift: ["5", "5.9"] 18 | 19 | runs-on: ${{ matrix.os }} 20 | 21 | steps: 22 | - uses: swift-actions/setup-swift@v2.2.0 23 | with: 24 | swift-version: ${{ matrix.swift }} 25 | - uses: actions/checkout@v4 26 | - name: Build 27 | run: swift build -v 28 | - name: Run tests 29 | run: swift test -v 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # OS X 2 | .DS_Store 3 | 4 | # Swift Package Manager 5 | .build 6 | 7 | # Xcode user settings 8 | xcuserdata/ 9 | 
-------------------------------------------------------------------------------- /.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # Travis CI 2 | 3 | # https://swift.org/download/#releases 4 | # whitelist (branches that should be built) 5 | branches: 6 | only: 7 | - master 8 | - develop 9 | 10 | # the matrix of builds should cover each combination of Swift version 11 | # and platform that is supported. The version of Swift used is specified 12 | # by .swift-version, unless SWIFT_SNAPSHOT is specified. 13 | matrix: 14 | include: 15 | # Continue to test one permutation on Xenial (16.04) 16 | - os: linux 17 | dist: xenial 18 | sudo: required 19 | services: docker 20 | env: DOCKER_IMAGE=swift:5.0.3-xenial SWIFT_SNAPSHOT=5.0.3 SWIFT_TEST_ARGS="--parallel" 21 | - os: linux 22 | dist: xenial 23 | sudo: required 24 | services: docker 25 | env: DOCKER_IMAGE=swift:5.1.5-xenial SWIFT_SNAPSHOT=5.1.5 SWIFT_TEST_ARGS="--parallel" 26 | - os: osx 27 | osx_image: xcode10.2 28 | sudo: required 29 | env: SWIFT_SNAPSHOT=5.0.1 SWIFT_TEST_ARGS="--parallel" 30 | - os: osx 31 | osx_image: xcode11.3 32 | sudo: required 33 | env: SWIFT_SNAPSHOT=5.1.3 SWIFT_TEST_ARGS="--parallel" 34 | - os: osx 35 | osx_image: xcode13.3 36 | sudo: required 37 | env: SWIFT_SNAPSHOT=$SWIFT_DEVELOPMENT_SNAPSHOT SWIFT_TEST_ARGS="--parallel" 38 | 39 | before_install: 40 | - git clone https://github.com/IBM-Swift/Package-Builder.git 41 | 42 | script: 43 | - ./Package-Builder/build-package.sh -projectDir $TRAVIS_BUILD_DIR 44 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 
| All notable changes to this project will be documented in this file. 4 | 5 | ## [2.3.2](https://github.com/scinfu/SwiftSoup/tree/2.3.2) 6 | * Renamed Selector Class to CssSelector 7 | 8 | ## [1.7.4](https://github.com/scinfu/SwiftSoup/tree/1.7.4) 9 | * Removed Some warnings 10 | * Swift 4.2 11 | 12 | ## [1.7.1](https://github.com/scinfu/SwiftSoup/tree/1.7.1) 13 | * Backward compatibility for Swift < 4.1 14 | 15 | ## [1.7.0](https://github.com/scinfu/SwiftSoup/tree/1.7.0) 16 | * Removed StringBuilder from Element.cssSelector 17 | * Lint Code 18 | * Swift 4.1 19 | 20 | ## [1.6.5](https://github.com/scinfu/SwiftSoup/tree/1.6.5) 21 | * Removed StringBuilder from Element.cssSelector 22 | * Lint Code 23 | 24 | 25 | ## [1.6.4](https://github.com/scinfu/SwiftSoup/tree/1.6.4) 26 | * Add newer simulators to targeted devices to build with Carthage [tvOS] 27 | 28 | ## [1.6.3](https://github.com/scinfu/SwiftSoup/tree/1.6.3) 29 | 30 | * Add newer tvOS simulators to targeted devices to build with Carthage. 31 | * Add newer watchOS simulators to targeted devices to build with Carthage. 
32 | -------------------------------------------------------------------------------- /Example/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/xcode 3 | 4 | ### Xcode ### 5 | # Xcode 6 | # 7 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore 8 | 9 | ## User settings 10 | xcuserdata/ 11 | 12 | ## compatibility with Xcode 8 and earlier (ignoring not required starting Xcode 9) 13 | *.xcscmblueprint 14 | *.xccheckout 15 | 16 | ## compatibility with Xcode 3 and earlier (ignoring not required starting Xcode 4) 17 | build/ 18 | DerivedData/ 19 | *.moved-aside 20 | *.pbxuser 21 | !default.pbxuser 22 | *.mode1v3 23 | !default.mode1v3 24 | *.mode2v3 25 | !default.mode2v3 26 | *.perspectivev3 27 | !default.perspectivev3 28 | 29 | ### Xcode Patch ### 30 | *.xcodeproj/* 31 | !*.xcodeproj/project.pbxproj 32 | !*.xcodeproj/xcshareddata/ 33 | !*.xcworkspace/contents.xcworkspacedata 34 | /*.gcno 35 | 36 | # End of https://www.gitignore.io/api/xcode 37 | Pods 38 | Podfile.lock -------------------------------------------------------------------------------- /Example/Example.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /Example/Example.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /Example/Example/AppDelegate.swift: -------------------------------------------------------------------------------- 1 | // 2 | // AppDelegate.swift 3 | // Example 4 | // 5 | // Created by Nabil on 05/10/17. 6 | // Copyright © 2017 Nabil. All rights reserved. 
7 | // 8 | 9 | import UIKit 10 | 11 | @UIApplicationMain 12 | class AppDelegate: UIResponder, UIApplicationDelegate { 13 | 14 | var window: UIWindow? 15 | 16 | func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool { 17 | // Override point for customization after application launch. 18 | return true 19 | } 20 | 21 | func applicationWillResignActive(_ application: UIApplication) { 22 | // Sent when the application is about to move from active to inactive state. This can occur for certain types of temporary interruptions (such as an incoming phone call or SMS message) or when the user quits the application and it begins the transition to the background state. 23 | // Use this method to pause ongoing tasks, disable timers, and invalidate graphics rendering callbacks. Games should use this method to pause the game. 24 | } 25 | 26 | func applicationDidEnterBackground(_ application: UIApplication) { 27 | // Use this method to release shared resources, save user data, invalidate timers, and store enough application state information to restore your application to its current state in case it is terminated later. 28 | // If your application supports background execution, this method is called instead of applicationWillTerminate: when the user quits. 29 | } 30 | 31 | func applicationWillEnterForeground(_ application: UIApplication) { 32 | // Called as part of the transition from the background to the active state; here you can undo many of the changes made on entering the background. 33 | } 34 | 35 | func applicationDidBecomeActive(_ application: UIApplication) { 36 | // Restart any tasks that were paused (or not yet started) while the application was inactive. If the application was previously in the background, optionally refresh the user interface. 37 | } 38 | 39 | func applicationWillTerminate(_ application: UIApplication) { 40 | // Called when the application is about to terminate. 
Save data if appropriate. See also applicationDidEnterBackground:. 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /Example/Example/Assets.xcassets/AppIcon.appiconset/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "images" : [ 3 | { 4 | "idiom" : "iphone", 5 | "size" : "20x20", 6 | "scale" : "2x" 7 | }, 8 | { 9 | "idiom" : "iphone", 10 | "size" : "20x20", 11 | "scale" : "3x" 12 | }, 13 | { 14 | "idiom" : "iphone", 15 | "size" : "29x29", 16 | "scale" : "2x" 17 | }, 18 | { 19 | "idiom" : "iphone", 20 | "size" : "29x29", 21 | "scale" : "3x" 22 | }, 23 | { 24 | "idiom" : "iphone", 25 | "size" : "40x40", 26 | "scale" : "2x" 27 | }, 28 | { 29 | "idiom" : "iphone", 30 | "size" : "40x40", 31 | "scale" : "3x" 32 | }, 33 | { 34 | "idiom" : "iphone", 35 | "size" : "60x60", 36 | "scale" : "2x" 37 | }, 38 | { 39 | "idiom" : "iphone", 40 | "size" : "60x60", 41 | "scale" : "3x" 42 | }, 43 | { 44 | "idiom" : "ipad", 45 | "size" : "20x20", 46 | "scale" : "1x" 47 | }, 48 | { 49 | "idiom" : "ipad", 50 | "size" : "20x20", 51 | "scale" : "2x" 52 | }, 53 | { 54 | "idiom" : "ipad", 55 | "size" : "29x29", 56 | "scale" : "1x" 57 | }, 58 | { 59 | "idiom" : "ipad", 60 | "size" : "29x29", 61 | "scale" : "2x" 62 | }, 63 | { 64 | "idiom" : "ipad", 65 | "size" : "40x40", 66 | "scale" : "1x" 67 | }, 68 | { 69 | "idiom" : "ipad", 70 | "size" : "40x40", 71 | "scale" : "2x" 72 | }, 73 | { 74 | "idiom" : "ipad", 75 | "size" : "76x76", 76 | "scale" : "1x" 77 | }, 78 | { 79 | "idiom" : "ipad", 80 | "size" : "76x76", 81 | "scale" : "2x" 82 | }, 83 | { 84 | "idiom" : "ipad", 85 | "size" : "83.5x83.5", 86 | "scale" : "2x" 87 | }, 88 | { 89 | "idiom" : "ios-marketing", 90 | "size" : "1024x1024", 91 | "scale" : "1x" 92 | } 93 | ], 94 | "info" : { 95 | "version" : 1, 96 | "author" : "xcode" 97 | } 98 | } -------------------------------------------------------------------------------- 
/Example/Example/Base.lproj/LaunchScreen.storyboard: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /Example/Example/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | APPL 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleVersion 20 | 1 21 | LSRequiresIPhoneOS 22 | 23 | NSAppTransportSecurity 24 | 25 | NSAllowsArbitraryLoads 26 | 27 | 28 | UILaunchStoryboardName 29 | LaunchScreen 30 | UIMainStoryboardFile 31 | Main 32 | UIRequiredDeviceCapabilities 33 | 34 | armv7 35 | 36 | UISupportedInterfaceOrientations 37 | 38 | UIInterfaceOrientationPortrait 39 | UIInterfaceOrientationLandscapeLeft 40 | UIInterfaceOrientationLandscapeRight 41 | 42 | UISupportedInterfaceOrientations~ipad 43 | 44 | UIInterfaceOrientationPortrait 45 | UIInterfaceOrientationPortraitUpsideDown 46 | UIInterfaceOrientationLandscapeLeft 47 | UIInterfaceOrientationLandscapeRight 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /Example/Example/QueryViewController.swift: -------------------------------------------------------------------------------- 1 | // 2 | // QueryViewController.swift 3 | // Example 4 | // 5 | // Created by Nabil on 02/03/18. 6 | // Copyright © 2018 Nabil. All rights reserved. 7 | // 8 | 9 | import UIKit 10 | 11 | class QueryViewControllerCell: UITableViewCell { 12 | @IBOutlet weak var selector: UILabel! 13 | @IBOutlet weak var example: UILabel! 
14 | @IBOutlet weak var descriptionLabel: UILabel! 15 | 16 | } 17 | 18 | class QueryViewController: UIViewController { 19 | 20 | typealias Item = (selector: String, example: String, description: String) 21 | 22 | //example items 23 | let items: [ 24 | Item] = [ Item(selector: "*", example: "*", description: "any element"), 25 | Item(selector: "#id", example: "#pageFooter", description: "elements with attribute ID of \"pageFooter\""), 26 | Item(selector: ".class", example: ".login_form_label_field", description: "Selects all elements with class=\"login_form_label_field\""), 27 | Item(selector: "element", example: "p", description: "Selects all

<p> elements"), 28 | Item(selector: "element", example: "div", description: "Selects all <div> elements"), 29 | Item(selector: "element,element", example: "div, p", description: "Selects all <div> elements and all <p> elements"), 30 | Item(selector: "element element", example: "div p", description: "Selects all <p> elements inside <div> elements"), 31 | Item(selector: "element>element", example: "div > p", description: "Selects all <p> elements where the parent is a <div> element"), 32 | Item(selector: "[attribute]", example: "[title]", description: "Selects all elements with a \"title\" attribute"), 33 | Item(selector: "[^attrPrefix]", example: "[^cell]", description: "elements with an attribute name starting with \"cell\". Use to find elements with HTML5 datasets"), 34 | Item(selector: "[attribute=value]", example: "[id=pageTitle]", description: "Selects all elements with id=\"pageTitle\""), 35 | Item(selector: "[attribute^=value]", example: "a[href^=https]", description: "Selects every element whose href attribute value begins with \"https\""), 36 | Item(selector: "[attribute$=value]", example: "a[href$=.com/]", description: "Selects every element whose href attribute value ends with \".com/\""), 37 | Item(selector: "[attribute*=value]", example: "a[href*=login]", description: "Selects every element whose href attribute value contains the substring \"login\""), 38 | Item(selector: "[attr~=regex]", example: "img[src~=[gif]]", description: "elements with an attribute named \"img\", and value matching the regular expression") 39 | ] 40 | 41 | var completionHandler: (Item) -> Void = { arg in } 42 | @IBOutlet weak var tableView: UITableView! 43 | 44 | override func viewDidLoad() { 45 | super.viewDidLoad() 46 | self.title = "" 47 | 48 | self.tableView.rowHeight = UITableView.automaticDimension 49 | self.tableView.estimatedRowHeight = UITableView.automaticDimension 50 | } 51 | 52 | } 53 | 54 | extension QueryViewController: UITableViewDataSource { 55 | func numberOfSections(in tableView: UITableView) -> Int { 56 | return 1 57 | } 58 | 59 | public func tableView(_ tableView: UITableView, numberOfRowsInSection section: Int) -> Int { 60 | return items.count 61 | } 62 | 63 | public func tableView(_ tableView: UITableView, cellForRowAt indexPath: IndexPath) -> UITableViewCell { 64 | let cell = tableView.dequeueReusableCell(withIdentifier: "QueryViewControllerCell", for: indexPath) as! 
QueryViewControllerCell 65 | 66 | cell.selector.text = items[indexPath.row].selector 67 | cell.example.text = items[indexPath.row].example 68 | cell.descriptionLabel.text = items[indexPath.row].description 69 | 70 | let color1 = UIColor.init(red: 245.0/255, green: 245.0/255, blue: 245.0/255, alpha: 1) 71 | let color2 = UIColor.init(red: 240.0/255, green: 240.0/255, blue: 240.0/255, alpha: 1) 72 | cell.backgroundColor = (indexPath.row % 2) == 0 ? color1 : color2 73 | 74 | return cell 75 | } 76 | } 77 | 78 | extension QueryViewController: UITableViewDelegate { 79 | func tableView(_ tableView: UITableView, didSelectRowAt indexPath: IndexPath) { 80 | // user select an item 81 | completionHandler(items[indexPath.row]) 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /Example/Example/ViewController.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ViewController.swift 3 | // Example 4 | // 5 | // Created by Nabil on 05/10/17. 6 | // Copyright © 2017 Nabil. All rights reserved. 7 | // 8 | 9 | import UIKit 10 | import SwiftSoup 11 | 12 | class ViewController: UIViewController { 13 | 14 | typealias Item = (text: String, html: String) 15 | 16 | @IBOutlet weak var tableView: UITableView! 17 | @IBOutlet var urlTextField: UITextField! 18 | @IBOutlet var cssTextField: UITextField! 
19 | 20 | // current document 21 | var document: Document = Document.init("") 22 | // items found 23 | var items: [Item] = [] 24 | 25 | override func viewDidLoad() { 26 | super.viewDidLoad() 27 | 28 | self.title = "SwiftSoup Example" 29 | 30 | self.tableView.rowHeight = UITableView.automaticDimension 31 | self.tableView.estimatedRowHeight = UITableView.automaticDimension 32 | 33 | urlTextField.text = "http://www.facebook.com" 34 | cssTextField.text = "div" 35 | 36 | // start first request 37 | downloadHTML() 38 | } 39 | 40 | //Download HTML 41 | func downloadHTML() { 42 | // url string to URL 43 | guard let url = URL(string: urlTextField.text ?? "") else { 44 | // an error occurred 45 | UIAlertController.showAlert("Error: \(urlTextField.text ?? "") doesn't seem to be a valid URL", self) 46 | return 47 | } 48 | 49 | do { 50 | // content of url 51 | let html = try String.init(contentsOf: url) 52 | // parse it into a Document 53 | document = try SwiftSoup.parse(html) 54 | // parse css query 55 | parse() 56 | } catch let error { 57 | // an error occurred 58 | UIAlertController.showAlert("Error: \(error)", self) 59 | } 60 | 61 | } 62 | 63 | //Parse CSS selector 64 | func parse() { 65 | do { 66 | //empty old items 67 | items = [] 68 | // find elements matching the css selector 69 | let elements: Elements = try document.select(cssTextField.text ?? "") 70 | //transform it into a local object (Item) 71 | for element in elements { 72 | let text = try element.text() 73 | let html = try element.outerHtml() 74 | items.append(Item(text: text, html: html)) 75 | } 76 | 77 | } catch let error { 78 | UIAlertController.showAlert("Error: \(error)", self) 79 | } 80 | 81 | tableView.reloadData() 82 | } 83 | 84 | @IBAction func chooseQuery(_ sender: Any) { 85 | guard let viewController = storyboard?.instantiateViewController( 86 | withIdentifier: "QueryViewController") as? 
QueryViewController else { 87 | return 88 | } 89 | viewController.completionHandler = {[weak self](resilt) in 90 | self?.navigationController?.popViewController(animated: true) 91 | self?.cssTextField.text = resilt.example 92 | self?.parse() 93 | } 94 | self.show(viewController, sender: self) 95 | } 96 | 97 | } 98 | 99 | extension ViewController: UITableViewDataSource { 100 | func numberOfSections(in tableView: UITableView) -> Int { 101 | return 1 102 | } 103 | 104 | public func tableView(_ tableView: UITableView, numberOfRowsInSection section: Int) -> Int { 105 | return items.count 106 | } 107 | 108 | public func tableView(_ tableView: UITableView, cellForRowAt indexPath: IndexPath) -> UITableViewCell { 109 | var cell = tableView.dequeueReusableCell(withIdentifier: "cell") 110 | if cell == nil { 111 | cell = UITableViewCell.init(style: UITableViewCell.CellStyle.subtitle, reuseIdentifier: "cell") 112 | cell?.textLabel?.numberOfLines = 2 113 | cell?.detailTextLabel?.numberOfLines = 6 114 | 115 | cell?.textLabel?.textColor = UIColor.init(red: 1.0/255, green: 174.0/255, blue: 66.0/255, alpha: 1) 116 | cell?.detailTextLabel?.textColor = UIColor.init(red: 55.0/255, green: 67.0/255, blue: 55.0/255, alpha: 1) 117 | 118 | cell?.backgroundColor = UIColor.init(red: 245.0/255, green: 245.0/255, blue: 245.0/255, alpha: 1) 119 | } 120 | 121 | cell?.textLabel?.text = items[indexPath.row].text 122 | cell?.detailTextLabel?.text = items[indexPath.row].html 123 | 124 | let color1 = UIColor.init(red: 245.0/255, green: 245.0/255, blue: 245.0/255, alpha: 1) 125 | let color2 = UIColor.init(red: 240.0/255, green: 240.0/255, blue: 240.0/255, alpha: 1) 126 | cell?.backgroundColor = (indexPath.row % 2) == 0 ? color1 : color2 127 | 128 | return cell! 
129 | } 130 | } 131 | 132 | extension ViewController: UITableViewDelegate { 133 | } 134 | 135 | extension ViewController: UITextFieldDelegate { 136 | public func textFieldShouldReturn(_ textField: UITextField) -> Bool { 137 | textField.resignFirstResponder() 138 | return false 139 | } 140 | 141 | public func textFieldDidEndEditing(_ textField: UITextField) { 142 | 143 | if textField == urlTextField { 144 | downloadHTML() 145 | } 146 | 147 | if textField == cssTextField { 148 | parse() 149 | } 150 | } 151 | } 152 | 153 | extension UIAlertController { 154 | static public func showAlert(_ message: String, _ controller: UIViewController) { 155 | let alert = UIAlertController(title: "Alert", message: message, preferredStyle: UIAlertController.Style.alert) 156 | alert.addAction(UIAlertAction(title: "OK", style: UIAlertAction.Style.default, handler: nil)) 157 | controller.present(alert, animated: true, completion: nil) 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /Example/Podfile: -------------------------------------------------------------------------------- 1 | use_frameworks! 
2 | 3 | target 'Example' do 4 | #pod 'SwiftSoup' 5 | pod 'SwiftSoup', :path => '../' 6 | end 7 | -------------------------------------------------------------------------------- /Example/img1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scinfu/SwiftSoup/aa85ee96017a730031bafe411cde24a08a17a9c9/Example/img1.png -------------------------------------------------------------------------------- /Example/img2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scinfu/SwiftSoup/aa85ee96017a730031bafe411cde24a08a17a9c9/Example/img2.png -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2009-2025 Jonathan Hedley 4 | Swift port copyright (c) 2016-2025 Nabil Chatbi 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version:5.9 2 | 3 | import PackageDescription 4 | 5 | let package = Package( 6 | name: "SwiftSoup", 7 | platforms: [.macOS(.v10_15), .iOS(.v13), .watchOS(.v6)], 8 | products: [ 9 | .library(name: "SwiftSoup", targets: ["SwiftSoup"]) 10 | ], 11 | targets: [ 12 | .target( 13 | name: "SwiftSoup", 14 | path: "Sources"), 15 | .testTarget( 16 | name: "SwiftSoupTests", 17 | dependencies: ["SwiftSoup"]) 18 | ] 19 | ) 20 | -------------------------------------------------------------------------------- /Resources/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | en 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | FMWK 17 | CFBundleShortVersionString 18 | 1.1.1 19 | CFBundleVersion 20 | $(CURRENT_PROJECT_VERSION) 21 | NSHumanReadableCopyright 22 | Copyright © 2016 Nabil Chatbi (scinfu). 
All rights reserved 23 | NSPrincipalClass 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /Resources/InfoMac.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | en 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | FMWK 17 | CFBundleShortVersionString 18 | 1.5.8 19 | CFBundleVersion 20 | $(CURRENT_PROJECT_VERSION) 21 | NSHumanReadableCopyright 22 | Copyright © 2016 Nabil Chatbi (scinfu). All rights reserved 23 | NSPrincipalClass 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /Resources/InfoWatchOS.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | en 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | FMWK 17 | CFBundleShortVersionString 18 | 1.1.1 19 | CFBundleVersion 20 | $(CURRENT_PROJECT_VERSION) 21 | NSHumanReadableCopyright 22 | Copyright © 2016 Nabil Chatbi (scinfu). 
All rights reserved 23 | NSPrincipalClass 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /Resources/InfotvOS.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | en 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | FMWK 17 | CFBundleShortVersionString 18 | 1.1.1 19 | CFBundleVersion 20 | $(CURRENT_PROJECT_VERSION) 21 | NSHumanReadableCopyright 22 | Copyright © 2016 Nabil Chatbi (scinfu). All rights reserved 23 | NSPrincipalClass 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /Sources/ArrayExt.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ArrayExt.swift 3 | // SwifSoup 4 | // 5 | // Created by Nabil Chatbi on 05/10/16. 6 | // 7 | 8 | import Foundation 9 | 10 | extension Array where Element: Equatable { 11 | func lastIndexOf(_ e: Element) -> Int { 12 | for pos in (0.. String { 41 | return String(decoding: getKeyUTF8(), as: UTF8.self) 42 | } 43 | 44 | open func getKeyUTF8() -> [UInt8] { 45 | return key 46 | } 47 | 48 | /** 49 | Set the attribute key; case is preserved. 50 | @param key the new key; must not be null 51 | */ 52 | open func setKey(key: [UInt8]) throws { 53 | try Validate.notEmpty(string: key) 54 | self.key = key.trim() 55 | } 56 | 57 | open func setKey(key: String) throws { 58 | try setKey(key: key.utf8Array) 59 | } 60 | 61 | /** 62 | Get the attribute value. 63 | @return the attribute value 64 | */ 65 | open func getValue() -> String { 66 | return String(decoding: getValueUTF8(), as: UTF8.self) 67 | } 68 | 69 | open func getValueUTF8() -> [UInt8] { 70 | return value 71 | } 72 | 73 | /** 74 | Set the attribute value. 
@param value the new attribute value; must not be null
     */
    @discardableResult
    open func setValue(value: [UInt8]) -> [UInt8] {
        // Remember the bytes being replaced so they can be handed back to the caller.
        let previous = self.value
        self.value = value
        return previous
    }

    /**
     Render this attribute as HTML; e.g. {@code href="index.html"}.
     The output settings are taken from a freshly created `Document([])`.
     @return HTML
     */
    public func html() -> String {
        let builder = StringBuilder()
        let settings = Document([]).outputSettings()
        html(accum: builder, out: settings)
        return builder.toString()
    }

    /**
     Append this attribute to `accum`. The key is always written; the value
     (wrapped in `UTF8Arrays.attributeEqualsQuoteMark` / `UTF8Arrays.quoteMark`
     and passed through `Entities.escape`) is written only when
     `shouldCollapseAttribute(out:)` returns false.
     @param accum builder that receives the output
     @param out output settings used for the collapse decision and escaping
     */
    @inlinable
    public func html(accum: StringBuilder, out: OutputSettings) {
        accum.append(key)
        guard !shouldCollapseAttribute(out: out) else { return }
        accum.append(UTF8Arrays.attributeEqualsQuoteMark)
        Entities.escape(&accum.buffer, value, out, true, false, false)
        accum.append(UTF8Arrays.quoteMark)
    }

    /**
     String form of this attribute; simply forwards to {@link #html()}.
     @return string
     */
    open func toString() -> String {
        return html()
    }

    /**
     * Create a new Attribute from an unencoded key and a HTML attribute encoded value.
     * @param unencodedKey assumes the key is not encoded, as can be only run of simple \w chars.
* @param encodedValue HTML attribute encoded value
     * @return attribute
     */
    public static func createFromEncoded(unencodedKey: [UInt8], encodedValue: [UInt8]) throws -> Attribute {
        let value = try Entities.unescape(string: encodedValue, strict: true)
        return try Attribute(key: unencodedKey, value: value)
    }

    /**
     * True when the key starts with `Attributes.dataPrefix` and has at least one
     * more byte after that prefix.
     */
    public func isDataAttribute() -> Bool {
        return key.starts(with: Attributes.dataPrefix) && key.count > Attributes.dataPrefix.count
    }

    /**
     * Collapsible if it's a boolean attribute and value is empty or same as name
     *
     * @param out output settings; collapsing only applies to the html syntax
     * @return Returns whether collapsible or not
     */
    public final func shouldCollapseAttribute(out: OutputSettings) -> Bool {
        return (value.isEmpty || value.equalsIgnoreCase(string: key))
            && out.syntax() == OutputSettings.Syntax.html
            && isBooleanAttribute()
    }

    /**
     * True when the lowercased key is contained in `Attribute.booleanAttributes`.
     */
    public func isBooleanAttribute() -> Bool {
        return Attribute.booleanAttributes.contains(key.lowercased()[...])
    }

    /**
     * Combine the hashes of key and value into a single Int.
     *
     * `hashValue` may be any Int, so the multiplication and addition must use the
     * wrapping operators (`&*`, `&+`); the checked `*` / `+` trap at runtime when
     * the intermediate result overflows.
     *
     * @return combined hash of key and value
     */
    public func hashCode() -> Int {
        return 31 &* key.hashValue &+ value.hashValue
    }

    /**
     * Create a new Attribute with the same key and value.
     *
     * If the initializer throws an `Exception.Error`, its message is printed.
     * NOTE(review): the fallback below calls the same throwing initializer with an
     * empty key under `try!`. `setKey` rejects empty keys via `Validate.notEmpty`;
     * if the initializer does the same, reaching the fallback traps. Confirm
     * against the initializer.
     *
     * @return the copy
     */
    public func clone() -> Attribute {
        do {
            return try Attribute(key: key, value: value)
        } catch Exception.Error( _, let msg) {
            print(msg)
        } catch {

        }
        return try! Attribute(key: [], value: [])
    }
}

extension Attribute: Equatable {
    /// Two attributes are equal when both their values and their keys are equal.
    static public func == (lhs: Attribute, rhs: Attribute) -> Bool {
        return lhs.value == rhs.value && lhs.key == rhs.key
    }

}

--------------------------------------------------------------------------------
/Sources/BinarySearch.swift:
--------------------------------------------------------------------------------
//
//  BinarySearch.swift
//  SwiftSoup-iOS
//
//  Created by Garth Snyder on 2/28/19.
//
//  Adapted from https://stackoverflow.com/questions/31904396/swift-binary-search-for-standard-array
//

import Foundation

extension Collection {

    /// Binary search over a Collection that is already partitioned by `predicate`.
    ///
    /// Behavior is undefined if the collection is not properly sorted.
    ///
    /// This is only O(logN) for RandomAccessCollections; Collections in
    /// general may implement offsetting of indexes as an O(K) operation. (E.g.,
    /// Strings are like this).
    ///
    /// - Note: When searching (rather than finding an insertion point), always
    ///     test the element at the returned index to confirm it is a genuine
    ///     match. If the element is absent you still get back the index where
    ///     it would be inserted, and that index may be `endIndex`.
    ///
    /// - Parameter predicate: Reports the ordering of a given Element relative
    ///     to the desired Element. Typically, this is <.
    ///
    /// - Returns: Index N such that the predicate is true for all elements up to
    ///     but not including N, and is false for all elements N and beyond
    func binarySearch(predicate: (Element) -> Bool) -> Index {
        var lower = startIndex
        var upper = endIndex
        while lower != upper {
            // Probe the midpoint of the remaining half-open range [lower, upper).
            let halfSpan = distance(from: lower, to: upper) / 2
            let probe = index(lower, offsetBy: halfSpan)
            if predicate(self[probe]) {
                lower = index(after: probe)
            } else {
                upper = probe
            }
        }
        return lower
    }

    /// Binary search lookup for ordered Collections using a KeyPath
    /// relative to Element.
    ///
    /// Behavior is undefined if the collection is not properly sorted.
    ///
    /// This is only O(logN) for RandomAccessCollections; Collections in
    /// general may implement offsetting of indexes as an O(K) operation.
(E.g., 56 | /// Strings are like this). 57 | /// 58 | /// - Note: If you are using this for searching only (not insertion), you 59 | /// must always test the element at the returned index to ensure that 60 | /// it's a genuine match. If the element is not present in the array, 61 | /// you will still get a valid index back that represents the location 62 | /// where it should be inserted. Also check to be sure the returned 63 | /// index isn't off the end of the collection. 64 | /// 65 | /// - Parameter keyPath: KeyPath that extracts the Element value on which 66 | /// the Collection is presorted. Must be Comparable and Equatable. 67 | /// ordering is presumed to be <, however that is defined for the type. 68 | /// 69 | /// - Returns: The index of a matching element, or nil if not found. If 70 | /// the return value is non-nil, it is always a valid index. 71 | 72 | func indexOfElement(withValue value: T, atKeyPath keyPath: KeyPath) -> Index? where T: Comparable & Equatable { 73 | let ix = binarySearch { $0[keyPath: keyPath] < value } 74 | guard ix < endIndex else { return nil } 75 | guard self[ix][keyPath: keyPath] == value else { return nil } 76 | return ix 77 | } 78 | 79 | func element(withValue value: T, atKeyPath keyPath: KeyPath) -> Element? where T: Comparable & Equatable { 80 | if let ix = indexOfElement(withValue: value, atKeyPath: keyPath) { 81 | return self[ix] 82 | } 83 | return nil 84 | } 85 | 86 | func elements(withValue value: T, atKeyPath keyPath: KeyPath) -> [Element] where T: Comparable & Equatable { 87 | guard let start = indexOfElement(withValue: value, atKeyPath: keyPath) else { return [] } 88 | var end = index(after: start) 89 | while end < endIndex && self[end][keyPath: keyPath] == value { 90 | end = index(after: end) 91 | } 92 | return Array(self[start.. 
Bool { 24 | return true 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /Sources/CharacterExt.swift: -------------------------------------------------------------------------------- 1 | // 2 | // CharacterExt.swift 3 | // SwifSoup 4 | // 5 | // Created by Nabil Chatbi on 08/10/16. 6 | // 7 | 8 | import Foundation 9 | 10 | extension Character { 11 | 12 | public static let space: Character = " " 13 | public static let BackslashT: Character = "\t" 14 | public static let BackslashN: Character = "\n" 15 | public static let BackslashF: Character = Character(UnicodeScalar(12)) 16 | public static let BackslashR: Character = "\r" 17 | public static let BackshashRBackslashN: Character = "\r\n" 18 | 19 | //http://www.unicode.org/glossary/#supplementary_code_point 20 | public static let MIN_SUPPLEMENTARY_CODE_POINT: UInt32 = 0x010000 21 | 22 | /// True for any space character, and the control characters \t, \n, \r, \f, \v. 23 | 24 | var isWhitespace: Bool { 25 | switch self { 26 | case Character.space, Character.BackslashT, Character.BackslashN, Character.BackslashF, Character.BackslashR: return true 27 | case Character.BackshashRBackslashN: return true 28 | default: return false 29 | 30 | } 31 | } 32 | 33 | /// `true` if `self` normalized contains a single code unit that is in the category of Decimal Numbers. 34 | var isDigit: Bool { 35 | 36 | return isMemberOfCharacterSet(CharacterSet.decimalDigits) 37 | 38 | } 39 | 40 | /// Lowercase `self`. 41 | var lowercase: Character { 42 | 43 | let str = String(self).lowercased() 44 | return str[str.startIndex] 45 | 46 | } 47 | 48 | /// Return `true` if `self` normalized contains a single code unit that is a member of the supplied character set. 49 | /// 50 | /// - parameter set: The `NSCharacterSet` used to test for membership. 51 | /// - returns: `true` if `self` normalized contains a single code unit that is a member of the supplied character set. 
func isMemberOfCharacterSet(_ set: CharacterSet) -> Bool {
        let scalars = String(self).precomposedStringWithCanonicalMapping.unicodeScalars

        // Only a character that normalizes to exactly one scalar can be a member.
        guard scalars.count == 1, let only = scalars.first else { return false }
        return set.contains(only)
    }

    /// Build a Character from an integer code point.
    /// Traps (force-unwrap) when `value` is not a valid Unicode scalar value.
    static func convertFromIntegerLiteral(value: IntegerLiteralType) -> Character {
        let scalar = UnicodeScalar(value)!
        return Character(scalar)
    }

    /// Static form of `isLetter()`.
    static func isLetter(_ char: Character) -> Bool {
        return char.isLetter()
    }

    /// True when the character is a member of `CharacterSet.letters`.
    func isLetter() -> Bool {
        return isMemberOfCharacterSet(CharacterSet.letters)
    }

    /// Static form of `isLetterOrDigit()`.
    static func isLetterOrDigit(_ char: Character) -> Bool {
        return char.isLetterOrDigit()
    }

    /// True when the character is a letter or, failing that, a digit.
    func isLetterOrDigit() -> Bool {
        return isLetter() || isDigit
    }
}

--------------------------------------------------------------------------------
/Sources/Cleaner.swift:
--------------------------------------------------------------------------------
//
//  Cleaner.swift
//  SwiftSoup
//
//  Created by Nabil Chatbi on 15/10/16.
//

import Foundation

open class Cleaner {
    fileprivate let headWhitelist: Whitelist?
    fileprivate let bodyWhitelist: Whitelist

    /// Create a new cleaner, that sanitizes documents' `<head>` and `<body>` using the supplied whitelists.
    /// - Parameters:
    ///   - headWhitelist: Whitelist to clean the head with; pass nil to leave the head out
    ///   - bodyWhitelist: Whitelist to clean the body with
    public init(headWhitelist: Whitelist?, bodyWhitelist: Whitelist) {
        self.headWhitelist = headWhitelist
        self.bodyWhitelist = bodyWhitelist
    }

    /// Create a new cleaner, that sanitizes documents' `<body>` using the supplied whitelist.
/// - Parameter whitelist: Whitelist to clean the body with
    convenience init(_ whitelist: Whitelist) {
        self.init(headWhitelist: nil, bodyWhitelist: whitelist)
    }

    /// Creates a new, clean document, from the original dirty document, containing only elements allowed by the whitelist.
    /// The original document is not modified. The dirty document's `<body>` is always processed; its `<head>` is
    /// processed only when this cleaner was created with a head whitelist.
    /// - Parameter dirtyDocument: Untrusted base document to clean.
    /// - Returns: A cleaned document.
    public func clean(_ dirtyDocument: Document) throws -> Document {
        let cleaned = Document.createShell(dirtyDocument.getBaseUri())

        // Head: only when a head whitelist exists. Frameset documents won't have a head;
        // the clean doc will then keep its empty head.
        if let headWhitelist {
            if let sourceHead = dirtyDocument.head(), let targetHead = cleaned.head() {
                try copySafeNodes(sourceHead, targetHead, whitelist: headWhitelist)
            }
        }

        // Body: frameset documents won't have a body; the clean doc will then keep its empty body.
        if let sourceBody = dirtyDocument.body(), let targetBody = cleaned.body() {
            try copySafeNodes(sourceBody, targetBody, whitelist: bodyWhitelist)
        }

        return cleaned
    }

    /// Determines if the input document is valid, against the whitelist. It is considered valid if all the tags and attributes
    /// in the input HTML are allowed by the whitelist.
    ///
    /// This method can be used as a validator for user input forms. An invalid document will still be cleaned successfully
    /// using the ``clean(_:)`` method. If using as a validator, it is recommended to still clean the document
    /// to ensure enforced attributes are set correctly, and that the output is tidied.
/// - Parameter dirtyDocument: document to test
    /// - Returns: true if no tags or attributes need to be removed; false if they do. Also false when the
    ///   document has no `<body>` (e.g. a frameset document), since `clean(_:)` would drop its content.
    public func isValid(_ dirtyDocument: Document) throws -> Bool {
        let clean = Document.createShell(dirtyDocument.getBaseUri())
        // `body()` is optional (see `clean(_:)`); never force-unwrap untrusted input.
        guard let dirtyBody = dirtyDocument.body(), let cleanBody = clean.body() else {
            return false
        }
        let numDiscarded = try copySafeNodes(dirtyBody, cleanBody, whitelist: bodyWhitelist)
        return numDiscarded == 0
    }

    /// Copy everything under `source` that `whitelist` allows into `dest`.
    /// - Returns: the number of nodes and attributes that were discarded.
    @discardableResult
    fileprivate func copySafeNodes(_ source: Element, _ dest: Element, whitelist: Whitelist) throws -> Int {
        let cleaningVisitor = Cleaner.CleaningVisitor(source, dest, whitelist)
        try NodeTraversor(cleaningVisitor).traverse(source)
        return cleaningVisitor.numDiscarded
    }
}

extension Cleaner {
    /// Node visitor that rebuilds the whitelisted part of a tree under `destination`
    /// and counts what it had to drop.
    fileprivate final class CleaningVisitor: NodeVisitor {
        private(set) var numDiscarded = 0

        private let root: Element
        private var destination: Element? // current element to append nodes to

        private let whitelist: Whitelist

        public init(_ root: Element, _ destination: Element, _ whitelist: Whitelist) {
            self.root = root
            self.destination = destination
            self.whitelist = whitelist
        }

        /// Called when a node is entered: copy it if allowed, otherwise count it as discarded.
        public func head(_ source: Node, _ depth: Int) throws {
            if let sourceEl = source as? Element {
                if whitelist.isSafeTag(sourceEl.tagNameUTF8()) { // safe, clone and copy safe attrs
                    let meta = try createSafeElement(sourceEl)
                    let destChild = meta.el
                    try destination?.appendChild(destChild)

                    numDiscarded += meta.numAttribsDiscarded
                    destination = destChild // descend: children are appended to the copy
                } else if source != root { // not a safe tag, so don't add. don't count root against discarded.
                    numDiscarded += 1
                }
            } else if let sourceText = source as? TextNode {
                let destText = TextNode(sourceText.getWholeTextUTF8(), source.getBaseUriUTF8())
                try destination?.appendChild(destText)
            } else if let sourceData = source as? DataNode {
                // Data is kept only when its parent element is itself whitelisted.
                if let dataParent = sourceData.parent(), whitelist.isSafeTag(dataParent.nodeNameUTF8()) {
                    let destData = DataNode(sourceData.getWholeDataUTF8(), source.getBaseUriUTF8())
                    try destination?.appendChild(destData)
                } else {
                    numDiscarded += 1
                }
            } else { // else, we don't care about comments, xml proc instructions, etc
                numDiscarded += 1
            }
        }

        /// Called when a node is left: undo the descent made in `head` for whitelisted elements.
        public func tail(_ source: Node, _ depth: Int) throws {
            if let element = source as? Element, whitelist.isSafeTag(element.nodeNameUTF8()) {
                // would have descended, so pop destination stack
                destination = destination?.parent()
            }
        }

        /// Build a copy of `sourceEl` carrying only whitelisted attributes plus the
        /// whitelist's enforced attributes, and report how many attributes were dropped.
        private func createSafeElement(_ sourceEl: Element) throws -> ElementMeta {
            let sourceTag = sourceEl.tagName()
            let destAttrs = Attributes()
            var numDiscarded = 0

            if let sourceAttrs = sourceEl.getAttributes() {
                for sourceAttr in sourceAttrs {
                    if try whitelist.isSafeAttribute(sourceTag, sourceEl, sourceAttr) {
                        destAttrs.put(attribute: sourceAttr)
                    } else {
                        numDiscarded += 1
                    }
                }
            }
            let enforcedAttrs = try whitelist.getEnforcedAttributes(sourceTag)
            destAttrs.addAll(incoming: enforcedAttrs)

            let dest = try Element(Tag.valueOf(sourceTag.utf8Array), sourceEl.getBaseUriUTF8(), destAttrs)
            return ElementMeta(dest, numDiscarded)
        }
    }
}

extension Cleaner {
    /// A copied element together with the number of attributes dropped while copying it.
    fileprivate struct ElementMeta {
        let el: Element
        let numAttribsDiscarded: Int

        init(_ el: Element, _ numAttribsDiscarded: Int) {
            self.el = el
            self.numAttribsDiscarded = numAttribsDiscarded
        }
    }
}

--------------------------------------------------------------------------------
/Sources/Collector.swift:
--------------------------------------------------------------------------------
//
//  Collector.swift
//  SwiftSoup
//
//  Created by
Nabil Chatbi on 22/10/16.
//

import Foundation

/**
 * Gathers the elements of a tree that satisfy an evaluator.
 *
 */
open class Collector {

    // Not instantiable; only the static entry point is used.
    private init() {
    }

    /**
     Walk root and every descendant of root, keeping each element the evaluator accepts.
     @param eval Evaluator to test elements against
     @param root root of tree to descend
     @return list of matches; empty if none
     */
    public static func collect(_ eval: Evaluator, _ root: Element) throws -> Elements {
        let matches = Elements()
        let visitor = Accumulator(root, matches, eval)
        try NodeTraversor(visitor).traverse(root)
        return matches
    }

}

/// Visitor that appends every matching element to a shared `Elements` list.
private final class Accumulator: NodeVisitor {
    private let root: Element
    private let elements: Elements
    private let eval: Evaluator

    init(_ root: Element, _ elements: Elements, _ eval: Evaluator) {
        self.root = root
        self.elements = elements
        self.eval = eval
    }

    /// Test the node on entry. Non-element nodes are ignored, and an error thrown
    /// by the evaluator is treated the same as "no match".
    @inlinable
    public func head(_ node: Node, _ depth: Int) {
        if let candidate = node as? Element, (try? eval.matches(root, candidate)) == true {
            elements.add(candidate)
        }
    }

    public func tail(_ node: Node, _ depth: Int) {
        // nothing to do when leaving a node
    }
}

--------------------------------------------------------------------------------
/Sources/CombiningEvaluator.swift:
--------------------------------------------------------------------------------
//
//  CombiningEvaluator.swift
//  SwiftSoup
//
//  Created by Nabil Chatbi on 23/10/16.
//

import Foundation

/**
 * Base combining (and, or) evaluator.
12 | */ 13 | public class CombiningEvaluator: Evaluator { 14 | 15 | public private(set) var evaluators: Array 16 | var num: Int = 0 17 | 18 | public override init() { 19 | evaluators = Array() 20 | super.init() 21 | } 22 | 23 | public init(_ evaluators: Array) { 24 | self.evaluators = evaluators 25 | super.init() 26 | updateNumEvaluators() 27 | } 28 | 29 | public init(_ evaluators: Evaluator...) { 30 | self.evaluators = evaluators 31 | super.init() 32 | updateNumEvaluators() 33 | } 34 | 35 | func rightMostEvaluator() -> Evaluator? { 36 | return num > 0 && evaluators.count > 0 ? evaluators[num - 1] : nil 37 | } 38 | 39 | func replaceRightMostEvaluator(_ replacement: Evaluator) { 40 | evaluators[num - 1] = replacement 41 | } 42 | 43 | func updateNumEvaluators() { 44 | // used so we don't need to bash on size() for every match test 45 | num = evaluators.count 46 | } 47 | 48 | public final class And: CombiningEvaluator { 49 | public override init(_ evaluators: [Evaluator]) { 50 | super.init(evaluators) 51 | } 52 | 53 | public override init(_ evaluators: Evaluator...) { 54 | super.init(evaluators) 55 | } 56 | 57 | public override func matches(_ root: Element, _ node: Element) -> Bool { 58 | for index in 0.. String { 71 | let array: [String] = evaluators.map { String($0.toString()) } 72 | return StringUtil.join(array, sep: " ") 73 | } 74 | } 75 | 76 | public final class Or: CombiningEvaluator { 77 | /** 78 | * Create a new Or evaluator. The initial evaluators are ANDed together and used as the first clause of the OR. 79 | * @param evaluators initial OR clause (these are wrapped into an AND evaluator). 80 | */ 81 | public override init(_ evaluators: [Evaluator]) { 82 | super.init() 83 | if num > 1 { 84 | self.evaluators.append(And(evaluators)) 85 | } else { // 0 or 1 86 | self.evaluators.append(contentsOf: evaluators) 87 | } 88 | updateNumEvaluators() 89 | } 90 | 91 | override init(_ evaluators: Evaluator...) 
{ 92 | super.init() 93 | if num > 1 { 94 | self.evaluators.append(And(evaluators)) 95 | } else { // 0 or 1 96 | self.evaluators.append(contentsOf: evaluators) 97 | } 98 | updateNumEvaluators() 99 | } 100 | 101 | override init() { 102 | super.init() 103 | } 104 | 105 | public func add(_ evaluator: Evaluator) { 106 | evaluators.append(evaluator) 107 | updateNumEvaluators() 108 | } 109 | 110 | public override func matches(_ root: Element, _ node: Element) -> Bool { 111 | for index in 0.. String { 123 | return ":or\(evaluators.map {String($0.toString())})" 124 | } 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /Sources/Comment.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Comment.swift 3 | // SwiftSoup 4 | // 5 | // Created by Nabil Chatbi on 22/10/16. 6 | // 7 | 8 | import Foundation 9 | 10 | /** 11 | A comment node. 12 | */ 13 | public class Comment: Node { 14 | private static let COMMENT_KEY: [UInt8] = UTF8Arrays.comment 15 | 16 | /** 17 | Create a new comment node. 18 | @param data The contents of the comment 19 | @param baseUri base URI 20 | */ 21 | public init(_ data: [UInt8], _ baseUri: [UInt8]) { 22 | super.init(baseUri) 23 | do { 24 | try attributes?.put(Comment.COMMENT_KEY, data) 25 | } catch {} 26 | } 27 | 28 | public override func nodeNameUTF8() -> [UInt8] { 29 | return nodeName().utf8Array 30 | } 31 | 32 | public override func nodeName() -> String { 33 | return "#comment" 34 | } 35 | 36 | /** 37 | Get the contents of the comment. 
38 | @return comment content 39 | */ 40 | public func getData() -> String { 41 | return String(decoding: getDataUTF8(), as: UTF8.self) 42 | } 43 | 44 | public func getDataUTF8() -> [UInt8] { 45 | return attributes!.get(key: Comment.COMMENT_KEY) 46 | } 47 | 48 | override func outerHtmlHead(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) { 49 | if (out.prettyPrint()) { 50 | indent(accum, depth, out) 51 | } 52 | accum 53 | .append("") 56 | } 57 | 58 | override func outerHtmlTail(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) {} 59 | 60 | public override func copy(with zone: NSZone? = nil) -> Any { 61 | let clone = Comment(attributes!.get(key: Comment.COMMENT_KEY), baseUri!) 62 | return copy(clone: clone) 63 | } 64 | 65 | public override func copy(parent: Node?) -> Node { 66 | let clone = Comment(attributes!.get(key: Comment.COMMENT_KEY), baseUri!) 67 | return copy(clone: clone, parent: parent) 68 | } 69 | 70 | public override func copy(clone: Node, parent: Node?) -> Node { 71 | return super.copy(clone: clone, parent: parent) 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /Sources/Connection.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Connection.swift 3 | // SwifSoup 4 | // 5 | // Created by Nabil Chatbi on 29/09/16. 6 | // 7 | 8 | import Foundation 9 | //TODO: 10 | -------------------------------------------------------------------------------- /Sources/DataNode.swift: -------------------------------------------------------------------------------- 1 | // 2 | // DataNode.swift 3 | // SwifSoup 4 | // 5 | // Created by Nabil Chatbi on 29/09/16. 6 | // 7 | 8 | import Foundation 9 | 10 | /** 11 | A data node, for contents of style, script tags etc, where contents should not show in text(). 12 | */ 13 | open class DataNode: Node { 14 | private static let DATA_KEY = "data".utf8Array 15 | 16 | /** 17 | Create a new DataNode. 
@param data data contents
     @param baseUri base URI
     */
    public init(_ data: [UInt8], _ baseUri: [UInt8]) {
        super.init(baseUri)
        do {
            // Store the payload under DATA_KEY; an error thrown by `put` is silently ignored.
            try attributes?.put(DataNode.DATA_KEY, data)
        } catch {}

    }

    /// UTF-8 bytes of `nodeName()`.
    open override func nodeNameUTF8() -> [UInt8] {
        return nodeName().utf8Array
    }

    /// Node name of a data node; always "#data".
    open override func nodeName() -> String {
        return "#data"
    }

    /**
     Get the data contents of this node. Will be unescaped and with original new lines, space etc.
     @return data, decoded from the stored UTF-8 bytes
     */
    open func getWholeData() -> String {
        return String(decoding: getWholeDataUTF8(), as: UTF8.self)
    }

    /// Raw bytes stored under DATA_KEY. Force-unwraps `attributes`, so this traps if it is nil.
    open func getWholeDataUTF8() -> [UInt8] {
        return attributes!.get(key: DataNode.DATA_KEY)
    }

    /**
     * Set the data contents of this node.
     * An error thrown while storing the value is silently ignored.
     * @param data unencoded data
     * @return this node, for chaining
     */
    @discardableResult
    open func setWholeData(_ data: String) -> DataNode {
        do {
            try attributes?.put(DataNode.DATA_KEY, data.utf8Array)
        } catch {}
        return self
    }

    /// Write the node's data to `accum` as-is; `depth` and `out` are not used.
    override func outerHtmlHead(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings)throws {
        accum.append(getWholeData()) // data is not escaped in return from data nodes, so " in script, style is plain
    }

    /// Data nodes emit nothing on the way out.
    override func outerHtmlTail(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) {}

    /**
     Create a new DataNode from HTML encoded data.
     @param encodedData encoded data; passed through `Entities.unescape` before being stored
     @param baseUri base URI
     @return new DataNode
     */
    public static func createFromEncoded(_ encodedData: String, _ baseUri: String) throws -> DataNode {
        let data = try Entities.unescape(encodedData.utf8Array)
        return DataNode(data, baseUri.utf8Array)
    }

    /// Copy this node. Force-unwraps `attributes` and `baseUri`, so this traps if either is nil.
    public override func copy(with zone: NSZone? = nil) -> Any {
        let clone = DataNode(attributes!.get(key: DataNode.DATA_KEY), baseUri!)
81 | return copy(clone: clone) 82 | } 83 | 84 | public override func copy(parent: Node?) -> Node { 85 | let clone = DataNode(attributes!.get(key: DataNode.DATA_KEY), baseUri!) 86 | return copy(clone: clone, parent: parent) 87 | } 88 | 89 | public override func copy(clone: Node, parent: Node?) -> Node { 90 | return super.copy(clone: clone, parent: parent) 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /Sources/DataUtil.swift: -------------------------------------------------------------------------------- 1 | // 2 | // DataUtil.swift 3 | // SwifSoup 4 | // 5 | // Created by Nabil Chatbi on 02/10/16. 6 | // 7 | 8 | import Foundation 9 | 10 | /** 11 | * Internal static utilities for handling data. 12 | * 13 | */ 14 | class DataUtil { 15 | 16 | static let charsetPattern = "(?i)\\bcharset=\\s*(?:\"|')?([^\\s,;\"']*)" 17 | static let defaultCharset = "UTF-8" // used if not found in header or meta charset 18 | static let bufferSize = 0x20000 // ~130K. 19 | static let UNICODE_BOM = 0xFEFF 20 | static let mimeBoundaryChars = "-_1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" 21 | static let boundaryLength = 32 22 | 23 | } 24 | -------------------------------------------------------------------------------- /Sources/DocumentType.swift: -------------------------------------------------------------------------------- 1 | // 2 | // DocumentType.swift 3 | // SwifSoup 4 | // 5 | // Created by Nabil Chatbi on 29/09/16. 6 | // 7 | 8 | import Foundation 9 | 10 | /** 11 | * A {@code } node. 
12 | */ 13 | public class DocumentType: Node { 14 | static let PUBLIC_KEY = "PUBLIC".utf8Array 15 | static let SYSTEM_KEY = "SYSTEM".utf8Array 16 | private static let NAME = "name".utf8Array 17 | private static let PUB_SYS_KEY = "pubSysKey".utf8Array // PUBLIC or SYSTEM 18 | private static let PUBLIC_ID = "publicId".utf8Array 19 | private static let SYSTEM_ID = "systemId".utf8Array 20 | // todo: quirk mode from publicId and systemId 21 | 22 | /** 23 | * Create a new doctype element. 24 | * @param name the doctype's name 25 | * @param publicId the doctype's public ID 26 | * @param systemId the doctype's system ID 27 | * @param baseUri the doctype's base URI 28 | */ 29 | public convenience init(_ name: String, _ publicId: String, _ systemId: String, _ baseUri: String) { 30 | self.init(name.utf8Array, publicId.utf8Array, systemId.utf8Array, baseUri.utf8Array) 31 | } 32 | 33 | public init(_ name: [UInt8], _ publicId: [UInt8], _ systemId: [UInt8], _ baseUri: [UInt8]) { 34 | super.init(baseUri) 35 | do { 36 | try attr(DocumentType.NAME, name) 37 | try attr(DocumentType.PUBLIC_ID, publicId) 38 | if (has(DocumentType.PUBLIC_ID)) { 39 | try attr(DocumentType.PUB_SYS_KEY, DocumentType.PUBLIC_KEY) 40 | } 41 | try attr(DocumentType.SYSTEM_ID, systemId) 42 | } catch {} 43 | } 44 | 45 | /** 46 | * Create a new doctype element. 47 | * @param name the doctype's name 48 | * @param publicId the doctype's public ID 49 | * @param systemId the doctype's system ID 50 | * @param baseUri the doctype's base URI 51 | */ 52 | public init(_ name: [UInt8], _ pubSysKey: [UInt8]?, _ publicId: [UInt8], _ systemId: [UInt8], _ baseUri: [UInt8]) { 53 | super.init(baseUri) 54 | do { 55 | try attr(DocumentType.NAME, name) 56 | if(pubSysKey != nil) { 57 | try attr(DocumentType.PUB_SYS_KEY, pubSysKey!) 
58 | } 59 | try attr(DocumentType.PUBLIC_ID, publicId) 60 | try attr(DocumentType.SYSTEM_ID, systemId) 61 | } catch {} 62 | } 63 | 64 | public override func nodeNameUTF8() -> [UInt8] { 65 | return nodeName().utf8Array 66 | } 67 | 68 | public override func nodeName() -> String { 69 | return "#doctype" 70 | } 71 | 72 | override func outerHtmlHead(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) { 73 | if (out.syntax() == OutputSettings.Syntax.html && !has(DocumentType.PUBLIC_ID) && !has(DocumentType.SYSTEM_ID)) { 74 | // looks like a html5 doctype, go lowercase for aesthetics 75 | accum.append("") 105 | } 106 | 107 | override func outerHtmlTail(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) { 108 | } 109 | 110 | private func has(_ attribute: [UInt8]) -> Bool { 111 | do { 112 | return !StringUtil.isBlank(try String(decoding: attr(attribute), as: UTF8.self)) 113 | } catch {return false} 114 | } 115 | 116 | public override func copy(with zone: NSZone? = nil) -> Any { 117 | let clone = DocumentType(attributes!.get(key: DocumentType.NAME), 118 | attributes!.get(key: DocumentType.PUBLIC_ID), 119 | attributes!.get(key: DocumentType.SYSTEM_ID), 120 | baseUri!) 121 | return copy(clone: clone) 122 | } 123 | 124 | public override func copy(parent: Node?) -> Node { 125 | let clone = DocumentType(attributes!.get(key: DocumentType.NAME), 126 | attributes!.get(key: DocumentType.PUBLIC_ID), 127 | attributes!.get(key: DocumentType.SYSTEM_ID), 128 | baseUri!) 129 | return copy(clone: clone, parent: parent) 130 | } 131 | 132 | public override func copy(clone: Node, parent: Node?) -> Node { 133 | return super.copy(clone: clone, parent: parent) 134 | } 135 | 136 | } 137 | -------------------------------------------------------------------------------- /Sources/Exception.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Exception.swift 3 | // SwifSoup 4 | // 5 | // Created by Nabil Chatbi on 02/10/16. 
//

import Foundation

/// Category tag carried by `Exception.Error`.
public enum ExceptionType {
    case IllegalArgumentException
    case IOException
    case XmlDeclaration
    case MalformedURLException
    case CloneNotSupportedException
    case SelectorParseException
}

/// Error type with a single case that pairs an `ExceptionType` with a message string.
public enum Exception: Error {
    case Error(type:ExceptionType, Message: String)
}

--------------------------------------------------------------------------------
/Sources/FormElement.swift:
--------------------------------------------------------------------------------
//
//  FormElement.swift
//  SwiftSoup
//
//  Created by Nabil Chatbi on 29/09/16.
//

import Foundation

/**
 * A HTML Form Element provides ready access to the form fields/controls that are associated with it. It also allows a
 * form to easily be submitted.
 */
public class FormElement: Element {
    // Form controls registered with this form via addElement(_:).
    private let _elements: Elements = Elements()

    /**
     * Create a new, standalone form element.
     *
     * @param tag tag of this element
     * @param baseUri the base URI
     * @param attributes initial attributes
     */
    public override init(_ tag: Tag, _ baseUri: [UInt8], _ attributes: Attributes) {
        super.init(tag, baseUri, attributes)
    }

    /**
     * Create a new, standalone form element.
     *
     * @param tag tag of this element
     * @param baseUri the base URI
     */
    public override init(_ tag: Tag, _ baseUri: [UInt8]) {
        super.init(tag, baseUri)
    }

    /**
     * Get the list of form control elements associated with this form.
     * The stored list itself is returned, not a copy.
     * @return form controls associated with this element.
     */
    public func elements() -> Elements {
        return _elements
    }

    /**
     * Add a form control element to this form.
48 | * @param element form control to add 49 | * @return this form element, for chaining 50 | */ 51 | @discardableResult 52 | public func addElement(_ element: Element) -> FormElement { 53 | _elements.add(element) 54 | return self 55 | } 56 | 57 | //todo: 58 | /** 59 | * Prepare to submit this form. A Connection object is created with the request set up from the form values. You 60 | * can then set up other options (like user-agent, timeout, cookies), then execute it. 61 | * @return a connection prepared from the values of this form. 62 | * @throws IllegalArgumentException if the form's absolute action URL cannot be determined. Make sure you pass the 63 | * document's base URI when parsing. 64 | */ 65 | // public func submit()throws->Connection { 66 | // let action: String = hasAttr("action") ? try absUrl("action") : try baseUri() 67 | // Validate.notEmpty(action, "Could not determine a form action URL for submit. Ensure you set a base URI when parsing.") 68 | // Connection.Method method = attr("method").toUpperCase().equals("POST") ? 69 | // Connection.Method.POST : Connection.Method.GET 70 | // 71 | // return Jsoup.connect(action) 72 | // .data(formData()) 73 | // .method(method) 74 | // } 75 | 76 | //todo: 77 | /** 78 | * Get the data that this form submits. The returned list is a copy of the data, and changes to the contents of the 79 | * list will not be reflected in the DOM. 
80 | * @return a list of key vals 81 | */ 82 | // public List formData() { 83 | // ArrayList data = new ArrayList(); 84 | // 85 | // // iterate the form control elements and accumulate their values 86 | // for (Element el: elements) { 87 | // if (!el.tag().isFormSubmittable()) continue; // contents are form listable, superset of submitable 88 | // if (el.hasAttr("disabled")) continue; // skip disabled form inputs 89 | // String name = el.attr("name"); 90 | // if (name.length() == 0) continue; 91 | // String type = el.attr("type"); 92 | // 93 | // if ("select".equals(el.tagName())) { 94 | // Elements options = el.select("option[selected]"); 95 | // boolean set = false; 96 | // for (Element option: options) { 97 | // data.add(HttpConnection.KeyVal.create(name, option.val())); 98 | // set = true; 99 | // } 100 | // if (!set) { 101 | // Element option = el.select("option").first(); 102 | // if (option != null) 103 | // data.add(HttpConnection.KeyVal.create(name, option.val())); 104 | // } 105 | // } else if ("checkbox".equalsIgnoreCase(type) || "radio".equalsIgnoreCase(type)) { 106 | // // only add checkbox or radio if they have the checked attribute 107 | // if (el.hasAttr("checked")) { 108 | // final String val = el.val().length() > 0 ? el.val() : "on"; 109 | // data.add(HttpConnection.KeyVal.create(name, val)); 110 | // } 111 | // } else { 112 | // data.add(HttpConnection.KeyVal.create(name, el.val())); 113 | // } 114 | // } 115 | // return data; 116 | // } 117 | 118 | public override func copy(with zone: NSZone? = nil) -> Any { 119 | let clone = FormElement(_tag, baseUri!, attributes!) 120 | return copy(clone: clone) 121 | } 122 | 123 | public override func copy(parent: Node?) -> Node { 124 | let clone = FormElement(_tag, baseUri!, attributes!) 125 | return copy(clone: clone, parent: parent) 126 | } 127 | public override func copy(clone: Node, parent: Node?) -> Node { 128 | let clone = clone as! 
FormElement 129 | for att in _elements.array() { 130 | clone._elements.add(att) 131 | } 132 | return super.copy(clone: clone, parent: parent) 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /Sources/HttpStatusException.swift: -------------------------------------------------------------------------------- 1 | // 2 | // HttpStatusException.swift 3 | // SwifSoup 4 | // 5 | // Created by Nabil Chatbi on 29/09/16. 6 | // 7 | 8 | import Foundation 9 | //TODO: 10 | -------------------------------------------------------------------------------- /Sources/Mutex.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Mutex.swift 3 | // SwiftSoup 4 | // 5 | // Created by xukun on 2022/3/31. 6 | // 7 | 8 | import Foundation 9 | 10 | #if os(Windows) 11 | import WinSDK 12 | #endif 13 | 14 | final class Mutex: NSLocking { 15 | #if os(Windows) 16 | private var mutex = CRITICAL_SECTION() 17 | 18 | init() { 19 | InitializeCriticalSection(&mutex) 20 | } 21 | 22 | deinit { 23 | DeleteCriticalSection(&mutex) 24 | } 25 | 26 | func lock() { 27 | EnterCriticalSection(&mutex) 28 | } 29 | 30 | func unlock() { 31 | LeaveCriticalSection(&mutex) 32 | } 33 | #else 34 | private var mutex = pthread_mutex_t() 35 | 36 | init() { 37 | pthread_mutex_init(&mutex, nil) 38 | } 39 | 40 | deinit { 41 | pthread_mutex_destroy(&mutex) 42 | } 43 | 44 | func lock() { 45 | pthread_mutex_lock(&mutex) 46 | } 47 | 48 | func unlock() { 49 | pthread_mutex_unlock(&mutex) 50 | } 51 | #endif 52 | } 53 | -------------------------------------------------------------------------------- /Sources/NodeTraversor.swift: -------------------------------------------------------------------------------- 1 | // 2 | // NodeTraversor.swift 3 | // SwiftSoup 4 | // 5 | // Created by Nabil Chatbi on 17/10/16. 
//

import Foundation

/// Drives a `NodeVisitor` over a node and all of its descendants, depth-first,
/// without recursion.
open class NodeTraversor {
    private let visitor: NodeVisitor

    /**
     * Create a new traversor.
     * @param visitor a class implementing the {@link NodeVisitor} interface, to be called when visiting each node.
     */
    public init(_ visitor: NodeVisitor) {
        self.visitor = visitor
    }

    /**
     * Start a depth-first traverse of the root and all of its descendants.
     *
     * `head` is called when a node is entered and `tail` once all of its children have
     * been visited. `depth` is 0 for `root` and grows by one per level.
     *
     * @param root the root node point to traverse. Nothing happens when it is nil.
     * @throws whatever the visitor throws, or `Exception.Error` (IllegalArgumentException)
     *         when a node below the root no longer has a parent while walking back up.
     */
    open func traverse(_ root: Node?) throws {
        var pending: Node? = root
        var depth: Int = 0

        while let entered = pending {
            try visitor.head(entered, depth)

            // Descend first: the first child is the next node to enter.
            if entered.hasChildNodes() {
                pending = entered.childNode(0)
                depth += 1
                continue
            }

            // Leaf reached: close every finished ancestor on the way back up.
            var cursor: Node = entered
            while !cursor.hasNextSibling() && depth > 0 {
                // Read the parent before tail(), which is allowed to remove the node.
                let parent: Node? = cursor.getParentNode()
                try visitor.tail(cursor, depth)
                guard let ancestor = parent else {
                    // Previously a force-unwrap trap; report the broken tree instead.
                    throw Exception.Error(type: ExceptionType.IllegalArgumentException,
                                          Message: "Node was detached from its parent during traversal")
                }
                cursor = ancestor
                depth -= 1
            }

            // Same reason as above: grab the sibling before tail() may detach the node.
            let following: Node? = cursor.nextSibling()
            try visitor.tail(cursor, depth)
            if cursor === root {
                break
            }
            pending = following
        }
    }
}
--------------------------------------------------------------------------------
/Sources/NodeVisitor.swift:
--------------------------------------------------------------------------------
//
//  NodeVisitor.swift
//  SwiftSoup
//
//  Created by Nabil Chatbi on 16/10/16.
//

import Foundation

/**
 * Node visitor interface. Provide an implementing class to {@link NodeTraversor} to iterate through nodes.
 *

13 | * This interface provides two methods, {@code head} and {@code tail}. The head method is called when the node is first 14 | * seen, and the tail method when all of the node's children have been visited. As an example, head can be used to 15 | * create a start tag for a node, and tail to create the end tag. 16 | *

17 | */ 18 | public protocol NodeVisitor { 19 | /** 20 | * Callback for when a node is first visited. {@code head} cannot safely call {@code node.remove()}. 21 | * 22 | * @param node the node being visited. 23 | * @param depth the depth of the node, relative to the root node. E.g., the root node has depth 0, and a child node 24 | * of that will have depth 1. 25 | */ 26 | func head(_ node: Node, _ depth: Int) throws 27 | 28 | /** 29 | * Callback for when a node is last visited, after all of its descendants have been visited. {@code tail} can safely call {@code node.remove()}. 30 | * 31 | * @param node the node being visited. 32 | * @param depth the depth of the node, relative to the root node. E.g., the root node has depth 0, and a child node 33 | * of that will have depth 1. 34 | */ 35 | func tail(_ node: Node, _ depth: Int) throws 36 | } 37 | -------------------------------------------------------------------------------- /Sources/ParseError.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ParseError.swift 3 | // SwiftSoup 4 | // 5 | // Created by Nabil Chatbi on 19/10/16. 6 | // 7 | 8 | import Foundation 9 | 10 | /** 11 | * A Parse Error records an error in the input HTML that occurs in either the tokenisation or the tree building phase. 12 | */ 13 | open class ParseError { 14 | private let pos: Int 15 | private let errorMsg: String 16 | 17 | init(_ pos: Int, _ errorMsg: String) { 18 | self.pos = pos 19 | self.errorMsg = errorMsg 20 | } 21 | 22 | /** 23 | * Retrieve the error message. 24 | * @return the error message. 25 | */ 26 | open func getErrorMessage() -> String { 27 | return errorMsg 28 | } 29 | 30 | /** 31 | * Retrieves the offset of the error. 
32 | * @return error offset within input 33 | */ 34 | open func getPosition() -> Int { 35 | return pos 36 | } 37 | 38 | open func toString() -> String { 39 | return "\(pos): " + errorMsg 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /Sources/ParseErrorList.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ParseErrorList.swift 3 | // SwiftSoup 4 | // 5 | // Created by Nabil Chatbi on 19/10/16. 6 | // 7 | 8 | import Foundation 9 | 10 | public class ParseErrorList { 11 | private static let INITIAL_CAPACITY: Int = 16 12 | private let maxSize: Int 13 | private let initialCapacity: Int 14 | private var array: Array = Array() 15 | 16 | init(_ initialCapacity: Int, _ maxSize: Int) { 17 | self.maxSize = maxSize 18 | self.initialCapacity = initialCapacity 19 | array = Array(repeating: nil, count: maxSize) 20 | } 21 | 22 | func canAddError() -> Bool { 23 | return array.count < maxSize 24 | } 25 | 26 | func getMaxSize() -> Int { 27 | return maxSize 28 | } 29 | 30 | static func noTracking() -> ParseErrorList { 31 | return ParseErrorList(0, 0) 32 | } 33 | 34 | static func tracking(_ maxSize: Int) -> ParseErrorList { 35 | return ParseErrorList(INITIAL_CAPACITY, maxSize) 36 | } 37 | 38 | // // you need to provide the Equatable functionality 39 | // static func ==(leftFoo: Foo, rightFoo: Foo) -> Bool { 40 | // return ObjectIdentifier(leftFoo) == ObjectIdentifier(rightFoo) 41 | // } 42 | 43 | open func add(_ e: ParseError) { 44 | array.append(e) 45 | } 46 | 47 | open func add(_ index: Int, _ element: ParseError) { 48 | array.insert(element, at: index) 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /Sources/ParseSettings.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ParseSettings.swift 3 | // SwiftSoup 4 | // 5 | // Created by Nabil Chatbi on 14/10/16. 
6 | // 7 | 8 | import Foundation 9 | 10 | open class ParseSettings { 11 | /** 12 | * HTML default settings: both tag and attribute names are lower-cased during parsing. 13 | */ 14 | public static let htmlDefault: ParseSettings = ParseSettings(false, false) 15 | /** 16 | * Preserve both tag and attribute case. 17 | */ 18 | public static let preserveCase: ParseSettings = ParseSettings(true, true) 19 | 20 | private let preserveTagCase: Bool 21 | private let preserveAttributeCase: Bool 22 | 23 | /** 24 | * Define parse settings. 25 | * @param tag preserve tag case? 26 | * @param attribute preserve attribute name case? 27 | */ 28 | public init(_ tag: Bool, _ attribute: Bool) { 29 | preserveTagCase = tag 30 | preserveAttributeCase = attribute 31 | } 32 | 33 | open func normalizeTag(_ name: [UInt8]) -> [UInt8] { 34 | var name = name.trim() 35 | if (!preserveTagCase) { 36 | name = name.lowercased() 37 | } 38 | return name 39 | } 40 | 41 | open func normalizeTag(_ name: String) -> String { 42 | return String(decoding: normalizeTag(name.utf8Array), as: UTF8.self) 43 | } 44 | 45 | open func normalizeAttribute(_ name: [UInt8]) -> [UInt8] { 46 | var name = name.trim() 47 | if (!preserveAttributeCase) { 48 | name = name.lowercased() 49 | } 50 | return name 51 | } 52 | 53 | open func normalizeAttribute(_ name: String) -> String { 54 | return String(decoding: normalizeAttribute(name.utf8Array), as: UTF8.self) 55 | } 56 | 57 | open func normalizeAttributes(_ attributes: Attributes) throws -> Attributes { 58 | if (!preserveAttributeCase) { 59 | attributes.lowercaseAllKeys() 60 | } 61 | return attributes 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /Sources/ParsingStrings.swift: -------------------------------------------------------------------------------- 1 | import Foundation 2 | 3 | @inline(__always) 4 | func setBit(in mask: inout (UInt64, UInt64, UInt64, UInt64), forByte b: UInt8) { 5 | let idx = Int(b >> 6) 6 | let 
shift = b & 63 7 | switch idx { 8 | case 0: mask.0 |= (1 << shift) 9 | case 1: mask.1 |= (1 << shift) 10 | case 2: mask.2 |= (1 << shift) 11 | default: mask.3 |= (1 << shift) 12 | } 13 | } 14 | 15 | @inline(__always) 16 | public func testBit(_ mask: (UInt64, UInt64, UInt64, UInt64), _ b: UInt8) -> Bool { 17 | let idx = Int(b >> 6) 18 | let shift = b & 63 19 | let val: UInt64 20 | switch idx { 21 | case 0: val = mask.0 22 | case 1: val = mask.1 23 | case 2: val = mask.2 24 | default: val = mask.3 25 | } 26 | return (val & (1 << shift)) != 0 27 | } 28 | 29 | final class TrieNode { 30 | // For fastest lookup: a 256-element array for direct indexing by byte 31 | var children: [TrieNode?] = .init(repeating: nil, count: 256) 32 | 33 | // Mark that a path ending at this node represents a complete string 34 | var isTerminal: Bool = false 35 | } 36 | 37 | public struct ParsingStrings: Hashable, Equatable { 38 | let multiByteChars: [[UInt8]] 39 | let multiByteCharLengths: [Int] 40 | public let multiByteByteLookups: [(UInt64, UInt64, UInt64, UInt64)] 41 | public let multiByteSet: Set> 42 | public let multiByteByteLookupsCount: Int 43 | public var singleByteMask: (UInt64, UInt64, UInt64, UInt64) = (0, 0, 0, 0) // Precomputed set for single-byte lookups 44 | private let precomputedHash: Int 45 | private let root = TrieNode() 46 | 47 | public init(_ strings: [String]) { 48 | self.init(strings.map { $0.utf8Array }) 49 | } 50 | 51 | public init(_ strings: [[UInt8]]) { 52 | multiByteChars = strings 53 | multiByteCharLengths = strings.map { $0.count } 54 | let maxLen = multiByteCharLengths.max() ?? 0 55 | 56 | var multiByteByteLookups: [(UInt64, UInt64, UInt64, UInt64)] = Array(repeating: (0,0,0,0), count: maxLen) 57 | 58 | for bytes in strings { 59 | guard !bytes.isEmpty else { continue } 60 | 61 | var current = root 62 | for b in bytes { 63 | if current.children[Int(b)] == nil { 64 | current.children[Int(b)] = TrieNode() 65 | } 66 | current = current.children[Int(b)]! 
67 | } 68 | current.isTerminal = true 69 | } 70 | 71 | for char in multiByteChars { 72 | if char.count == 1 { 73 | setBit(in: &singleByteMask, forByte: char[0]) 74 | } 75 | for (i, byte) in char.enumerated() { 76 | var mask = multiByteByteLookups[i] 77 | setBit(in: &mask, forByte: byte) 78 | multiByteByteLookups[i] = mask 79 | } 80 | } 81 | self.multiByteByteLookups = multiByteByteLookups 82 | multiByteByteLookupsCount = multiByteByteLookups.count 83 | 84 | multiByteSet = Set(multiByteChars.map { ArraySlice($0) }) 85 | self.precomputedHash = Self.computeHash( 86 | multiByteChars: multiByteChars, 87 | multiByteByteLookups: multiByteByteLookups 88 | ) 89 | } 90 | 91 | public init(_ strings: [UnicodeScalar]) { 92 | self.init(strings.map { Array($0.utf8) }) 93 | } 94 | 95 | private static func computeHash( 96 | multiByteChars: [[UInt8]], 97 | multiByteByteLookups: [(UInt64, UInt64, UInt64, UInt64)] 98 | ) -> Int { 99 | var hasher = Hasher() 100 | for char in multiByteChars { 101 | hasher.combine(char.count) 102 | for b in char { 103 | hasher.combine(b) 104 | } 105 | } 106 | for mbb in multiByteByteLookups { 107 | hasher.combine(mbb.0) 108 | hasher.combine(mbb.1) 109 | hasher.combine(mbb.2) 110 | hasher.combine(mbb.3) 111 | } 112 | return hasher.finalize() 113 | } 114 | 115 | public static func ==(lhs: ParsingStrings, rhs: ParsingStrings) -> Bool { 116 | return lhs.multiByteChars == rhs.multiByteChars 117 | } 118 | 119 | public func hash(into hasher: inout Hasher) { 120 | hasher.combine(precomputedHash) 121 | } 122 | 123 | @inlinable 124 | public func contains(_ bytes: [UInt8]) -> Bool { 125 | return contains(ArraySlice(bytes)) 126 | } 127 | 128 | @inlinable 129 | public func contains(_ slice: ArraySlice) -> Bool { 130 | var index = 0 131 | for byte in slice { 132 | if index >= multiByteByteLookupsCount || !testBit(multiByteByteLookups[index], byte) { 133 | return false 134 | } 135 | index &+= 1 136 | } 137 | return multiByteSet.contains(slice) 138 | } 139 | 140 | 
@inlinable 141 | public func contains(_ byte: UInt8) -> Bool { 142 | let idx = Int(byte >> 6) 143 | let shift = byte & 63 144 | 145 | // Pick which 64-bit in the tuple: 146 | let val: UInt64 147 | switch idx { 148 | case 0: val = singleByteMask.0 149 | case 1: val = singleByteMask.1 150 | case 2: val = singleByteMask.2 151 | default: val = singleByteMask.3 152 | } 153 | 154 | // If the corresponding bit is set, membership is true 155 | return (val & (1 << shift)) != 0 156 | } 157 | 158 | @inlinable 159 | public func contains(_ scalar: UnicodeScalar) -> Bool { 160 | // Fast path for ASCII 161 | if scalar.value < 0x80 { 162 | return contains(UInt8(scalar.value)) 163 | } 164 | 165 | var buffer = [UInt8](repeating: 0, count: 4) 166 | var length = 0 167 | for b in scalar.utf8 { 168 | buffer[length] = b 169 | length &+= 1 170 | } 171 | let slice = buffer[..) -> Bool { 182 | // Early single-byte check 183 | if slice.count == 1 { 184 | return contains(slice.first!) 185 | } 186 | 187 | var current = root 188 | for b in slice { 189 | guard let child = current.children[Int(b)] else { 190 | return false 191 | } 192 | current = child 193 | } 194 | return current.isTerminal 195 | } 196 | } 197 | -------------------------------------------------------------------------------- /Sources/Pattern.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Regex.swift 3 | // SwifSoup 4 | // 5 | // Created by Nabil Chatbi on 08/10/16. 
6 | // 7 | 8 | import Foundation 9 | 10 | public struct Pattern { 11 | public static let CASE_INSENSITIVE: Int = 0x02 12 | let pattern: String 13 | 14 | init(_ pattern: String) { 15 | self.pattern = pattern 16 | } 17 | 18 | static public func compile(_ s: String) -> Pattern { 19 | return Pattern(s) 20 | } 21 | static public func compile(_ s: String, _ op: Int) -> Pattern { 22 | return Pattern(s) 23 | } 24 | 25 | public func validate() throws { 26 | _ = try NSRegularExpression(pattern: self.pattern, options: []) 27 | } 28 | 29 | public func matcher(in text: String) -> Matcher { 30 | do { 31 | let regex = try NSRegularExpression(pattern: self.pattern, options: []) 32 | let nsString = NSString(string: text) 33 | let results = regex.matches(in: text, options: [], range: NSRange(location: 0, length: nsString.length)) 34 | 35 | return Matcher(results, text) 36 | } catch let error { 37 | print("invalid regex: \(error.localizedDescription)") 38 | return Matcher([], text) 39 | } 40 | } 41 | 42 | public func toString() -> String { 43 | return pattern 44 | } 45 | } 46 | 47 | public class Matcher { 48 | let matches: [NSTextCheckingResult] 49 | let string: String 50 | var index: Int = -1 51 | 52 | public var count: Int { return matches.count} 53 | 54 | init(_ m: [NSTextCheckingResult], _ s: String) { 55 | matches = m 56 | string = s 57 | } 58 | 59 | @discardableResult 60 | public func find() -> Bool { 61 | index += 1 62 | if(index < matches.count) { 63 | return true 64 | } 65 | return false 66 | } 67 | 68 | public func group(_ i: Int) -> String? { 69 | let b = matches[index] 70 | #if !os(Linux) && !swift(>=4) 71 | let c = b.rangeAt(i) 72 | #else 73 | let c = b.range(at: i) 74 | #endif 75 | 76 | if(c.location == NSNotFound) {return nil} 77 | let result = string.substring(c.location, c.length) 78 | return result 79 | } 80 | public func group() -> String? 
{ 81 | return group(0) 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /Sources/SerializationException.swift: -------------------------------------------------------------------------------- 1 | // 2 | // SerializationException.swift 3 | // SwifSoup 4 | // 5 | // Created by Nabil Chatbi on 29/09/16. 6 | // 7 | 8 | import Foundation 9 | //TODO: 10 | -------------------------------------------------------------------------------- /Sources/SimpleDictionary.swift: -------------------------------------------------------------------------------- 1 | // 2 | // SimpleDictionary.swift 3 | // SwiftSoup 4 | // 5 | // Created by Nabil Chatbi on 30/10/16. 6 | // 7 | 8 | import Foundation 9 | 10 | public class SimpleDictionary { 11 | 12 | public typealias DictionaryType = [KeyType: ValueType] 13 | public private(set) var values = DictionaryType() 14 | 15 | public init() { 16 | } 17 | 18 | public var count: Int { 19 | return values.count 20 | } 21 | 22 | public func remove(_ key: KeyType) { 23 | values.removeValue(forKey: key) 24 | } 25 | 26 | public func contains(_ key: KeyType) -> Bool { 27 | return self.values[key] != nil 28 | } 29 | 30 | public func put(_ value: ValueType, forKey key: KeyType) { 31 | self.values[key] = value 32 | } 33 | 34 | public func get(_ key: KeyType) -> ValueType? { 35 | return self.values[key] 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /Sources/StreamReader.swift: -------------------------------------------------------------------------------- 1 | // 2 | // StreamReader.swift 3 | // SwifSoup 4 | // 5 | // Created by Nabil Chatbi on 08/10/16. 6 | // 7 | 8 | import Foundation 9 | 10 | class StreamReader { 11 | 12 | let encoding: String.Encoding 13 | let chunkSize: Int 14 | var fileHandle: FileHandle! 
15 | let delimData: Data 16 | var buffer: Data 17 | var atEof: Bool 18 | 19 | init?(path: String, delimiter: String = "\n", encoding: String.Encoding = .utf8, 20 | chunkSize: Int = 4096) { 21 | 22 | guard let fileHandle = FileHandle(forReadingAtPath: path), 23 | let delimData = delimiter.data(using: encoding) else { 24 | return nil 25 | } 26 | self.encoding = encoding 27 | self.chunkSize = chunkSize 28 | self.fileHandle = fileHandle 29 | self.delimData = delimData 30 | self.buffer = Data(capacity: chunkSize) 31 | self.atEof = false 32 | } 33 | 34 | deinit { 35 | self.close() 36 | } 37 | 38 | /// Return next line, or nil on EOF. 39 | func nextLine() -> String? { 40 | precondition(fileHandle != nil, "Attempt to read from closed file") 41 | 42 | // Read data chunks from file until a line delimiter is found: 43 | while !atEof { 44 | if let range = buffer.range(of: delimData) { 45 | // Convert complete line (excluding the delimiter) to a string: 46 | let line = String(data: buffer.subdata(in: 0.. 0 { 53 | buffer.append(tmpData) 54 | } else { 55 | // EOF or read error. 56 | atEof = true 57 | if buffer.count > 0 { 58 | // Buffer contains last line in file (not terminated by delimiter). 59 | let line = String(data: buffer as Data, encoding: encoding) 60 | buffer.count = 0 61 | return line 62 | } 63 | } 64 | } 65 | return nil 66 | } 67 | 68 | /// Start reading from the beginning of file. 69 | func rewind() { 70 | fileHandle.seek(toFileOffset: 0) 71 | buffer.count = 0 72 | atEof = false 73 | } 74 | 75 | /// Close the underlying file. No reading must be done after calling this method. 
76 | func close() { 77 | fileHandle?.closeFile() 78 | fileHandle = nil 79 | } 80 | } 81 | 82 | extension StreamReader: Sequence { 83 | func makeIterator() -> AnyIterator { 84 | return AnyIterator { 85 | return self.nextLine() 86 | } 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /Sources/StringBuilder.swift: -------------------------------------------------------------------------------- 1 | /** 2 | Supports creation of a String from pieces 3 | Based on https://gist.github.com/kristopherjohnson/1fc55e811d944a430289 4 | */ 5 | open class StringBuilder { 6 | public var buffer: [UInt8] = [] 7 | 8 | /** 9 | Construct with initial String contents 10 | 11 | :param: string Initial value; defaults to empty string 12 | */ 13 | public init(string: String? = nil) { 14 | if let string, !string.isEmpty { 15 | buffer.append(contentsOf: string.utf8) 16 | } 17 | buffer.reserveCapacity(1024) 18 | } 19 | 20 | public init(_ size: Int) { 21 | buffer = Array() 22 | buffer.reserveCapacity(size) 23 | } 24 | 25 | /** 26 | Return the String object 27 | 28 | :return: String 29 | */ 30 | open func toString() -> String { 31 | return String(decoding: buffer, as: UTF8.self) 32 | } 33 | 34 | /** 35 | Return the current length of the String object 36 | */ 37 | open var xlength: Int { 38 | return buffer.count 39 | } 40 | 41 | open var isEmpty: Bool { 42 | return buffer.isEmpty 43 | } 44 | 45 | /** 46 | Append a String to the object 47 | 48 | :param: string String 49 | 50 | :return: reference to this StringBuilder instance 51 | */ 52 | @inline(__always) 53 | @discardableResult 54 | open func append(_ string: String) -> StringBuilder { 55 | buffer.append(contentsOf: string.utf8) 56 | return self 57 | } 58 | 59 | @inline(__always) 60 | open func append(_ chr: Character) { 61 | append(String(chr)) 62 | } 63 | 64 | @inline(__always) 65 | open func appendCodePoints(_ chr: [Character]) { 66 | append(String(chr)) 67 | } 68 | 69 | @inline(__always) 70 | 
open func appendCodePoint(_ ch: Int) { 71 | appendCodePoint(UnicodeScalar(ch)!) 72 | } 73 | 74 | @inlinable 75 | open func appendCodePoint(_ ch: UnicodeScalar) { 76 | let val = ch.value 77 | if val < 0x80 { 78 | // 1-byte ASCII 79 | buffer.append(UInt8(val)) 80 | } else if val < 0x800 { 81 | // 2-byte sequence 82 | buffer.append(contentsOf: [ 83 | UInt8(0xC0 | (val >> 6)), 84 | UInt8(0x80 | (val & 0x3F)) 85 | ]) 86 | } else if val < 0x10000 { 87 | // 3-byte sequence 88 | buffer.append(contentsOf: [ 89 | UInt8(0xE0 | (val >> 12)), 90 | UInt8(0x80 | ((val >> 6) & 0x3F)), 91 | UInt8(0x80 | (val & 0x3F)) 92 | ]) 93 | } else { 94 | // 4-byte sequence 95 | buffer.append(contentsOf: [ 96 | UInt8(0xF0 | (val >> 18)), 97 | UInt8(0x80 | ((val >> 12) & 0x3F)), 98 | UInt8(0x80 | ((val >> 6) & 0x3F)), 99 | UInt8(0x80 | (val & 0x3F)) 100 | ]) 101 | } 102 | } 103 | 104 | @inlinable 105 | open func appendCodePoints(_ chr: [UnicodeScalar]) { 106 | for chr in chr { 107 | appendCodePoint(chr) 108 | } 109 | } 110 | 111 | /** 112 | Append a Printable to the object 113 | 114 | :param: value a value supporting the Printable protocol 115 | 116 | :return: reference to this StringBuilder instance 117 | */ 118 | // @discardableResult 119 | // open func append(_ value: T) -> StringBuilder { 120 | // append(value.description) 121 | // return self 122 | // } 123 | 124 | @inlinable 125 | @discardableResult 126 | open func append(_ value: ArraySlice) -> StringBuilder { 127 | buffer.append(contentsOf: value) 128 | return self 129 | } 130 | 131 | @inlinable 132 | @discardableResult 133 | open func append(_ value: [UInt8]) -> StringBuilder { 134 | buffer.append(contentsOf: value) 135 | return self 136 | } 137 | 138 | @inlinable 139 | @discardableResult 140 | open func append(_ value: UInt8) -> StringBuilder { 141 | buffer.append(value) 142 | return self 143 | } 144 | 145 | @inlinable 146 | @discardableResult 147 | open func append(_ value: UnicodeScalar) -> StringBuilder { 148 | 
appendCodePoint(value) 149 | return self 150 | } 151 | 152 | /** 153 | Append a String and a newline to the object 154 | 155 | :param: string String 156 | 157 | :return: reference to this StringBuilder instance 158 | */ 159 | @discardableResult 160 | open func appendLine(_ string: String) -> StringBuilder { 161 | append(string) 162 | append("\n") 163 | return self 164 | } 165 | 166 | /** 167 | Append a Printable and a newline to the object 168 | 169 | :param: value a value supporting the Printable protocol 170 | 171 | :return: reference to this StringBuilder instance 172 | */ 173 | @discardableResult 174 | open func appendLine(_ value: T) -> StringBuilder { 175 | append(value.description) 176 | append("\n") 177 | return self 178 | } 179 | 180 | /** 181 | Reset the object to an empty string 182 | 183 | :return: reference to this StringBuilder instance 184 | */ 185 | @discardableResult 186 | @inlinable 187 | open func clear() -> StringBuilder { 188 | buffer.removeAll(keepingCapacity: true) 189 | return self 190 | } 191 | } 192 | 193 | /** 194 | Append a String to a StringBuilder using operator syntax 195 | 196 | :param: lhs StringBuilder 197 | :param: rhs String 198 | */ 199 | @inlinable 200 | public func += (lhs: StringBuilder, rhs: String) { 201 | lhs.append(rhs) 202 | } 203 | 204 | /** 205 | Append a Printable to a StringBuilder using operator syntax 206 | 207 | :param: lhs Printable 208 | :param: rhs String 209 | */ 210 | @inlinable 211 | public func += (lhs: StringBuilder, rhs: T) { 212 | lhs.append(rhs.description) 213 | } 214 | 215 | /** 216 | Create a StringBuilder by concatenating the values of two StringBuilders 217 | 218 | :param: lhs first StringBuilder 219 | :param: rhs second StringBuilder 220 | 221 | :result StringBuilder 222 | */ 223 | @inlinable 224 | public func +(lhs: StringBuilder, rhs: StringBuilder) -> StringBuilder { 225 | return StringBuilder(string: lhs.toString() + rhs.toString()) 226 | } 227 | 
--------------------------------------------------------------------------------
/Sources/StructuralEvaluator.swift:
--------------------------------------------------------------------------------
//
//  StructuralEvaluator.swift
//  SwiftSoup
//
//  Created by Nabil Chatbi on 23/10/16.
//

import Foundation

/**
 * Base structural evaluator.
 *
 * Wraps another `Evaluator` and applies it to elements related to the candidate
 * (descendants, ancestors, previous siblings) rather than to the candidate itself.
 * In the nested evaluators, an error thrown by the wrapped evaluator is swallowed
 * and treated as "no match".
 */
public class StructuralEvaluator: Evaluator {
    let evaluator: Evaluator

    public init(_ evaluator: Evaluator) {
        self.evaluator = evaluator
    }

    /// Matches only the root element itself (identity comparison).
    public class Root: Evaluator {
        public override func matches(_ root: Element, _ element: Element) -> Bool {
            return root === element
        }
    }

    /// Matches when any element returned by `getAllElements()`, other than the element
    /// itself, satisfies the wrapped evaluator.
    public class Has: StructuralEvaluator {
        public override init(_ evaluator: Evaluator) {
            super.init(evaluator)
        }

        public override func matches(_ root: Element, _ element: Element)throws->Bool {
            for candidate in try element.getAllElements().array() where candidate != element {
                do {
                    if try evaluator.matches(root, candidate) {
                        return true
                    }
                } catch {}
            }

            return false
        }

        public override func toString() -> String {
            return ":has(\(evaluator.toString()))"
        }
    }

    /// Negates the wrapped evaluator; a throwing evaluator yields `false`.
    public class Not: StructuralEvaluator {
        public override init(_ evaluator: Evaluator) {
            super.init(evaluator)
        }

        public override func matches(_ root: Element, _ node: Element) -> Bool {
            do {
                return try !evaluator.matches(root, node)
            } catch {}
            return false
        }

        public override func toString() -> String {
            return ":not\(evaluator.toString())"
        }
    }

    /// Matches when some ancestor of the element, up to and including `root`,
    /// satisfies the wrapped evaluator.
    public class Parent: StructuralEvaluator {
        public override init(_ evaluator: Evaluator) {
            super.init(evaluator)
        }

        public override func matches(_ root: Element, _ element: Element) -> Bool {
            if (root == element) {
                return false
            }

            // Walk upwards until `root` has been tested OR the ancestor chain ends.
            // The previous `while (true)` only exited on `parent == root`, so an
            // element that is not below `root` (parent eventually nil) looped forever.
            var ancestor = element.parent()
            while let candidate = ancestor {
                do {
                    if try evaluator.matches(root, candidate) {
                        return true
                    }
                } catch {}

                if (candidate == root) {
                    break
                }
                ancestor = candidate.parent()
            }
            return false
        }

        public override func toString() -> String {
            return ":parent\(evaluator.toString())"
        }
    }

    /// Matches when the element's direct parent satisfies the wrapped evaluator.
    public class ImmediateParent: StructuralEvaluator {
        public override init(_ evaluator: Evaluator) {
            super.init(evaluator)
        }

        public override func matches(_ root: Element, _ element: Element) -> Bool {
            if (root == element) {
                return false
            }

            if let parent = element.parent() {
                do {
                    return try evaluator.matches(root, parent)
                } catch {}
            }

            return false
        }

        public override func toString() -> String {
            return ":ImmediateParent\(evaluator.toString())"
        }
    }

    /// Matches when any earlier element sibling satisfies the wrapped evaluator.
    public class PreviousSibling: StructuralEvaluator {
        public override init(_ evaluator: Evaluator) {
            super.init(evaluator)
        }

        public override func matches(_ root: Element, _ element: Element)throws->Bool {
            if (root == element) {
                return false
            }

            var prev = try element.previousElementSibling()

            while let sibling = prev {
                do {
                    if (try evaluator.matches(root, sibling)) {
                        return true
                    }
                } catch {}

                prev = try sibling.previousElementSibling()
            }
            return false
        }

        public override func toString() -> String {
            return ":prev*\(evaluator.toString())"
        }
    }

    /// Matches when the element sibling directly before this one satisfies the wrapped evaluator.
    class ImmediatePreviousSibling: StructuralEvaluator {
        public override init(_ evaluator: Evaluator) {
            super.init(evaluator)
        }

        public override func matches(_ root: Element, _ element: Element)throws->Bool {
            if (root == element) {
                return false
            }

            if let prev = try element.previousElementSibling() {
                do {
                    return try evaluator.matches(root, prev)
                } catch {}
            }
            return false
        }

        public override func toString() -> String {
            return ":prev\(evaluator.toString())"
        }
    }
}
--------------------------------------------------------------------------------
/Sources/SwiftSoup.h:
--------------------------------------------------------------------------------
//
//  SwiftSoup.h
//  SwiftSoup
//
//  Created by Nabil Chatbi on 09/10/16.
//



--------------------------------------------------------------------------------
/Sources/TreeBuilder.swift:
--------------------------------------------------------------------------------
//
//  TreeBuilder.swift
//  SwiftSoup
//
//  Created by Nabil Chatbi on 24/10/16.
//

import Foundation

public class TreeBuilder {
    public var reader: CharacterReader
    var tokeniser: Tokeniser
    public var doc: Document // current doc we are building into
    public var stack: Array // the stack of open elements
    public var baseUri: [UInt8] // current base uri, for creating new elements
    public var currentToken: Token? // currentToken is used only for error tracking.
17 | public var errors: ParseErrorList // null when not tracking errors 18 | public var settings: ParseSettings 19 | 20 | private let start: Token.StartTag = Token.StartTag() // start tag to process 21 | private let end: Token.EndTag = Token.EndTag() 22 | 23 | public func defaultSettings() -> ParseSettings {preconditionFailure("This method must be overridden")} 24 | 25 | public init() { 26 | doc = Document([]) 27 | reader = CharacterReader([]) 28 | tokeniser = Tokeniser(reader, nil) 29 | stack = Array() 30 | baseUri = [] 31 | errors = ParseErrorList(0, 0) 32 | settings = ParseSettings(false, false) 33 | } 34 | 35 | public func initialiseParse(_ input: [UInt8], _ baseUri: [UInt8], _ errors: ParseErrorList, _ settings: ParseSettings) { 36 | doc = Document(baseUri) 37 | self.settings = settings 38 | reader = CharacterReader(input) 39 | self.errors = errors 40 | tokeniser = Tokeniser(reader, errors) 41 | stack = Array() 42 | self.baseUri = baseUri 43 | } 44 | 45 | func parse(_ input: [UInt8], _ baseUri: [UInt8], _ errors: ParseErrorList, _ settings: ParseSettings)throws->Document { 46 | initialiseParse(input, baseUri, errors, settings) 47 | try runParser() 48 | return doc 49 | } 50 | 51 | public func runParser() throws { 52 | while (true) { 53 | let token: Token = try tokeniser.read() 54 | try process(token) 55 | token.reset() 56 | 57 | if (token.type == Token.TokenType.EOF) { 58 | break 59 | } 60 | } 61 | } 62 | 63 | @discardableResult 64 | public func process(_ token: Token)throws->Bool {preconditionFailure("This method must be overridden")} 65 | 66 | @discardableResult 67 | public func processStartTag(_ name: [UInt8]) throws -> Bool { 68 | if (currentToken === start) { // don't recycle an in-use token 69 | return try process(Token.StartTag().name(name)) 70 | } 71 | return try process(start.reset().name(name)) 72 | } 73 | 74 | @discardableResult 75 | public func processStartTag(_ name: String) throws -> Bool { 76 | return try processStartTag(name.utf8Array) 77 | } 78 
| 79 | @discardableResult 80 | public func processStartTag(_ name: [UInt8], _ attrs: Attributes) throws -> Bool { 81 | if (currentToken === start) { // don't recycle an in-use token 82 | return try process(Token.StartTag().nameAttr(name, attrs)) 83 | } 84 | start.reset() 85 | start.nameAttr(name, attrs) 86 | return try process(start) 87 | } 88 | 89 | @discardableResult 90 | public func processStartTag(_ name: String, _ attrs: Attributes) throws -> Bool { 91 | return try processStartTag(name.utf8Array, attrs) 92 | } 93 | 94 | @discardableResult 95 | public func processEndTag(_ name: [UInt8]) throws -> Bool { 96 | if (currentToken === end) { // don't recycle an in-use token 97 | return try process(Token.EndTag().name(name)) 98 | } 99 | 100 | return try process(end.reset().name(name)) 101 | } 102 | 103 | @discardableResult 104 | public func processEndTag(_ name: String) throws -> Bool { 105 | return try processEndTag(name.utf8Array) 106 | } 107 | 108 | public func currentElement() -> Element? { 109 | let size: Int = stack.count 110 | return size > 0 ? stack[size-1] : nil 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /Sources/UnfairLock.swift: -------------------------------------------------------------------------------- 1 | // 2 | // UnfairLock.swift 3 | // SwiftSoup 4 | // 5 | // Created by xukun on 2022/3/31. 
//

import Foundation

#if os(macOS) || os(iOS) || os(tvOS) || os(watchOS)
/// `NSLocking` wrapper around `os_unfair_lock`.
///
/// The lock value is kept in manually allocated memory and only ever passed
/// by pointer to the `os_unfair_lock_*` functions.
@available(iOS 10.0, macOS 10.12, watchOS 3.0, tvOS 10.0, *)
final class UnfairLock: NSLocking {

    /// Storage for the underlying lock, allocated once and released in `deinit`.
    private let unfairLock: UnsafeMutablePointer<os_unfair_lock> = {
        let pointer = UnsafeMutablePointer<os_unfair_lock>.allocate(capacity: 1)
        pointer.initialize(to: os_unfair_lock())
        return pointer
    }()

    deinit {
        unfairLock.deinitialize(count: 1)
        unfairLock.deallocate()
    }

    /// Acquires the lock (`os_unfair_lock_lock`).
    func lock() {
        os_unfair_lock_lock(unfairLock)
    }

    /// Attempts to acquire the lock; returns the result of `os_unfair_lock_trylock`.
    func tryLock() -> Bool {
        return os_unfair_lock_trylock(unfairLock)
    }

    /// Releases the lock (`os_unfair_lock_unlock`).
    func unlock() {
        os_unfair_lock_unlock(unfairLock)
    }
}
#endif
-------------------------------------------------------------------------------- /Sources/UnicodeScalar.swift: -------------------------------------------------------------------------------- 1 | // 2 | // UnicodeScalar.swift 3 | // SwiftSoup 4 | // 5 | // Created by Nabil Chatbi on 14/11/16. 
//

import Foundation

private let uppercaseSet = CharacterSet.uppercaseLetters
private let lowercaseSet = CharacterSet.lowercaseLetters
private let alphaSet = CharacterSet.letters
private let alphaNumericSet = CharacterSet.alphanumerics
private let symbolSet = CharacterSet.symbols
private let digitSet = CharacterSet.decimalDigits

extension UnicodeScalar {
    public static let Ampersand: UnicodeScalar = "&"
    public static let LessThan: UnicodeScalar = "<"
    public static let GreaterThan: UnicodeScalar = ">"

    public static let Space: UnicodeScalar = " "
    public static let BackslashF: UnicodeScalar = UnicodeScalar(12)
    public static let BackslashT: UnicodeScalar = "\t"
    public static let BackslashN: UnicodeScalar = "\n"
    public static let BackslashR: UnicodeScalar = "\r"
    public static let Slash: UnicodeScalar = "/"

    // U+000C is FORM FEED and U+000B is LINE TABULATION (vertical tab); the two values
    // were previously assigned the other way round.
    public static let FormFeed: UnicodeScalar = "\u{000C}"// Form Feed
    public static let VerticalTab: UnicodeScalar = "\u{000B}"// vertical tab

    /// Whether `set` contains this scalar.
    func isMemberOfCharacterSet(_ set: CharacterSet) -> Bool {
        return set.contains(self)
    }

    /// True for the space character (U+0020) and the control characters \t, \n, \r, \f, \v.
    var isWhitespace: Bool {
        switch self {
        case UnicodeScalar.Space, UnicodeScalar.BackslashT, UnicodeScalar.BackslashN, UnicodeScalar.BackslashR, UnicodeScalar.BackslashF:
            return true
        case UnicodeScalar.FormFeed, UnicodeScalar.VerticalTab: // Form Feed, vertical tab
            return true
        default:
            return false
        }
    }

    /// `true` if `self` is a member of `CharacterSet.uppercaseLetters`.
    var isUppercase: Bool {
        return isMemberOfCharacterSet(uppercaseSet)
    }

    /// `true` if `self` is a member of `CharacterSet.lowercaseLetters`.
    var isLowercase: Bool {
        return isMemberOfCharacterSet(lowercaseSet)
    }

    /// The first scalar of the uppercased string form of `self`.
    var uppercase: UnicodeScalar {
        let str = String(self).uppercased()
        return str.unicodeScalar(0)
    }
}
-------------------------------------------------------------------------------- /Sources/Validate.swift: --------------------------------------------------------------------------------
//
//  Validate.swift
//  SwiftSoup
//
//  Created by Nabil Chatbi on 02/10/16.
//

import Foundation

/// Argument-validation helpers. Every failed check throws
/// `Exception.Error` with type `IllegalArgumentException`.
public struct Validate {

    /**
     * Validates that the object is not null
     * @param obj object to test
     */
    public static func notNull(obj: Any?) throws {
        if obj == nil {
            throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: "Object must not be null")
        }
    }

    /**
     * Validates that the object is not null
     * @param obj object to test
     * @param msg message to output if validation fails
     */
    public static func notNull(obj: AnyObject?, msg: String) throws {
        if obj == nil {
            throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: msg)
        }
    }

    /**
     * Validates that the value is true
     * @param val object to test
     */
    public static func isTrue(val: Bool) throws {
        if !val {
            throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: "Must be true")
        }
    }

    /**
     * Validates that the value is true
     * @param val object to test
     * @param msg message to output if validation fails
     */
    public static func isTrue(val: Bool, msg: String) throws {
        if !val {
            throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: msg)
        }
    }

    /**
     * Validates that the value is false
     * @param val object to test
     */
    public static func isFalse(val: Bool) throws {
        if val {
            throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: "Must be false")
        }
    }

    /**
     * Validates that the value is false
     * @param val object to test
     * @param msg message to output if validation fails
     */
    public static func isFalse(val: Bool, msg: String) throws {
        if val {
            throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: msg)
        }
    }

    /**
     * Validates that the array contains no null elements
     * @param objects the array to test
     */
    public static func noNullElements(objects: [AnyObject?]) throws {
        try noNullElements(objects: objects, msg: "Array must not contain any null objects")
    }

    /**
     * Validates that the array contains no null elements
     * @param objects the array to test
     * @param msg message to output if validation fails
     */
    public static func noNullElements(objects: [AnyObject?], msg: String) throws {
        if objects.contains(where: { $0 == nil }) {
            throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: msg)
        }
    }

    /**
     * Validates that the byte string is neither nil nor empty
     * @param string the string to test
     */
    public static func notEmpty<T: Collection>(string: T?) throws where T.Element == UInt8 {
        if string?.isEmpty ?? true {
            throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: "String must not be empty")
        }
    }

    /**
     * Validates that the string is neither nil nor empty
     * @param string the string to test
     */
    public static func notEmpty(string: String?) throws {
        try notEmpty(string: string?.utf8Array)
    }

    /**
     * Validates that the byte string is neither nil nor empty
     * @param string the string to test
     * @param msg message to output if validation fails
     */
    public static func notEmpty(string: [UInt8]?, msg: String ) throws {
        if string?.isEmpty ?? true {
            throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: msg)
        }
    }

    /**
     * Validates that the string is neither nil nor empty
     * @param string the string to test
     * @param msg message to output if validation fails
     */
    public static func notEmpty(string: String?, msg: String) throws {
        try notEmpty(string: string?.utf8Array, msg: msg)
    }

    /**
     Cause a failure.
     @param msg message to output.
     */
    public static func fail(msg: String) throws {
        throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: msg)
    }

    /**
     Helper: always throws an IllegalArgumentException carrying `msg`.
     */
    public static func exception(msg: String) throws {
        throw Exception.Error(type: ExceptionType.IllegalArgumentException, Message: msg)
    }
}
-------------------------------------------------------------------------------- /Sources/XmlDeclaration.swift: -------------------------------------------------------------------------------- 1 | // 2 | // XmlDeclaration.swift 3 | // SwifSoup 4 | // 5 | // Created by Nabil Chatbi on 29/09/16. 6 | // 7 | 8 | import Foundation 9 | 10 | /** 11 | An XML Declaration. 12 | */ 13 | public class XmlDeclaration: Node { 14 | private let _name: [UInt8] 15 | private let isProcessingInstruction: Bool // [UInt8] { 34 | return nodeName().utf8Array 35 | } 36 | 37 | public override func nodeName() -> String { 38 | return "#declaration" 39 | } 40 | 41 | /** 42 | * Get the name of this declaration. 43 | * @return name of this declaration. 44 | */ 45 | public func name() -> String { 46 | return String(decoding: _name, as: UTF8.self) 47 | } 48 | 49 | /** 50 | Get the unencoded XML declaration. 51 | @return XML declaration 52 | */ 53 | public func getWholeDeclaration()throws->String { 54 | return try attributes!.html().trim() // attr html starts with a " " 55 | } 56 | 57 | override func outerHtmlHead(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) { 58 | accum 59 | .append(UTF8Arrays.tagStart) 60 | .append(isProcessingInstruction ? "!" 
: "?") 61 | .append(_name) 62 | do { 63 | try attributes?.html(accum: accum, out: out) 64 | } catch {} 65 | accum 66 | .append(isProcessingInstruction ? "!" : "?") 67 | .append(UTF8Arrays.tagEnd) 68 | } 69 | 70 | override func outerHtmlTail(_ accum: StringBuilder, _ depth: Int, _ out: OutputSettings) {} 71 | 72 | public override func copy(with zone: NSZone? = nil) -> Any { 73 | let clone = XmlDeclaration(_name, baseUri!, isProcessingInstruction) 74 | return copy(clone: clone) 75 | } 76 | 77 | public override func copy(parent: Node?) -> Node { 78 | let clone = XmlDeclaration(_name, baseUri!, isProcessingInstruction) 79 | return copy(clone: clone, parent: parent) 80 | } 81 | public override func copy(clone: Node, parent: Node?) -> Node { 82 | return super.copy(clone: clone, parent: parent) 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /Sources/XmlTreeBuilder.swift: -------------------------------------------------------------------------------- 1 | // 2 | // XmlTreeBuilder.swift 3 | // SwiftSoup 4 | // 5 | // Created by Nabil Chatbi on 14/10/16. 6 | // 7 | 8 | import Foundation 9 | 10 | /** 11 | * Use the {@code XmlTreeBuilder} when you want to parse XML without any of the HTML DOM rules being applied to the 12 | * document. 13 | *

Usage example: `let xmlDoc = try SwiftSoup.parse(html, baseUrl, Parser.xmlParser())`

14 | * 15 | */ 16 | public class XmlTreeBuilder: TreeBuilder { 17 | 18 | public override init() { 19 | super.init() 20 | } 21 | 22 | public override func defaultSettings() -> ParseSettings { 23 | return ParseSettings.preserveCase 24 | } 25 | 26 | public func parse(_ input: [UInt8], _ baseUri: [UInt8]) throws -> Document { 27 | return try parse(input, baseUri, ParseErrorList.noTracking(), ParseSettings.preserveCase) 28 | } 29 | 30 | public func parse(_ input: String, _ baseUri: String) throws -> Document { 31 | return try parse(input.utf8Array, baseUri.utf8Array, ParseErrorList.noTracking(), ParseSettings.preserveCase) 32 | } 33 | 34 | override public func initialiseParse(_ input: [UInt8], _ baseUri: [UInt8], _ errors: ParseErrorList, _ settings: ParseSettings) { 35 | super.initialiseParse(input, baseUri, errors, settings) 36 | stack.append(doc) // place the document onto the stack. differs from HtmlTreeBuilder (not on stack) 37 | doc.outputSettings().syntax(syntax: OutputSettings.Syntax.xml) 38 | } 39 | 40 | override public func process(_ token: Token) throws -> Bool { 41 | // start tag, end tag, doctype, comment, character, eof 42 | switch (token.type) { 43 | case .StartTag: 44 | try insert(token.asStartTag()) 45 | break 46 | case .EndTag: 47 | try popStackToClose(token.asEndTag()) 48 | break 49 | case .Comment: 50 | try insert(token.asComment()) 51 | break 52 | case .Char: 53 | try insert(token.asCharacter()) 54 | break 55 | case .Doctype: 56 | try insert(token.asDoctype()) 57 | break 58 | case .EOF: // could put some normalisation here if desired 59 | break 60 | // default: 61 | // try Validate.fail(msg: "Unexpected token type: " + token.tokenType()) 62 | } 63 | return true 64 | } 65 | 66 | private func insertNode(_ node: Node)throws { 67 | try currentElement()?.appendChild(node) 68 | } 69 | 70 | @discardableResult 71 | func insert(_ startTag: Token.StartTag)throws->Element { 72 | let tag: Tag = try Tag.valueOf(startTag.name(), settings) 73 | // todo: wonder if 
for xml parsing, should treat all tags as unknown? because it's not html. 74 | let el: Element 75 | if let attributes = startTag._attributes { 76 | el = try Element(tag, baseUri, settings.normalizeAttributes(attributes)) 77 | } else { 78 | el = Element(tag, baseUri) 79 | } 80 | try insertNode(el) 81 | if (startTag.isSelfClosing()) { 82 | tokeniser.acknowledgeSelfClosingFlag() 83 | if (!tag.isKnownTag()) // unknown tag, remember this is self closing for output. see above. 84 | { 85 | tag.setSelfClosing() 86 | } 87 | } else { 88 | stack.append(el) 89 | } 90 | return el 91 | } 92 | 93 | func insert(_ commentToken: Token.Comment)throws { 94 | let comment: Comment = Comment(commentToken.getData(), baseUri) 95 | var insert: Node = comment 96 | if (commentToken.bogus) { // xml declarations are emitted as bogus comments (which is right for html, but not xml) 97 | // so we do a bit of a hack and parse the data as an element to pull the attributes out 98 | let data: String = comment.getData() 99 | if (data.count > 1 && (data.startsWith("!") || data.startsWith("?"))) { 100 | let doc: Document = try SwiftSoup.parse("<" + data.substring(1, data.count - 2) + ">", String(decoding: baseUri, as: UTF8.self), Parser.xmlParser()) 101 | let el: Element = doc.child(0) 102 | insert = XmlDeclaration(settings.normalizeTag(el.tagNameUTF8()), comment.getBaseUriUTF8(), data.startsWith("!")) 103 | insert.getAttributes()?.addAll(incoming: el.getAttributes()) 104 | } 105 | } 106 | try insertNode(insert) 107 | } 108 | 109 | func insert(_ characterToken: Token.Char)throws { 110 | let node: Node = TextNode(characterToken.getData()!, baseUri) 111 | try insertNode(node) 112 | } 113 | 114 | func insert(_ d: Token.Doctype)throws { 115 | let doctypeNode = DocumentType( 116 | settings.normalizeTag(d.getName()), 117 | d.getPubSysKey(), 118 | d.getPublicIdentifier(), 119 | d.getSystemIdentifier(), 120 | baseUri 121 | ) 122 | try insertNode(doctypeNode) 123 | } 124 | 125 | /** 126 | * If the stack contains 
an element with this tag's name, pop up the stack to remove the first occurrence. If not 127 | * found, skips. 128 | * 129 | * @param endTag 130 | */ 131 | private func popStackToClose(_ endTag: Token.EndTag) throws { 132 | let elName: [UInt8] = try endTag.name() 133 | var firstFound: Element? = nil 134 | 135 | for pos in (0.. Array { 156 | initialiseParse(inputFragment, baseUri, errors, settings) 157 | try runParser() 158 | return doc.getChildNodes() 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /SwiftSoup.podspec: -------------------------------------------------------------------------------- 1 | Pod::Spec.new do |s| 2 | s.name = 'SwiftSoup' 3 | s.version = '2.7.6' 4 | s.summary = 'Swift HTML Parser / Reader, XML , with best of DOM, CSS, and jquery' 5 | s.description = <<-DESC 6 | SwiftSoup is a Swift library for working with real-world HTML. It provides a very convenient API for extracting and manipulating data, using the best of DOM, CSS, and jquery-like methods. 
7 | DESC 8 | 9 | s.homepage = 'https://github.com/scinfu/SwiftSoup' 10 | s.license = { :type => 'MIT', :file => 'LICENSE' } 11 | s.author = { 'Nabil Chatbi' => 'scinfu@gmail.com' } 12 | s.source = { :git => 'https://github.com/scinfu/SwiftSoup.git', :tag => s.version.to_s } 13 | s.social_media_url = 'https://twitter.com/scinfu' 14 | 15 | s.ios.deployment_target = '13.0' 16 | s.osx.deployment_target = '10.15' 17 | s.watchos.deployment_target = '6.0' 18 | s.tvos.deployment_target = '13.0' 19 | 20 | s.source_files = 'Sources/**/*.swift' 21 | s.swift_versions = ['4.0', '4.2', '5.0', '5.1'] 22 | end 23 | -------------------------------------------------------------------------------- /SwiftSoup.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /SwiftSoup.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | IDEDidComputeMac32BitWarning 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /SwiftSoup.xcodeproj/xcshareddata/xcbaselines/8CE4181E1DAA54A900240B42.xcbaseline/9F80FB9B-4E37-45A8-BFE8-9AF36737A6F3.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | classNames 6 | 7 | SwiftSoupTests 8 | 9 | testPerformanceDiv() 10 | 11 | com.apple.XCTPerformanceMetric_WallClockTime 12 | 13 | baselineAverage 14 | 36.859 15 | baselineIntegrationDisplayName 16 | Local Baseline 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /SwiftSoup.xcodeproj/xcshareddata/xcbaselines/8CE4181E1DAA54A900240B42.xcbaseline/F9553B46-8F24-4C2B-8A1E-8CC5535D12E1.plist: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | classNames 6 | 7 | CssTest 8 | 9 | testPerformanceExample() 10 | 11 | com.apple.XCTPerformanceMetric_WallClockTime 12 | 13 | baselineAverage 14 | 1.2831e-06 15 | baselineIntegrationDisplayName 16 | Local Baseline 17 | 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /SwiftSoup.xcodeproj/xcshareddata/xcbaselines/8CE4181E1DAA54A900240B42.xcbaseline/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | runDestinationsByUUID 6 | 7 | 9F80FB9B-4E37-45A8-BFE8-9AF36737A6F3 8 | 9 | localComputer 10 | 11 | busSpeedInMHz 12 | 100 13 | cpuCount 14 | 1 15 | cpuKind 16 | Intel Core i7 17 | cpuSpeedInMHz 18 | 3100 19 | logicalCPUCoresPerPackage 20 | 4 21 | modelCode 22 | MacBookPro12,1 23 | physicalCPUCoresPerPackage 24 | 2 25 | platformIdentifier 26 | com.apple.platform.macosx 27 | 28 | targetArchitecture 29 | x86_64 30 | targetDevice 31 | 32 | modelCode 33 | iPhone7,2 34 | platformIdentifier 35 | com.apple.platform.iphonesimulator 36 | 37 | 38 | F9553B46-8F24-4C2B-8A1E-8CC5535D12E1 39 | 40 | localComputer 41 | 42 | busSpeedInMHz 43 | 100 44 | cpuCount 45 | 1 46 | cpuKind 47 | Intel Core i7 48 | cpuSpeedInMHz 49 | 3100 50 | logicalCPUCoresPerPackage 51 | 4 52 | modelCode 53 | MacBookPro12,1 54 | physicalCPUCoresPerPackage 55 | 2 56 | platformIdentifier 57 | com.apple.platform.macosx 58 | 59 | targetArchitecture 60 | i386 61 | targetDevice 62 | 63 | modelCode 64 | iPhone5,1 65 | platformIdentifier 66 | com.apple.platform.iphonesimulator 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /SwiftSoup.xcodeproj/xcshareddata/xcschemes/PerformanceTest.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 29 | 35 | 36 | 37 | 38 | 39 | 
44 | 45 | 47 | 53 | 54 | 55 | 56 | 57 | 63 | 64 | 65 | 66 | 67 | 68 | 78 | 79 | 85 | 86 | 87 | 88 | 89 | 90 | 96 | 97 | 103 | 104 | 105 | 106 | 108 | 109 | 112 | 113 | 114 | -------------------------------------------------------------------------------- /SwiftSoup.xcodeproj/xcshareddata/xcschemes/SwiftSoup-Package.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 30 | 31 | 33 | 39 | 40 | 41 | 42 | 43 | 53 | 54 | 60 | 61 | 62 | 63 | 69 | 70 | 72 | 73 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /SwiftSoup.xcodeproj/xcshareddata/xcschemes/SwiftSoup-iOS.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 29 | 35 | 36 | 37 | 38 | 39 | 44 | 45 | 51 | 52 | 53 | 54 | 56 | 62 | 63 | 64 | 65 | 66 | 76 | 77 | 83 | 84 | 85 | 86 | 92 | 93 | 99 | 100 | 101 | 102 | 104 | 105 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /SwiftSoup.xcodeproj/xcshareddata/xcschemes/SwiftSoup-macOS.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 30 | 31 | 37 | 38 | 39 | 40 | 41 | 42 | 52 | 53 | 59 | 60 | 61 | 62 | 68 | 69 | 71 | 72 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /SwiftSoup.xcodeproj/xcshareddata/xcschemes/SwiftSoup-tvOS.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 30 | 31 | 32 | 33 | 43 | 44 | 50 | 51 | 52 | 53 | 59 | 60 | 66 | 67 | 68 | 69 | 71 | 72 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /SwiftSoup.xcodeproj/xcshareddata/xcschemes/SwiftSoup-watchOS.xcscheme: 
-------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 30 | 31 | 32 | 33 | 43 | 44 | 50 | 51 | 52 | 53 | 59 | 60 | 66 | 67 | 68 | 69 | 71 | 72 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /Tests-macOS/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | BNDL 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleVersion 20 | 1 21 | 22 | 23 |
-------------------------------------------------------------------------------- /Tests-macOS/ParserBenchmark.swift: --------------------------------------------------------------------------------
//
//  ParserBenchmark.swift
//  SwiftSoupTests
//
//  Created by garth on 2/26/19.
//

import XCTest
import SwiftSoup

/// Measures `SwiftSoup.parse` over every `.html` resource bundled with the test target.
class ParserBenchmark: XCTestCase {

    enum Const {
        /// Decoded contents of the bundled HTML files; filled in by `setUp()`.
        static var corpusHTMLData: [String] = []
        /// How many times each document is parsed inside one measured pass.
        static let repetitions = 5
    }

    /// Loads the HTML corpus from the bundle containing this test class.
    /// Files that cannot be read are skipped.
    override func setUp() {
        super.setUp()
        let bundle = Bundle(for: type(of: self))
        // NOTE(review): the extension is passed with a leading dot, as before — confirm Bundle accepts it.
        guard let urls = bundle.urls(forResourcesWithExtension: ".html", subdirectory: nil) else {
            // Report the missing corpus as a test failure instead of crashing the whole run.
            Const.corpusHTMLData = []
            XCTFail("Could not list .html resources in the test bundle")
            return
        }
        Const.corpusHTMLData = urls
            .compactMap { try? Data(contentsOf: $0) }
            .map { String(decoding: $0, as: UTF8.self) }
    }

    /// Parses every corpus document `Const.repetitions` times per measured pass.
    func testParserPerformance() throws {
        var count = 0  // running total across all passes that `measure` executes
        measure {
            for htmlDoc in Const.corpusHTMLData {
                for _ in 1...Const.repetitions {
                    do {
                        _ = try SwiftSoup.parse(htmlDoc)
                        count += 1
                    } catch {
                        XCTFail("Exception while parsing HTML")
                    }
                }
            }
            print("Did \(count) iterations")
        }
    }

}
-------------------------------------------------------------------------------- /Tests/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | en 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | BNDL 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleVersion 20 | 1 21 | 22 | 23 | -------------------------------------------------------------------------------- /Tests/LinuxMain.swift: -------------------------------------------------------------------------------- 1 | // 2 | // LinuxMain.swift 3 | // SwiftSoup 4 | // 5 | // Created by Nabil Chatbi on 20/12/16. 
6 | // 7 | 8 | import XCTest 9 | @testable import SwiftSoupTests 10 | 11 | XCTMain([ 12 | testCase(CssTest.allTests), 13 | testCase(ElementsTest.allTests), 14 | testCase(QueryParserTest.allTests), 15 | testCase(SelectorTest.allTests), 16 | testCase(AttributeParseTest.allTests), 17 | testCase(CharacterReaderTest.allTests), 18 | testCase(HtmlParserTest.allTests), 19 | testCase(ParseSettingsTest.allTests), 20 | testCase(TagTest.allTests), 21 | testCase(TokenQueueTest.allTests), 22 | testCase(XmlTreeBuilderTest.allTests), 23 | testCase(FormElementTest.allTests), 24 | testCase(ElementTest.allTests), 25 | testCase(EntitiesTest.allTests), 26 | testCase(DocumentTypeTest.allTests), 27 | testCase(TextNodeTest.allTests), 28 | testCase(DocumentTest.allTests), 29 | testCase(AttributesTest.allTests), 30 | testCase(NodeTest.allTests), 31 | testCase(AttributeTest.allTests), 32 | testCase(CleanerTest.allTests), 33 | testCase(StringUtilTest.allTests) 34 | ]) 35 | -------------------------------------------------------------------------------- /Tests/SwiftSoupTests/AttributeParseTest.swift: -------------------------------------------------------------------------------- 1 | // 2 | // AttributeParseTest.swift 3 | // SwiftSoup 4 | // 5 | // Created by Nabil Chatbi on 10/11/16. 6 | // 7 | /** 8 | Test suite for attribute parser. 9 | */ 10 | 11 | import XCTest 12 | import SwiftSoup 13 | 14 | class AttributeParseTest: XCTestCase { 15 | 16 | func testLinuxTestSuiteIncludesAllTests() { 17 | #if os(macOS) || os(iOS) || os(tvOS) || os(watchOS) 18 | let thisClass = type(of: self) 19 | let linuxCount = thisClass.allTests.count 20 | let darwinCount = Int(thisClass.defaultTestSuite.testCaseCount) 21 | XCTAssertEqual(linuxCount, darwinCount, "\(darwinCount - linuxCount) tests are missing from allTests") 22 | #endif 23 | } 24 | 25 | func testparsesRoughAttributeString() throws { 26 | let html: String = "
" 27 | // should be: , , , , , 28 | 29 | let el: Element = try SwiftSoup.parse(html).getElementsByTag("a").get(0) 30 | let attr: Attributes = el.getAttributes()! 31 | XCTAssertEqual(7, attr.size()) 32 | XCTAssertEqual("123", attr.get(key: "id")) 33 | XCTAssertEqual("baz = 'bar'", attr.get(key: "class")) 34 | XCTAssertEqual("border: 2px", attr.get(key: "style")) 35 | XCTAssertEqual("", attr.get(key: "qux")) 36 | XCTAssertEqual("", attr.get(key: "zim")) 37 | XCTAssertEqual("12", attr.get(key: "foo")) 38 | XCTAssertEqual("18", attr.get(key: "mux")) 39 | } 40 | 41 | func testhandlesNewLinesAndReturns() throws { 42 | let html: String = "One" 43 | let el: Element = try SwiftSoup.parse(html).select("a").first()! 44 | XCTAssertEqual(2, el.getAttributes()?.size()) 45 | XCTAssertEqual("bar\r\nqux", try el.attr("foo")) // currently preserves newlines in quoted attributes. todo confirm if should. 46 | XCTAssertEqual("two", try el.attr("bar")) 47 | } 48 | 49 | func testparsesEmptyString() throws { 50 | let html: String = "" 51 | let el: Element = try SwiftSoup.parse(html).getElementsByTag("a").get(0) 52 | let attr: Attributes = el.getAttributes()! 53 | XCTAssertEqual(0, attr.size()) 54 | } 55 | 56 | func testcanStartWithEq() throws { 57 | let html: String = "" 58 | let el: Element = try SwiftSoup.parse(html).getElementsByTag("a").get(0) 59 | let attr: Attributes = el.getAttributes()! 60 | XCTAssertEqual(1, attr.size()) 61 | XCTAssertTrue(attr.hasKey(key: "=empty")) 62 | XCTAssertEqual("", attr.get(key: "=empty")) 63 | } 64 | 65 | func teststrictAttributeUnescapes() throws { 66 | let html: String = "One Two" 67 | let els: Elements = try SwiftSoup.parse(html).select("a") 68 | XCTAssertEqual("?foo=bar&mid<=true", try els.first()!.attr("href")) 69 | XCTAssertEqual("?foo=bar" 80 | let el: Element = try SwiftSoup.parse(html).select("a").first()! 
81 | 82 | XCTAssertEqual("123", try el.attr("normal")) 83 | XCTAssertEqual("", try el.attr("boolean")) 84 | XCTAssertEqual("", try el.attr("empty")) 85 | 86 | let attributes: Array = el.getAttributes()!.asList() 87 | XCTAssertEqual(3, attributes.count, "There should be 3 attribute present") 88 | 89 | // Assuming the list order always follows the parsed html 90 | XCTAssertFalse((attributes[0] as? BooleanAttribute) != nil, "'normal' attribute should not be boolean") 91 | XCTAssertTrue((attributes[1] as? BooleanAttribute) != nil, "'boolean' attribute should be boolean") 92 | XCTAssertFalse((attributes[2] as? BooleanAttribute) != nil, "'empty' attribute should not be boolean") 93 | 94 | XCTAssertEqual(html, try el.outerHtml()) 95 | } 96 | 97 | func testretainsSlashFromAttributeName() throws { 98 | let html: String = "" 99 | var doc: Document = try SwiftSoup.parse(html) 100 | XCTAssertTrue(try doc.select("img[onerror]").size() != 0, "SelfClosingStartTag ignores last character") 101 | XCTAssertEqual("", try doc.body()!.html()) 102 | 103 | doc = try SwiftSoup.parse(html, "", Parser.xmlParser()) 104 | XCTAssertEqual("", try doc.html()) 105 | } 106 | 107 | static var allTests = { 108 | return [ 109 | ("testLinuxTestSuiteIncludesAllTests", testLinuxTestSuiteIncludesAllTests), 110 | ("testparsesRoughAttributeString", testparsesRoughAttributeString), 111 | ("testhandlesNewLinesAndReturns", testhandlesNewLinesAndReturns), 112 | ("testparsesEmptyString", testparsesEmptyString), 113 | ("testcanStartWithEq", testcanStartWithEq), 114 | ("teststrictAttributeUnescapes", teststrictAttributeUnescapes), 115 | ("testmoreAttributeUnescapes", testmoreAttributeUnescapes), 116 | ("testparsesBooleanAttributes", testparsesBooleanAttributes), 117 | ("testretainsSlashFromAttributeName", testretainsSlashFromAttributeName) 118 | ] 119 | }() 120 | 121 | } 122 | -------------------------------------------------------------------------------- /Tests/SwiftSoupTests/AttributeTest.swift: 
-------------------------------------------------------------------------------- 1 | // 2 | // AttributeTest.swift 3 | // SwiftSoup 4 | // 5 | // Created by Nabil Chatbi on 07/10/16. 6 | // 7 | 8 | import XCTest 9 | @testable import SwiftSoup 10 | class AttributeTest: XCTestCase { 11 | 12 | func testLinuxTestSuiteIncludesAllTests() { 13 | #if os(macOS) || os(iOS) || os(tvOS) || os(watchOS) 14 | let thisClass = type(of: self) 15 | let linuxCount = thisClass.allTests.count 16 | let darwinCount = Int(thisClass.defaultTestSuite.testCaseCount) 17 | XCTAssertEqual(linuxCount, darwinCount, "\(darwinCount - linuxCount) tests are missing from allTests") 18 | #endif 19 | } 20 | 21 | func testHtml() throws { 22 | let attr = try Attribute(key: "key", value: "value &") 23 | XCTAssertEqual("key=\"value &\"", attr.html()) 24 | XCTAssertEqual(attr.html(), attr.toString()) 25 | } 26 | 27 | func testWithSupplementaryCharacterInAttributeKeyAndValue() throws { 28 | let string = "135361" 29 | let attr = try Attribute(key: string, value: "A" + string + "B") 30 | XCTAssertEqual(string + "=\"A" + string + "B\"", attr.html()) 31 | XCTAssertEqual(attr.html(), attr.toString()) 32 | } 33 | 34 | func testRemoveCaseSensitive() throws { 35 | let atteibute: Attributes = Attributes() 36 | try atteibute.put("Tot", "a&p") 37 | try atteibute.put("tot", "one") 38 | try atteibute.put("Hello", "There") 39 | try atteibute.put("hello", "There") 40 | try atteibute.put("data-name", "Jsoup") 41 | 42 | XCTAssertEqual(5, atteibute.size()) 43 | try atteibute.remove(key: "Tot") 44 | try atteibute.remove(key: "Hello") 45 | XCTAssertEqual(3, atteibute.size()) 46 | XCTAssertTrue(atteibute.hasKey(key: "tot")) 47 | XCTAssertFalse(atteibute.hasKey(key: "Tot")) 48 | } 49 | 50 | static var allTests = { 51 | return [ 52 | ("testLinuxTestSuiteIncludesAllTests", testLinuxTestSuiteIncludesAllTests), 53 | ("testHtml", testHtml), 54 | ("testWithSupplementaryCharacterInAttributeKeyAndValue", 
testWithSupplementaryCharacterInAttributeKeyAndValue), 55 | ("testRemoveCaseSensitive", testRemoveCaseSensitive) 56 | ] 57 | }() 58 | 59 | } 60 | -------------------------------------------------------------------------------- /Tests/SwiftSoupTests/AttributesTest.swift: -------------------------------------------------------------------------------- 1 | // 2 | // AttributesTest.swift 3 | // SwiftSoup 4 | // 5 | // Created by Nabil Chatbi on 29/10/16. 6 | // 7 | 8 | import XCTest 9 | import SwiftSoup 10 | 11 | class AttributesTest: XCTestCase { 12 | 13 | func testLinuxTestSuiteIncludesAllTests() { 14 | #if os(macOS) || os(iOS) || os(tvOS) || os(watchOS) 15 | let thisClass = type(of: self) 16 | let linuxCount = thisClass.allTests.count 17 | let darwinCount = Int(thisClass.defaultTestSuite.testCaseCount) 18 | XCTAssertEqual(linuxCount, darwinCount, "\(darwinCount - linuxCount) tests are missing from allTests") 19 | #endif 20 | } 21 | 22 | func testHtml() { 23 | let a: Attributes = Attributes() 24 | do { 25 | try a.put("Tot", "a&p") 26 | try a.put("Hello", "There") 27 | try a.put("data-name", "Jsoup") 28 | } catch {} 29 | 30 | XCTAssertEqual(3, a.size()) 31 | XCTAssertTrue(a.hasKey(key: "Tot")) 32 | XCTAssertTrue(a.hasKey(key: "Hello")) 33 | XCTAssertTrue(a.hasKey(key: "data-name")) 34 | XCTAssertFalse(a.hasKey(key: "tot")) 35 | XCTAssertTrue(a.hasKeyIgnoreCase(key: "tot")) 36 | XCTAssertEqual("There", try a.getIgnoreCase(key: "hEllo")) 37 | 38 | XCTAssertEqual(1, a.dataset().count) 39 | XCTAssertEqual("Jsoup", a.dataset()["name"]) 40 | XCTAssertEqual("", a.get(key: "tot")) 41 | XCTAssertEqual("a&p", a.get(key: "Tot")) 42 | XCTAssertEqual("a&p", try a.getIgnoreCase(key: "tot")) 43 | 44 | XCTAssertEqual(" Tot=\"a&p\" Hello=\"There\" data-name=\"Jsoup\"", try a.html()) 45 | XCTAssertEqual(try a.html(), try a.toString()) 46 | } 47 | //todo: re-enable if needed 48 | // func testIteratorRemovable() { 49 | // let a = Attributes() 50 | // do{ 51 | // try a.put("Tot", "a&p") 52 | // try 
a.put("Hello", "There") 53 | // try a.put("data-name", "Jsoup") 54 | // }catch{} 55 | // 56 | // var iterator = a.iterator() 57 | // 58 | // iterator.next() 59 | // iterator.dropFirst() 60 | // XCTAssertEqual(2, a.size()) 61 | // } 62 | 63 | func testIterator() { 64 | let a: Attributes = Attributes() 65 | let datas: [[String]] = [["Tot", "raul"], ["Hello", "pismuth"], ["data-name", "Jsoup"]] 66 | 67 | for atts in datas { 68 | try! a.put(atts[0], atts[1]) 69 | } 70 | 71 | let iterator = a.makeIterator() 72 | XCTAssertTrue(iterator.next() != nil) 73 | var i = 0 74 | for attribute in a { 75 | XCTAssertEqual(datas[i][0], attribute.getKey()) 76 | XCTAssertEqual(datas[i][1], attribute.getValue()) 77 | i += 1 78 | } 79 | XCTAssertEqual(datas.count, i) 80 | } 81 | 82 | func testIteratorEmpty() { 83 | let a = Attributes() 84 | 85 | let iterator = a.makeIterator() 86 | XCTAssertNil(iterator.next()) 87 | } 88 | 89 | static var allTests = { 90 | return [ 91 | ("testLinuxTestSuiteIncludesAllTests", testLinuxTestSuiteIncludesAllTests), 92 | ("testHtml", testHtml), 93 | ("testIterator", testIterator), 94 | ("testIteratorEmpty", testIteratorEmpty) 95 | ] 96 | }() 97 | } 98 | -------------------------------------------------------------------------------- /Tests/SwiftSoupTests/BuildEntities.swift: -------------------------------------------------------------------------------- 1 | // 2 | // BuildEntities.swift 3 | // SwiftSoup 4 | // 5 | // Created by Nabil Chatbi on 31/10/16. 6 | // 7 | 8 | import Foundation 9 | //todo: 10 | -------------------------------------------------------------------------------- /Tests/SwiftSoupTests/DocumentTypeTest.swift: -------------------------------------------------------------------------------- 1 | // 2 | // DocumentTypeTest.swift 3 | // SwiftSoup 4 | // 5 | // Created by Nabil Chatbi on 06/11/16. 
6 | // 7 | 8 | import XCTest 9 | import SwiftSoup 10 | 11 | class DocumentTypeTest: XCTestCase { 12 | 13 | func testLinuxTestSuiteIncludesAllTests() { 14 | #if os(macOS) || os(iOS) || os(tvOS) || os(watchOS) 15 | let thisClass = type(of: self) 16 | let linuxCount = thisClass.allTests.count 17 | let darwinCount = Int(thisClass.defaultTestSuite.testCaseCount) 18 | XCTAssertEqual(linuxCount, darwinCount, "\(darwinCount - linuxCount) tests are missing from allTests") 19 | #endif 20 | } 21 | 22 | func testConstructorValidationOkWithBlankName() { 23 | let fail: DocumentType? = DocumentType("", "", "", "") 24 | XCTAssertTrue(fail != nil) 25 | } 26 | 27 | func testConstructorValidationThrowsExceptionOnNulls() { 28 | let fail: DocumentType? = DocumentType("html", "", "", "") 29 | XCTAssertTrue(fail != nil) 30 | } 31 | 32 | func testConstructorValidationOkWithBlankPublicAndSystemIds() { 33 | let fail: DocumentType? = DocumentType("html", "", "", "") 34 | XCTAssertTrue(fail != nil) 35 | } 36 | 37 | func testOuterHtmlGeneration() { 38 | let html5 = DocumentType("html", "", "", "") 39 | XCTAssertEqual("", try! html5.outerHtml()) 40 | 41 | let publicDocType = DocumentType("html", "-//IETF//DTD HTML//", "", "") 42 | XCTAssertEqual("", try! publicDocType.outerHtml()) 43 | 44 | let systemDocType = DocumentType("html", "", "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd", "") 45 | XCTAssertEqual("", try! systemDocType.outerHtml()) 46 | 47 | let combo = DocumentType("notHtml", "--public", "--system", "") 48 | XCTAssertEqual("", try! 
combo.outerHtml()) 49 | } 50 | 51 | static var allTests = { 52 | return [ 53 | ("testLinuxTestSuiteIncludesAllTests", testLinuxTestSuiteIncludesAllTests), 54 | ("testConstructorValidationOkWithBlankName", testConstructorValidationOkWithBlankName), 55 | ("testConstructorValidationThrowsExceptionOnNulls", testConstructorValidationThrowsExceptionOnNulls), 56 | ("testConstructorValidationOkWithBlankPublicAndSystemIds", testConstructorValidationOkWithBlankPublicAndSystemIds), 57 | ("testOuterHtmlGeneration", testOuterHtmlGeneration) 58 | ] 59 | }() 60 | } 61 | -------------------------------------------------------------------------------- /Tests/SwiftSoupTests/EntitiesTest.swift: -------------------------------------------------------------------------------- 1 | // 2 | // EntitiesTest.swift 3 | // SwiftSoup 4 | // 5 | // Created by Nabil Chatbi on 09/10/16. 6 | // 7 | 8 | import Foundation 9 | import XCTest 10 | import SwiftSoup 11 | 12 | class EntitiesTest: XCTestCase { 13 | 14 | func testLinuxTestSuiteIncludesAllTests() { 15 | #if os(macOS) || os(iOS) || os(tvOS) || os(watchOS) 16 | let thisClass = type(of: self) 17 | let linuxCount = thisClass.allTests.count 18 | let darwinCount = Int(thisClass.defaultTestSuite.testCaseCount) 19 | XCTAssertEqual(linuxCount, darwinCount, "\(darwinCount - linuxCount) tests are missing from allTests") 20 | #endif 21 | } 22 | 23 | func testEscape() throws { 24 | let text = "Hello &<> Å å π 新 there ¾ © »" 25 | 26 | let escapedAscii = Entities.escape(text, OutputSettings().encoder(String.Encoding.ascii).escapeMode(Entities.EscapeMode.base)) 27 | let escapedAsciiFull = Entities.escape(text, OutputSettings().charset(String.Encoding.ascii).escapeMode(Entities.EscapeMode.extended)) 28 | let escapedAsciiXhtml = Entities.escape(text, OutputSettings().charset(String.Encoding.ascii).escapeMode(Entities.EscapeMode.xhtml)) 29 | let escapedUtfFull = Entities.escape(text, 
OutputSettings().charset(String.Encoding.utf8).escapeMode(Entities.EscapeMode.extended)) 30 | let escapedUtfFull2 = Entities.escape(text) 31 | let escapedUtfMin = Entities.escape(text, OutputSettings().charset(String.Encoding.utf8).escapeMode(Entities.EscapeMode.xhtml)) 32 | 33 | XCTAssertEqual("Hello &<> Å å π 新 there ¾ © »", escapedAscii) 34 | XCTAssertEqual("Hello &<> Å å π 新 there ¾ © »", escapedAsciiFull) 35 | XCTAssertEqual("Hello &<> Å å π 新 there ¾ © »", escapedAsciiXhtml) 36 | XCTAssertEqual("Hello &<> Å å π 新 there ¾ © »", escapedUtfFull) 37 | XCTAssertEqual("Hello &<> Å å π 新 there ¾ © »", escapedUtfFull2) 38 | XCTAssertEqual("Hello &<> Å å π 新 there ¾ © »", escapedUtfMin) 39 | // odd that it's defined as aring in base but angst in full 40 | 41 | // round trip 42 | XCTAssertEqual(text, try Entities.unescape(escapedAscii)) 43 | XCTAssertEqual(text, try Entities.unescape(escapedAsciiFull)) 44 | XCTAssertEqual(text, try Entities.unescape(escapedAsciiXhtml)) 45 | XCTAssertEqual(text, try Entities.unescape(escapedUtfFull)) 46 | XCTAssertEqual(text, try Entities.unescape(escapedUtfFull2)) 47 | XCTAssertEqual(text, try Entities.unescape(escapedUtfMin)) 48 | } 49 | 50 | func testXhtml() { 51 | //let text = "& > < ""; 52 | XCTAssertEqual(UnicodeScalar(38), Entities.EscapeMode.xhtml.codepointForName("amp")) 53 | XCTAssertEqual(UnicodeScalar(62), Entities.EscapeMode.xhtml.codepointForName("gt")) 54 | XCTAssertEqual(UnicodeScalar(60), Entities.EscapeMode.xhtml.codepointForName("lt")) 55 | XCTAssertEqual(UnicodeScalar(34), Entities.EscapeMode.xhtml.codepointForName("quot")) 56 | 57 | XCTAssertEqual("amp", Entities.EscapeMode.xhtml.nameForCodepoint(UnicodeScalar(38)!)) 58 | XCTAssertEqual("gt", Entities.EscapeMode.xhtml.nameForCodepoint(UnicodeScalar(62)!)) 59 | XCTAssertEqual("lt", Entities.EscapeMode.xhtml.nameForCodepoint(UnicodeScalar(60)!)) 60 | XCTAssertEqual("quot", Entities.EscapeMode.xhtml.nameForCodepoint(UnicodeScalar(34)!)) 61 | } 62 | 63 | func 
testGetByName() { 64 | //XCTAssertEqual("≫⃒", Entities.getByName(name: "nGt"));//todo:nabil same codepoint 8811 in java but charachters different 65 | //XCTAssertEqual("fj", Entities.getByName(name: "fjlig")); 66 | XCTAssertEqual("≫", Entities.getByName(name: "gg")) 67 | XCTAssertEqual("©", Entities.getByName(name: "copy")) 68 | } 69 | 70 | func testEscapeSupplementaryCharacter() { 71 | let text: String = "𡃁" 72 | let escapedAscii: String = Entities.escape(text, OutputSettings().charset(.ascii).escapeMode(Entities.EscapeMode.base)) 73 | XCTAssertEqual("𡃁", escapedAscii) 74 | let escapedUtf: String = Entities.escape(text, OutputSettings().charset(.utf8).escapeMode(Entities.EscapeMode.base)) 75 | XCTAssertEqual(text, escapedUtf) 76 | } 77 | 78 | func testNotMissingMultis() throws { 79 | let text: String = "⫽⃥" 80 | let un: String = "\u{2AFD}\u{20E5}" 81 | XCTAssertEqual(un, try Entities.unescape(text)) 82 | } 83 | 84 | func testnotMissingSupplementals() throws { 85 | let text: String = "⨔ 𝔮" 86 | let un: String = "⨔ 𝔮"//+"\u{D835}\u{DD2E}" // 𝔮 87 | XCTAssertEqual(un, try Entities.unescape(text)) 88 | } 89 | 90 | func testUnescape() throws { 91 | let text: String = "Hello Æ &<> ® Å &angst π π 新 there &! ¾ © ©" 92 | XCTAssertEqual("Hello Æ &<> ® Å &angst π π 新 there &! 
¾ © ©", try Entities.unescape(text)) 93 | 94 | XCTAssertEqual("&0987654321; &unknown", try Entities.unescape("&0987654321; &unknown")) 95 | } 96 | 97 | func testStrictUnescape() throws { // for attributes, enforce strict unescaping (must look like &#xxx; , not just &#xxx) 98 | let text: String = "&a" 99 | // XCTAssertEqual("&a", try Entities.unescape(string: text, strict: true)) 100 | // let text2: String = "&" // accepted as a "base" form entity, unlike "extended" forms 101 | // XCTAssertEqual("&", try Entities.unescape(string: text2, strict: true)) 102 | let text3: String = "Hello &= &" 103 | XCTAssertEqual("Hello &= &", try Entities.unescape(string: text3, strict: true)) 104 | XCTAssertEqual("Hello &= &", try Entities.unescape(text3)) 105 | XCTAssertEqual("Hello &= &", try Entities.unescape(string: text3, strict: false)) 106 | } 107 | 108 | func testCaseSensitive() throws { 109 | let unescaped: String = "Ü ü & &" 110 | XCTAssertEqual("Ü ü & &", 111 | Entities.escape(unescaped, OutputSettings().charset(.ascii).escapeMode(Entities.EscapeMode.extended))) 112 | 113 | let escaped: String = "Ü ü & &" 114 | XCTAssertEqual("Ü ü & &", try Entities.unescape(escaped)) 115 | } 116 | 117 | func testQuoteReplacements() throws { 118 | let escaped: String = "\ $" 119 | let unescaped: String = "\\ $" 120 | 121 | XCTAssertEqual(unescaped, try Entities.unescape(escaped)) 122 | } 123 | 124 | func testLetterDigitEntities() throws { 125 | let html: String = "

¹²³¼½¾

" 126 | let doc: Document = try SwiftSoup.parse(html) 127 | doc.outputSettings().charset(.ascii) 128 | let p: Element = try doc.select("p").first()! 129 | XCTAssertEqual("¹²³¼½¾", try p.html()) 130 | XCTAssertEqual("¹²³¼½¾", try p.text()) 131 | doc.outputSettings().charset(.utf8) 132 | XCTAssertEqual("¹²³¼½¾", try p.html()) 133 | } 134 | 135 | func testNoSpuriousDecodes() throws { 136 | let string: String = "http://www.foo.com?a=1&num_rooms=1&children=0&int=VA&b=2" 137 | XCTAssertEqual(string, try Entities.unescape(string)) 138 | } 139 | 140 | func testUscapesGtInXmlAttributesButNotInHtml() throws { 141 | // https://github.com/jhy/jsoup/issues/528 - < is OK in HTML attribute values, but not in XML 142 | 143 | let docHtml: String = "One" 144 | let doc: Document = try SwiftSoup.parse(docHtml) 145 | let element: Element = try doc.select("a").first()! 146 | 147 | doc.outputSettings().escapeMode(Entities.EscapeMode.base) 148 | XCTAssertEqual("One

\">One
", try element.outerHtml()) 149 | 150 | doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml) 151 | XCTAssertEqual("One</p>\">One", try element.outerHtml()) 152 | } 153 | 154 | static var allTests = { 155 | return [ 156 | ("testLinuxTestSuiteIncludesAllTests", testLinuxTestSuiteIncludesAllTests), 157 | ("testEscape", testEscape), 158 | ("testXhtml", testXhtml), 159 | ("testGetByName", testGetByName), 160 | ("testEscapeSupplementaryCharacter", testEscapeSupplementaryCharacter), 161 | ("testNotMissingMultis", testNotMissingMultis), 162 | ("testnotMissingSupplementals", testnotMissingSupplementals), 163 | ("testUnescape", testUnescape), 164 | ("testStrictUnescape", testStrictUnescape), 165 | ("testCaseSensitive", testCaseSensitive), 166 | ("testQuoteReplacements", testQuoteReplacements), 167 | ("testLetterDigitEntities", testLetterDigitEntities), 168 | ("testNoSpuriousDecodes", testNoSpuriousDecodes), 169 | ("testUscapesGtInXmlAttributesButNotInHtml", testUscapesGtInXmlAttributesButNotInHtml) 170 | ] 171 | }() 172 | } 173 | -------------------------------------------------------------------------------- /Tests/SwiftSoupTests/FormElementTest.swift: -------------------------------------------------------------------------------- 1 | // 2 | // FormElementTest.swift 3 | // SwiftSoup 4 | // 5 | // Created by Nabil Chatbi on 09/11/16. 6 | // 7 | 8 | import XCTest 9 | import SwiftSoup 10 | 11 | class FormElementTest: XCTestCase { 12 | 13 | func testLinuxTestSuiteIncludesAllTests() { 14 | #if os(macOS) || os(iOS) || os(tvOS) || os(watchOS) 15 | let thisClass = type(of: self) 16 | let linuxCount = thisClass.allTests.count 17 | let darwinCount = Int(thisClass.defaultTestSuite.testCaseCount) 18 | XCTAssertEqual(linuxCount, darwinCount, "\(darwinCount - linuxCount) tests are missing from allTests") 19 | #endif 20 | } 21 | 22 | func testHasAssociatedControls() throws { 23 | //"button", "fieldset", "input", "keygen", "object", "output", "select", "textarea" 24 | let html = "