├── Sources ├── CHTMLSAXParser │ ├── include │ │ ├── module.modulemap │ │ └── CHTMLSAXParser.h │ └── CHTMLSAXParser.c └── HTMLSAXParser │ ├── HTMLSAXParser.h │ ├── EscapeSpecialCharacters.swift │ ├── HTMLSAXParser.swift │ └── HTMLSAXParser+libxml2.swift ├── Tests └── HTMLSAXParser │ ├── testdata │ ├── test_uft16le.html │ ├── test_shiftjis.html │ ├── test_jp2022.html │ ├── test_uft8.html │ └── article_with_images.html │ ├── Info.plist │ ├── HTMLEncodeEntitiesTests.swift │ └── HTMLParserTests.swift ├── HTMLSAXParser.xcodeproj ├── project.xcworkspace │ └── contents.xcworkspacedata ├── xcshareddata │ └── xcschemes │ │ ├── HTMLSAXParser MacOS.xcscheme │ │ ├── HTMLSAXParser AppleTV.xcscheme │ │ ├── HTMLParserDemo.xcscheme │ │ └── HTMLSAXParser iOS.xcscheme └── project.pbxproj ├── Info_AppleTV.plist ├── Info_iOS.plist ├── Info_MacOS.plist ├── Package.swift ├── .swiftlint.yml ├── HTMLParserDemo ├── Info.plist ├── Base.lproj │ ├── Main.storyboard │ └── LaunchScreen.storyboard ├── ViewController.swift ├── Assets.xcassets │ └── AppIcon.appiconset │ │ └── Contents.json └── AppDelegate.swift ├── .gitignore ├── README.md └── LICENSE /Sources/CHTMLSAXParser/include/module.modulemap: -------------------------------------------------------------------------------- 1 | module CHTMLSAXParser { 2 | header "CHTMLSAXParser.h" 3 | link "xml2" 4 | export * 5 | } 6 | -------------------------------------------------------------------------------- /Tests/HTMLSAXParser/testdata/test_uft16le.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raymccrae/swift-htmlsaxparser/HEAD/Tests/HTMLSAXParser/testdata/test_uft16le.html -------------------------------------------------------------------------------- /Tests/HTMLSAXParser/testdata/test_shiftjis.html: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/raymccrae/swift-htmlsaxparser/HEAD/Tests/HTMLSAXParser/testdata/test_shiftjis.html -------------------------------------------------------------------------------- /HTMLSAXParser.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /Tests/HTMLSAXParser/testdata/test_jp2022.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |

My First Heading

9 | 10 |

My first paragraph.

11 | 12 | $B$3$s$K$A$O(B 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /Tests/HTMLSAXParser/testdata/test_uft8.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |

My First Heading

9 | 10 |

My first paragraph.

11 | 12 | © Copyright 13 | α to Ω 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /Tests/HTMLSAXParser/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | BNDL 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleVersion 20 | 1 21 | 22 | 23 | -------------------------------------------------------------------------------- /Info_AppleTV.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | FMWK 17 | CFBundleShortVersionString 18 | 0.4 19 | CFBundleVersion 20 | $(CURRENT_PROJECT_VERSION) 21 | NSPrincipalClass 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /Info_iOS.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | FMWK 17 | CFBundleShortVersionString 18 | 0.4 19 | CFBundleVersion 20 | $(CURRENT_PROJECT_VERSION) 21 | NSPrincipalClass 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /Info_MacOS.plist: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | FMWK 17 | CFBundleShortVersionString 18 | 0.4 19 | CFBundleVersion 20 | $(CURRENT_PROJECT_VERSION) 21 | NSHumanReadableCopyright 22 | Copyright © 2017 Raymond Mccrae. All rights reserved. 23 | NSPrincipalClass 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /Sources/HTMLSAXParser/HTMLSAXParser.h: -------------------------------------------------------------------------------- 1 | // 2 | // HTMLSAXParser.h 3 | // HTMLSAXParser 4 | // 5 | // Created by Raymond McCrae on 20/07/2017. 6 | // Copyright © 2017 Raymond McCrae. 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | #import 22 | 23 | //! Project version number for HTMLSAXParser. 24 | FOUNDATION_EXPORT double HTMLSAXParserVersionNumber; 25 | 26 | //! Project version string for HTMLSAXParser. 
27 | FOUNDATION_EXPORT const unsigned char HTMLSAXParserVersionString[]; 28 | 29 | // In this header, you should import all the public headers of your framework using statements like #import 30 | 31 | 32 | -------------------------------------------------------------------------------- /Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version:4.0 2 | // The swift-tools-version declares the minimum version of Swift required to build this package. 3 | 4 | import PackageDescription 5 | 6 | let package = Package( 7 | name: "HTMLSAXParser", 8 | products: [ 9 | // Products define the executables and libraries produced by a package, and make them visible to other packages. 10 | .library( 11 | name: "HTMLSAXParser", 12 | targets: ["HTMLSAXParser"]), 13 | ], 14 | dependencies: [ 15 | // Dependencies declare other packages that this package depends on. 16 | // .package(url: /* package url */, from: "1.0.0"), 17 | ], 18 | targets: [ 19 | // Targets are the basic building blocks of a package. A target can define a module or a test suite. 20 | // Targets can depend on other targets in this package, and on products in packages which this package depends on. 
21 | .target( 22 | name: "HTMLSAXParser", 23 | dependencies: ["CHTMLSAXParser"]), 24 | .target( 25 | name: "CHTMLSAXParser", 26 | dependencies: []), 27 | .testTarget( 28 | name: "HTMLSAXParserTests", 29 | dependencies: ["HTMLSAXParser"], 30 | path: "Tests" 31 | ) 32 | ] 33 | ) 34 | -------------------------------------------------------------------------------- /.swiftlint.yml: -------------------------------------------------------------------------------- 1 | included: 2 | - Sources 3 | - Tests 4 | excluded: 5 | 6 | opt_in_rules: 7 | - closure_spacing # closure should have single space inside each brace 8 | - empty_count # prefer isEmpty over comparing to 0 9 | - number_separator # underscore should be used as thousand separator in large decimal numbers 10 | disabled_rules: 11 | - todo # todo and fixme should be avoided. Use custom script for this 12 | - vertical_parameter_alignment # parameter alignment in functions. Xcode use another alignment logic 13 | file_length: 14 | warning: 600 15 | error: 1000 16 | line_length: 120 17 | function_body_length: 18 | - 30 19 | - 50 20 | nesting: 21 | type_level: 22 | warning: 3 23 | statement_level: 24 | warning: 10 25 | type_name: 26 | excluded: K 27 | variable_name: 28 | excluded: 29 | - id 30 | custom_rules: 31 | open_bracket_blank_line: 32 | included: ".*.swift" 33 | name: "blank line after open bracket" 34 | regex: "{\n[[[:blank:]]\n]*\n" 35 | message: "Blank line after open bracket should be removed" 36 | severity: warning 37 | closed_brackets_blank_line: 38 | included: ".*.swift" 39 | name: "blank lines between closed brackets" 40 | regex: "}\n[[[:blank:]]\n]*\n}" 41 | message: "Blank lines between closed brackets should be removed" 42 | severity: warning -------------------------------------------------------------------------------- /Tests/HTMLSAXParser/testdata/article_with_images.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Images: img 6 | 18 | 19 | 20 
|
21 |

Images

22 | Anaconda 23 |

Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.

24 | Tarantula 25 | 26 | 30 | 31 |

Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.

32 | Owl monkeys 33 |
34 | 35 | -------------------------------------------------------------------------------- /HTMLParserDemo/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | APPL 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleVersion 20 | 1 21 | LSRequiresIPhoneOS 22 | 23 | UILaunchStoryboardName 24 | LaunchScreen 25 | UIMainStoryboardFile 26 | Main 27 | UIRequiredDeviceCapabilities 28 | 29 | armv7 30 | 31 | UISupportedInterfaceOrientations 32 | 33 | UIInterfaceOrientationPortrait 34 | UIInterfaceOrientationLandscapeLeft 35 | UIInterfaceOrientationLandscapeRight 36 | 37 | UISupportedInterfaceOrientations~ipad 38 | 39 | UIInterfaceOrientationPortrait 40 | UIInterfaceOrientationPortraitUpsideDown 41 | UIInterfaceOrientationLandscapeLeft 42 | UIInterfaceOrientationLandscapeRight 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Xcode 2 | # 3 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore 4 | 5 | .DS_Store 6 | 7 | ## Build generated 8 | build/ 9 | DerivedData/ 10 | 11 | ## Various settings 12 | *.pbxuser 13 | !default.pbxuser 14 | *.mode1v3 15 | !default.mode1v3 16 | *.mode2v3 17 | !default.mode2v3 18 | *.perspectivev3 19 | !default.perspectivev3 20 | xcuserdata/ 21 | 22 | ## Other 23 | *.moved-aside 24 | *.xccheckout 25 | *.xcscmblueprint 26 | 27 | ## Obj-C/Swift specific 28 | *.hmap 29 | *.ipa 30 | *.dSYM.zip 31 | *.dSYM 32 | 33 | ## Playgrounds 34 | timeline.xctimeline 35 | playground.xcworkspace 36 | 37 | # Swift Package Manager 38 | # 
39 | # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies. 40 | # Packages/ 41 | # Package.pins 42 | .build/ 43 | 44 | # CocoaPods 45 | # 46 | # We recommend against adding the Pods directory to your .gitignore. However 47 | # you should judge for yourself, the pros and cons are mentioned at: 48 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control 49 | # 50 | # Pods/ 51 | 52 | # Carthage 53 | # 54 | # Add this line if you want to avoid checking in source code from Carthage dependencies. 55 | # Carthage/Checkouts 56 | 57 | Carthage/Build 58 | 59 | # fastlane 60 | # 61 | # It is recommended to not store the screenshots in the git repo. Instead, use fastlane to re-generate the 62 | # screenshots whenever they are needed. 63 | # For more information about the recommended setup visit: 64 | # https://docs.fastlane.tools/best-practices/source-control/#source-control 65 | 66 | fastlane/report.xml 67 | fastlane/Preview.html 68 | fastlane/screenshots 69 | fastlane/test_output 70 | -------------------------------------------------------------------------------- /HTMLParserDemo/Base.lproj/Main.storyboard: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /HTMLParserDemo/Base.lproj/LaunchScreen.storyboard: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /HTMLParserDemo/ViewController.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ViewController.swift 3 | // 
HTMLParserDemo 4 | // 5 | // Created by Raymond Mccrae on 20/07/2017. 6 | // Copyright © 2017 Raymond McCrae. 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | import UIKit 22 | import HTMLSAXParser 23 | 24 | class ViewController: UIViewController { 25 | 26 | override func viewDidLoad() { 27 | super.viewDidLoad() 28 | // Do any additional setup after loading the view, typically from a nib. 29 | 30 | do { 31 | print("hello & goodbye".encodeHTMLEntities()!) 32 | let parser = HTMLSAXParser() 33 | try parser.parse(string: "test") { context, event in 34 | switch (event) { 35 | case let .characters(text): 36 | let location = context.location 37 | print("Found \(text) at \(location.column)") 38 | 39 | default: 40 | break 41 | } 42 | } 43 | } 44 | catch { 45 | 46 | } 47 | } 48 | 49 | override func didReceiveMemoryWarning() { 50 | super.didReceiveMemoryWarning() 51 | // Dispose of any resources that can be recreated. 
52 | } 53 | 54 | 55 | } 56 | 57 | -------------------------------------------------------------------------------- /HTMLParserDemo/Assets.xcassets/AppIcon.appiconset/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "images" : [ 3 | { 4 | "idiom" : "iphone", 5 | "size" : "20x20", 6 | "scale" : "2x" 7 | }, 8 | { 9 | "idiom" : "iphone", 10 | "size" : "20x20", 11 | "scale" : "3x" 12 | }, 13 | { 14 | "idiom" : "iphone", 15 | "size" : "29x29", 16 | "scale" : "2x" 17 | }, 18 | { 19 | "idiom" : "iphone", 20 | "size" : "29x29", 21 | "scale" : "3x" 22 | }, 23 | { 24 | "idiom" : "iphone", 25 | "size" : "40x40", 26 | "scale" : "2x" 27 | }, 28 | { 29 | "idiom" : "iphone", 30 | "size" : "40x40", 31 | "scale" : "3x" 32 | }, 33 | { 34 | "idiom" : "iphone", 35 | "size" : "60x60", 36 | "scale" : "2x" 37 | }, 38 | { 39 | "idiom" : "iphone", 40 | "size" : "60x60", 41 | "scale" : "3x" 42 | }, 43 | { 44 | "idiom" : "ipad", 45 | "size" : "20x20", 46 | "scale" : "1x" 47 | }, 48 | { 49 | "idiom" : "ipad", 50 | "size" : "20x20", 51 | "scale" : "2x" 52 | }, 53 | { 54 | "idiom" : "ipad", 55 | "size" : "29x29", 56 | "scale" : "1x" 57 | }, 58 | { 59 | "idiom" : "ipad", 60 | "size" : "29x29", 61 | "scale" : "2x" 62 | }, 63 | { 64 | "idiom" : "ipad", 65 | "size" : "40x40", 66 | "scale" : "1x" 67 | }, 68 | { 69 | "idiom" : "ipad", 70 | "size" : "40x40", 71 | "scale" : "2x" 72 | }, 73 | { 74 | "idiom" : "ipad", 75 | "size" : "76x76", 76 | "scale" : "1x" 77 | }, 78 | { 79 | "idiom" : "ipad", 80 | "size" : "76x76", 81 | "scale" : "2x" 82 | }, 83 | { 84 | "idiom" : "ipad", 85 | "size" : "83.5x83.5", 86 | "scale" : "2x" 87 | } 88 | ], 89 | "info" : { 90 | "version" : 1, 91 | "author" : "xcode" 92 | } 93 | } -------------------------------------------------------------------------------- /Tests/HTMLSAXParser/HTMLEncodeEntitiesTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // 
HTMLEncodeEntitiesTests.swift 3 | // HTMLParserTests 4 | // 5 | // Created by Raymond Mccrae on 21/07/2017. 6 | // Copyright © 2017 Raymond McCrae. 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | import XCTest 22 | @testable import HTMLSAXParser 23 | 24 | class HTMLEncodeEntitiesTests: XCTestCase { 25 | 26 | func testStringEncodeHTMLEntities() { 27 | XCTAssertEqual("".encodeHTMLEntities(), "") 28 | XCTAssertEqual("A".encodeHTMLEntities(), "A") 29 | XCTAssertEqual("&".encodeHTMLEntities(), "&") 30 | XCTAssertEqual("<".encodeHTMLEntities(), "<") 31 | XCTAssertEqual(">".encodeHTMLEntities(), ">") 32 | XCTAssertEqual("€".encodeHTMLEntities(), "€") 33 | 34 | XCTAssertEqual("\"".encodeHTMLEntities(), """) 35 | XCTAssertEqual("\"".encodeHTMLEntities(quoteCharacter: .none), "\"") 36 | XCTAssertEqual("\"".encodeHTMLEntities(quoteCharacter: .singleQuote), "\"") 37 | XCTAssertEqual("\"".encodeHTMLEntities(quoteCharacter: .doubleQuote), """) 38 | 39 | XCTAssertEqual("'".encodeHTMLEntities(), "'") 40 | XCTAssertEqual("'".encodeHTMLEntities(quoteCharacter: .none), "'") 41 | XCTAssertEqual("'".encodeHTMLEntities(quoteCharacter: .singleQuote), "'") 42 | XCTAssertEqual("'".encodeHTMLEntities(quoteCharacter: .doubleQuote), "'") 43 | } 44 | 45 | func testEmptyDataEncodeHTMLEntities() { 46 | let emptyData = Data() 47 | guard let result = emptyData.encodeHTMLEntities() else { 48 | XCTFail("encodeHTMLEntities should 
not return nil") 49 | return 50 | } 51 | XCTAssert(result.isEmpty, "Resulting Data object should have zero length") 52 | } 53 | 54 | func testInvalidCharDataEncodeHTMLEntities() { 55 | let invalidData = Data(bytes: [0xff]) 56 | let result = invalidData.encodeHTMLEntities() 57 | XCTAssertEqual(result, nil) 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /HTMLParserDemo/AppDelegate.swift: -------------------------------------------------------------------------------- 1 | // 2 | // AppDelegate.swift 3 | // HTMLParserDemo 4 | // 5 | // Created by Raymond Mccrae on 20/07/2017. 6 | // Copyright © 2017 Raymond McCrae. 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | import UIKit 22 | 23 | @UIApplicationMain 24 | class AppDelegate: UIResponder, UIApplicationDelegate { 25 | 26 | var window: UIWindow? 27 | 28 | 29 | func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplicationLaunchOptionsKey: Any]?) -> Bool { 30 | // Override point for customization after application launch. 31 | return true 32 | } 33 | 34 | func applicationWillResignActive(_ application: UIApplication) { 35 | // Sent when the application is about to move from active to inactive state. 
This can occur for certain types of temporary interruptions (such as an incoming phone call or SMS message) or when the user quits the application and it begins the transition to the background state. 36 | // Use this method to pause ongoing tasks, disable timers, and invalidate graphics rendering callbacks. Games should use this method to pause the game. 37 | } 38 | 39 | func applicationDidEnterBackground(_ application: UIApplication) { 40 | // Use this method to release shared resources, save user data, invalidate timers, and store enough application state information to restore your application to its current state in case it is terminated later. 41 | // If your application supports background execution, this method is called instead of applicationWillTerminate: when the user quits. 42 | } 43 | 44 | func applicationWillEnterForeground(_ application: UIApplication) { 45 | // Called as part of the transition from the background to the active state; here you can undo many of the changes made on entering the background. 46 | } 47 | 48 | func applicationDidBecomeActive(_ application: UIApplication) { 49 | // Restart any tasks that were paused (or not yet started) while the application was inactive. If the application was previously in the background, optionally refresh the user interface. 50 | } 51 | 52 | func applicationWillTerminate(_ application: UIApplication) { 53 | // Called when the application is about to terminate. Save data if appropriate. See also applicationDidEnterBackground:. 
54 | } 55 | 56 | 57 | } 58 | 59 | -------------------------------------------------------------------------------- /HTMLSAXParser.xcodeproj/xcshareddata/xcschemes/HTMLSAXParser MacOS.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 31 | 32 | 33 | 34 | 35 | 36 | 47 | 48 | 54 | 55 | 56 | 57 | 58 | 59 | 65 | 66 | 72 | 73 | 74 | 75 | 77 | 78 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /HTMLSAXParser.xcodeproj/xcshareddata/xcschemes/HTMLSAXParser AppleTV.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 31 | 32 | 33 | 34 | 35 | 36 | 47 | 48 | 54 | 55 | 56 | 57 | 58 | 59 | 65 | 66 | 72 | 73 | 74 | 75 | 77 | 78 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /Sources/CHTMLSAXParser/include/CHTMLSAXParser.h: -------------------------------------------------------------------------------- 1 | // 2 | // CHTMLSAXParser.h 3 | // HTMLSAXParser 4 | // 5 | // Created by Raymond Mccrae on 31/07/2017. 6 | // Copyright © 2017 Raymond McCrae. 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 
19 | // 20 | 21 | #import 22 | #import 23 | #import 24 | 25 | #ifndef CHTMLSAXParser_h 26 | #define CHTMLSAXParser_h 27 | 28 | typedef void(*HTMLParserWrappedErrorSAXFunc)(void *ctx, const char *msg); 29 | typedef void(*HTMLParserWrappedWarningSAXFunc)(void *ctx, const char *msg); 30 | 31 | /** 32 | htmlparser_global_error_sax_func is a global function pointer to a wrapper for a variadic C function 33 | for handling libxml2 parsing errors. This is used since Swift does not support C variadic functions 34 | (only va_list functions are supported). This global variable is used in conjunction with the function 35 | htmlparser_set_global_error_handler to install the global parser error function which forwards to 36 | the function this global variable points to. 37 | 38 | This global is only intended to ever be set once and before any parsing has been initiated. 39 | */ 40 | extern HTMLParserWrappedErrorSAXFunc htmlparser_global_error_sax_func; 41 | 42 | /** 43 | htmlparser_global_warning_sax_func is a global function pointer to a wrapper for a variadic C function 44 | for handling libxml2 parsing warnings. This is used since Swift does not support C variadic functions 45 | (only va_list functions are supported). This global variable is used in conjunction with the function 46 | htmlparser_set_global_warning_handler to install the global parser warning function which forwards to 47 | the function this global variable points to. 48 | 49 | This global is only intended to ever be set once and before any parsing has been initiated. 50 | */ 51 | extern HTMLParserWrappedWarningSAXFunc htmlparser_global_warning_sax_func; 52 | 53 | /** 54 | Sets the error handler for the htmlSAXHandler struct to a global error handling function that 55 | in turn forwards to the function pointed to by htmlparser_global_error_sax_func. 56 | This layer of indirection is used to overcome Swift's lack of handling for C variadic functions. 
57 | 58 | The global error handling function will process the format string and variable arguments into 59 | a single string for the wrapped error function. 60 | */ 61 | void htmlparser_set_global_error_handler(htmlSAXHandlerPtr sax_handler); 62 | 63 | /** 64 | Sets the warning handler for the htmlSAXHandler struct to a global warning handling function that 65 | in turn forwards to the function pointed to by htmlparser_global_warning_sax_func. 66 | This layer of indirection is used to overcome Swift's lack of handling for C variadic functions. 67 | 68 | The global warning handling function will process the format string and variable arguments into 69 | a single string for the wrapped warning function. 70 | */ 71 | void htmlparser_set_global_warning_handler(htmlSAXHandlerPtr sax_handler); 72 | 73 | #endif /* CHTMLSAXParser_h */ 74 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | HTML SAX Parser for Swift 4 2 | ====== 3 | **HTMLSAXParser** is a Swift module that wraps the libxml2 HTMLParser for the purposes 4 | of providing a simple lightweight SAX parser for HTML content. libxml2 is part of the 5 | Mac, iOS and Apple TV SDKs, so if you are developing for those platforms you will not 6 | require any additional dependencies. SAX parsers provide an event-based parsing process, 7 | where a closure you provide will be called with a series of events as the parser moves 8 | through the document. 9 | 10 | **HTMLSAXParser** takes inspiration from NSXMLParser; however, it uses enums with associated 11 | values for the parsing events, rather than a delegate class. 
A simple example of usage 12 | is: 13 | 14 | ```swift 15 | let parser = HTMLSAXParser() 16 | do { 17 | try parser.parse(string: "Some HTML Content") { context, event in 18 | switch event { 19 | case let .startElement(name, attributes): 20 | print("Found element : \(name)") 21 | case let .characters(text): 22 | print("Found characters : \(text)") 23 | default: 24 | break 25 | } 26 | } 27 | } 28 | catch { 29 | // Handle error 30 | } 31 | ``` 32 | 33 | This approach lends itself to short, inline processing of HTML without the need for 34 | a parser delegate class. 35 | 36 | ```swift 37 | /** 38 | Example function to extract all the image sources from HTML data. Specifically 39 | fetching the "src" attribute from all "img" tags. 40 | */ 41 | func imageSources(from htmlData: Data) throws -> [String] { 42 | var sources: [String] = [] 43 | let parser = HTMLSAXParser() 44 | try parser.parse(data: htmlData) { context, event in 45 | switch event { 46 | case let .startElement(name, attributes) where name == "img": 47 | if let source = attributes["src"] { 48 | sources.append(source) 49 | } 50 | default: 51 | break 52 | } 53 | } 54 | return sources 55 | } 56 | ``` 57 | 58 | ## Installation 59 | 60 | ### Swift Package Manager 61 | 62 | Add HTMLSAXParser as a dependency to your project's `Package.swift`. For example: 63 | 64 | ```swift 65 | // swift-tools-version:4.0 66 | import PackageDescription 67 | 68 | let package = Package( 69 | name: "YourProject", 70 | dependencies: [ 71 | // Dependencies declare other packages that this package depends on. 72 | .package(url: "https://github.com/raymccrae/swift-htmlsaxparser.git", .branch("master")) 73 | ], 74 | targets: [ 75 | // Targets are the basic building blocks of a package. A target can define a module or a test suite. 76 | // Targets can depend on other targets in this package, and on products in packages which this package depends on. 
77 | .target( 78 | name: "YourProject", 79 | dependencies: ["HTMLSAXParser"]), 80 | ] 81 | ) 82 | ``` 83 | 84 | Since this module makes use of `libxml2` you will need to inform the C compiler where the 85 | header files for libxml2 are located. If you have Xcode installed (Mac Only) then you can 86 | include the following additional arguments to the swift build command to the current SDK 87 | path: 88 | 89 | ```bash 90 | $ swift build -Xcc -I"$(xcrun --show-sdk-path)/usr/include/libxml2" 91 | ``` 92 | 93 | ## Contributors 94 | 95 | ### Contributors on GitHub 96 | * [Contributors](https://github.com/raymccrae/swift-htmlsaxparser/graphs/contributors) 97 | 98 | ## License 99 | * see [LICENSE](https://github.com/raymccrae/swift-htmlsaxparser/blob/master/LICENSE) file 100 | 101 | ## Version 102 | * Version 0.4 -------------------------------------------------------------------------------- /Sources/CHTMLSAXParser/CHTMLSAXParser.c: -------------------------------------------------------------------------------- 1 | // 2 | // CHTMLSAXParser.c 3 | // HTMLSAXParser 4 | // 5 | // Created by Raymond Mccrae on 31/07/2017. 6 | // Copyright © 2017 Raymond McCrae. 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 
19 | // 20 | 21 | #include 22 | #include 23 | #include "CHTMLSAXParser.h" 24 | 25 | HTMLParserWrappedErrorSAXFunc htmlparser_global_error_sax_func; 26 | HTMLParserWrappedWarningSAXFunc htmlparser_global_warning_sax_func; 27 | 28 | static char* formatstr(const char *format, va_list args) { 29 | int consumed = 0; 30 | size_t buffer_size = 0; 31 | char *buffer = NULL; 32 | va_list vl; 33 | 34 | do { 35 | if (consumed > buffer_size) { 36 | buffer_size = consumed + 1; // Add 1 for the null character 37 | } 38 | else { 39 | buffer_size += 100; 40 | } 41 | buffer = realloc(buffer, buffer_size); 42 | 43 | // Check buffer is not null in case malloc / realloc failed. 44 | if (buffer != NULL) { 45 | va_copy(vl, args); 46 | consumed = vsnprintf(buffer, buffer_size, format, vl); 47 | va_end(vl); 48 | } 49 | } while (buffer != NULL && consumed > 0 && consumed >= buffer_size); 50 | 51 | if (buffer != NULL) { 52 | if (consumed > 0 && consumed < buffer_size) { 53 | return buffer; 54 | } 55 | 56 | free(buffer); 57 | } 58 | 59 | return NULL; 60 | } 61 | 62 | /** 63 | The global error handling function for the module. This function will format the 64 | message into a single string before calling the wrapped error function. 65 | */ 66 | static void htmlparser_error_sax_handler(void *ctx, const char *msg, ...) { 67 | va_list vl; 68 | char *formatted_msg = NULL; 69 | 70 | if (htmlparser_global_error_sax_func == NULL) { 71 | return; 72 | } 73 | 74 | va_start(vl, msg); 75 | formatted_msg = formatstr(msg, vl); 76 | va_end(vl); 77 | 78 | htmlparser_global_error_sax_func(ctx, formatted_msg); 79 | 80 | if (formatted_msg != NULL) { 81 | free(formatted_msg); 82 | } 83 | } 84 | 85 | static void htmlparser_warning_sax_handler(void *ctx, const char *msg, ...) 
{ 86 | va_list vl; 87 | char *formatted_msg = NULL; 88 | 89 | if (htmlparser_global_warning_sax_func == NULL) { 90 | return; 91 | } 92 | 93 | va_start(vl, msg); 94 | formatted_msg = formatstr(msg, vl); 95 | va_end(vl); 96 | 97 | htmlparser_global_warning_sax_func(ctx, formatted_msg); 98 | 99 | if (formatted_msg != NULL) { 100 | free(formatted_msg); 101 | } 102 | } 103 | 104 | void htmlparser_set_global_error_handler(htmlSAXHandlerPtr sax_handler) { 105 | if (sax_handler != NULL) { 106 | sax_handler->error = htmlparser_error_sax_handler; 107 | } 108 | } 109 | 110 | void htmlparser_set_global_warning_handler(htmlSAXHandlerPtr sax_handler) { 111 | if (sax_handler != NULL) { 112 | sax_handler->warning = htmlparser_warning_sax_handler; 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /HTMLSAXParser.xcodeproj/xcshareddata/xcschemes/HTMLParserDemo.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 31 | 32 | 33 | 34 | 40 | 41 | 42 | 43 | 44 | 45 | 56 | 58 | 64 | 65 | 66 | 67 | 68 | 69 | 75 | 77 | 83 | 84 | 85 | 86 | 88 | 89 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /HTMLSAXParser.xcodeproj/xcshareddata/xcschemes/HTMLSAXParser iOS.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 32 | 33 | 35 | 41 | 42 | 43 | 44 | 45 | 51 | 52 | 53 | 54 | 55 | 56 | 67 | 68 | 74 | 75 | 76 | 77 | 78 | 79 | 85 | 86 | 92 | 93 | 94 | 95 | 97 | 98 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /Sources/HTMLSAXParser/EscapeSpecialCharacters.swift: -------------------------------------------------------------------------------- 1 | // 2 | // EscapeSpecialCharacters.swift 3 | // HTMLSAXParser 4 | // 5 | // Created by Raymond Mccrae on 21/07/2017. 
6 | // Copyright © 2017 Raymond McCrae. 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | import Foundation 22 | import CHTMLSAXParser 23 | 24 | public enum HTMLQuoteCharacter: Character { 25 | case none = "\0" 26 | case singleQuote = "'" 27 | case doubleQuote = "\"" 28 | 29 | var characterCode: CInt { 30 | switch self { 31 | case .none: 32 | return 0 33 | case .singleQuote: 34 | return 39 35 | case .doubleQuote: 36 | return 34 37 | } 38 | } 39 | } 40 | 41 | public extension Data { 42 | 43 | // swiftlint:disable:next function_parameter_count 44 | fileprivate func encodeHTMLEntitiesBytes(_ outputLength: inout Int, 45 | _ outputLengthBytes: UnsafeMutablePointer, 46 | _ inputLengthBytes: UnsafeMutablePointer, 47 | _ quoteCharacter: HTMLQuoteCharacter, 48 | _ inputLength: Int, 49 | _ loop: inout Bool, 50 | _ bufferGrowthFactor: Double) -> Data? { 51 | return self.withUnsafeBytes { (inputBytes: UnsafePointer) -> Data? 
in 52 | let outputBufferCapacity = outputLength 53 | let outputBuffer = UnsafeMutablePointer.allocate(capacity: outputBufferCapacity) 54 | defer { 55 | outputBuffer.deallocate(capacity: Int(outputBufferCapacity)) 56 | } 57 | let result = htmlEncodeEntities(outputBuffer, 58 | outputLengthBytes, 59 | inputBytes, 60 | inputLengthBytes, 61 | quoteCharacter.characterCode) 62 | 63 | if result == 0 { // zero represents success 64 | // Have we consumed the length of the input buffer 65 | let consumed = inputLengthBytes.pointee 66 | if consumed == inputLength { 67 | loop = false 68 | return Data(bytes: outputBuffer, count: Int(outputLengthBytes.pointee)) 69 | } else { 70 | // if we have not consumed the full input buffer. 71 | // estimate a new output buffer length 72 | let ratio: Double 73 | if inputLength == 0 { 74 | ratio = 0.0 75 | } else { 76 | ratio = Double(consumed) / Double(inputLength) 77 | } 78 | outputLength = Int( (2.0 - ratio) * Double(outputLength) * bufferGrowthFactor ) 79 | } 80 | } else { 81 | loop = false 82 | } 83 | 84 | return nil 85 | } 86 | } 87 | 88 | /** 89 | Encodes the HTML entities within the receiver. This method interprets the receiver Data 90 | instance as UTF-8 encoded string data. It returns the resulting UTF-8 encoded string with 91 | the HTML entities encoded or nil if an error occurred. 92 | 93 | - parameter quoteCharacter: The HTML quote character for escaping. 94 | - returns: UTF-8 encoded data instance representing the encoded string, or nil if an error occurred. 95 | */ 96 | public func encodeHTMLEntities(quoteCharacter: HTMLQuoteCharacter = .doubleQuote) -> Data?
{ 97 | let bufferGrowthFactor = 1.4 98 | let inputLength = self.count 99 | var outputLength = Int(Double(inputLength) * bufferGrowthFactor) 100 | 101 | var inputLengthBytes = UnsafeMutablePointer.allocate(capacity: 1) 102 | var outputLengthBytes = UnsafeMutablePointer.allocate(capacity: 1) 103 | defer { 104 | inputLengthBytes.deallocate(capacity: 1) 105 | outputLengthBytes.deallocate(capacity: 1) 106 | } 107 | 108 | var loop = true 109 | 110 | repeat { 111 | inputLengthBytes.pointee = CInt(inputLength) 112 | outputLengthBytes.pointee = CInt(outputLength) 113 | 114 | let outputData = encodeHTMLEntitiesBytes(&outputLength, 115 | outputLengthBytes, 116 | inputLengthBytes, 117 | quoteCharacter, 118 | inputLength, 119 | &loop, 120 | bufferGrowthFactor) 121 | 122 | if let outputData = outputData { 123 | return outputData 124 | } 125 | 126 | } while loop 127 | 128 | return nil 129 | } 130 | } 131 | 132 | public extension String { 133 | 134 | /** 135 | Encodes the HTML entities within the receiver. 136 | 137 | - parameter quoteCharacter: The HTML quote character for escaping. 138 | - returns: The encoded string, or nil if an error occurred. 139 | */ 140 | public func encodeHTMLEntities(quoteCharacter: HTMLQuoteCharacter = .doubleQuote) -> String? { 141 | let utf8Data = Data(self.utf8) 142 | guard let encoded = utf8Data.encodeHTMLEntities(quoteCharacter: quoteCharacter) else { 143 | return nil 144 | } 145 | return String(data: encoded, encoding: .utf8) 146 | } 147 | 148 | } 149 | -------------------------------------------------------------------------------- /Tests/HTMLSAXParser/HTMLParserTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // HTMLSAXParserTests.swift 3 | // HTMLParserTests 4 | // 5 | // Created by Raymond Mccrae on 20/07/2017. 6 | // Copyright © 2017 Raymond McCrae. 
7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 19 | // 20 | 21 | // swiftlint:disable line_length function_body_length 22 | 23 | import XCTest 24 | @testable import HTMLSAXParser 25 | 26 | class HTMLParserTests: XCTestCase { 27 | 28 | fileprivate static let bundle: Bundle = Bundle.init(for: HTMLParserTests.self) 29 | fileprivate static let testHTMLDocumentUTF8: Data = loadHTMLDocumentData(named: "test_uft16le") 30 | fileprivate static let testHTMLArticleWithImages: Data = loadHTMLDocumentData(named: "article_with_images") 31 | 32 | static func loadHTMLDocumentData(named: String) -> Data { 33 | let docuemntURL = bundle.url(forResource: named, withExtension: "html")! 34 | guard let documentData = try? 
Data(contentsOf: docuemntURL) else { 35 | fatalError("Test data file \(named).html not found within test bundle") 36 | } 37 | return documentData 38 | } 39 | 40 | func test_parse_data_empty() { 41 | let data = Data() 42 | var threwError = false 43 | do { 44 | let parser = HTMLSAXParser() 45 | try parser.parse(data: data, handler: { (_, _) in 46 | XCTFail("Empty document should not generate any events") 47 | }) 48 | XCTFail("Empty document should throw an error") 49 | } catch HTMLSAXParser.Error.emptyDocument { 50 | threwError = true 51 | } catch { 52 | XCTFail("Wrong type of error thrown") 53 | } 54 | 55 | XCTAssertTrue(threwError) 56 | } 57 | 58 | func test_parse_string_empty() { 59 | let string = "" 60 | var threwError = false 61 | do { 62 | let parser = HTMLSAXParser() 63 | try parser.parse(string: string, handler: { (_, _) in 64 | XCTFail("Empty document should not generate any events") 65 | }) 66 | XCTFail("Empty document should throw an error") 67 | } catch HTMLSAXParser.Error.emptyDocument { 68 | threwError = true 69 | } catch { 70 | XCTFail("Wrong type of error thrown") 71 | } 72 | 73 | XCTAssertTrue(threwError) 74 | } 75 | 76 | func testExample() { 77 | // This is an example of a functional test case. 78 | // Use XCTAssert and related functions to verify your tests produce the correct results. 
79 | 80 | var calledStartElement = false 81 | var calledCharacters = false 82 | let parser = HTMLSAXParser() 83 | do { 84 | try parser.parse(string: "こんにちは") { (_, event) in 85 | switch event { 86 | case let .startElement(name, _): 87 | XCTAssertEqual(name, "hello") 88 | calledStartElement = true 89 | case let .characters(text): 90 | XCTAssertEqual(text, "こんにちは") 91 | calledCharacters = true 92 | default: 93 | break 94 | } 95 | } 96 | } catch { 97 | XCTFail("Unexpected error thrown") 98 | } 99 | 100 | XCTAssertTrue(calledStartElement) 101 | XCTAssertTrue(calledCharacters) 102 | } 103 | 104 | func testInvalidHTML() { 105 | let parser = HTMLSAXParser() 106 | do { 107 | try parser.parse(string: "N\" Style=\"background-color: red;\" hidden>" 126 | try parser.parse(string: html) { (_, event) in 127 | switch event { 128 | case let .startElement(name, attributes): 129 | XCTAssertEqual(startDocCount, 1) 130 | startElementCount += 1 131 | XCTAssertEqual(name, "p") 132 | XCTAssertEqual(attributes["id"], "123") 133 | XCTAssertEqual(attributes["class"], "paragraph") 134 | XCTAssertEqual(attributes["comment"], "P>N") 135 | XCTAssertEqual(attributes["style"], "background-color: red;") 136 | XCTAssertEqual(attributes["hidden"], "") 137 | XCTAssertEqual(attributes.count, 5) 138 | 139 | case let .endElement(name): 140 | XCTAssertEqual(startElementCount, 1) 141 | endElementCount += 1 142 | XCTAssertEqual(name, "p") 143 | 144 | case .startDocument: 145 | startDocCount += 1 146 | 147 | case .endDocument: 148 | XCTAssertEqual(endElementCount, 1) 149 | endDocCount += 1 150 | 151 | default: 152 | XCTFail("Unexpected event") 153 | } 154 | } 155 | } catch { 156 | XCTFail("Unexpected error thrown") 157 | } 158 | 159 | XCTAssertEqual(startDocCount, 1) 160 | XCTAssertEqual(endDocCount, 1) 161 | XCTAssertEqual(startElementCount, 1) 162 | XCTAssertEqual(endElementCount, 1) 163 | } 164 | 165 | func imageSources(from htmlData: Data) throws -> [String] { 166 | var sources: [String] = [] 167 | let 
parser = HTMLSAXParser() 168 | try parser.parse(data: htmlData) { _, event in 169 | switch event { 170 | case let .startElement(name, attributes) where name == "img": 171 | if let source = attributes["src"] { 172 | sources.append(source) 173 | } 174 | default: 175 | break 176 | } 177 | } 178 | return sources 179 | } 180 | 181 | func testImageExtraction() { 182 | do { 183 | let imageSources = try self.imageSources(from: HTMLParserTests.testHTMLArticleWithImages) 184 | XCTAssertEqual(imageSources, [ 185 | "https://upload.wikimedia.org/wikipedia/commons/thumb/9/93/01-COBRA-SUCURI-3M-WAGNER-MEIER_MG_2458.JPG/640px-01-COBRA-SUCURI-3M-WAGNER-MEIER_MG_2458.JPG", 186 | "https://upload.wikimedia.org/wikipedia/commons/thumb/9/98/Brachypelma_smithi_2009_G03.jpg/640px-Brachypelma_smithi_2009_G03.jpg", 187 | "https://upload.wikimedia.org/wikipedia/commons/d/d7/Panamanian_night_monkey.jpg"]) 188 | } catch { 189 | XCTFail("Error thrown while parsing") 190 | } 191 | } 192 | 193 | } 194 | -------------------------------------------------------------------------------- /Sources/HTMLSAXParser/HTMLSAXParser.swift: -------------------------------------------------------------------------------- 1 | // 2 | // HTMLSAXParser.swift 3 | // HTMLSAXParser 4 | // 5 | // Created by Raymond McCrae on 20/07/2017. 6 | // Copyright © 2017 Raymond McCrae. 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 
19 | // 20 | 21 | import Foundation 22 | import CHTMLSAXParser 23 | 24 | public protocol HTMLSAXParseContext { 25 | 26 | /// The current parsing location during the parsing process. 27 | var location: HTMLSAXParser.Location { get } 28 | var systemId: String? { get } 29 | var publicId: String? { get } 30 | 31 | /** 32 | Aborts the current HTML parsing to prevent further calls 33 | to the parser event handler. 34 | */ 35 | func abortParsing() 36 | 37 | } 38 | 39 | /** 40 | The HTMLSAXParser is a SAX style parser for HTML similar to NSXMLParser, however it uses enums with 41 | associated values for the parsing events, rather than a delegate class. It is implemented as a simple 42 | light-weight wrapper around HTMLParser found within the libxml2 library. 43 | 44 | Thread-safety: Instances of HTMLSAXParser are immutable and may safely be used by different threads 45 | concurrently, including running multiple concurrent parse invocations. Please note however with 46 | respect to the caller's EventHandler closure, you should not retain references to the HTMLSAXParseContext 47 | instance passed to the closure beyond the scope of the call. Additionally you should only access the 48 | HTMLSAXParseContext instance from the dispatch queue that called your event handler closure.
49 | */ 50 | open class HTMLSAXParser { 51 | 52 | public struct ParseOptions: OptionSet { 53 | public let rawValue: Int 54 | 55 | public init(rawValue: Int) { 56 | self.rawValue = rawValue 57 | } 58 | 59 | public static let recover = ParseOptions(rawValue: Int(HTML_PARSE_RECOVER.rawValue)) 60 | public static let noDefaultDTD = ParseOptions(rawValue: Int(HTML_PARSE_NODEFDTD.rawValue)) 61 | public static let noError = ParseOptions(rawValue: Int(HTML_PARSE_NOERROR.rawValue)) 62 | public static let noWarning = ParseOptions(rawValue: Int(HTML_PARSE_NOWARNING.rawValue)) 63 | public static let pedantic = ParseOptions(rawValue: Int(HTML_PARSE_PEDANTIC.rawValue)) 64 | public static let noBlanks = ParseOptions(rawValue: Int(HTML_PARSE_NOBLANKS.rawValue)) 65 | public static let noNetwork = ParseOptions(rawValue: Int(HTML_PARSE_NONET.rawValue)) 66 | public static let noImpliedElements = ParseOptions(rawValue: Int(HTML_PARSE_NOIMPLIED.rawValue)) 67 | public static let compactTextNodes = ParseOptions(rawValue: Int(HTML_PARSE_COMPACT.rawValue)) 68 | public static let ignoreEncodingHint = ParseOptions(rawValue: Int(HTML_PARSE_IGNORE_ENC.rawValue)) 69 | 70 | /// Default set of parse options. 71 | public static let `default`: ParseOptions = [ 72 | .recover, 73 | .noBlanks, 74 | .noNetwork, 75 | .noImpliedElements, 76 | .compactTextNodes] 77 | } 78 | 79 | public struct Location { 80 | public let line: Int 81 | public let column: Int 82 | } 83 | 84 | public enum Event { 85 | /// Event parser found the start of the document. 86 | case startDocument 87 | 88 | /// Event parser found the end of the document. 89 | case endDocument 90 | 91 | /// Event parser found an opening html tag. 92 | case startElement(name: String, attributes: [String: String]) 93 | 94 | /// Event parser found an ending html tag. 95 | case endElement(name: String) 96 | 97 | /// Event parser found character nodes. 98 | case characters(text: String) 99 | 100 | /// Event parser found a comment node. 
101 | case comment(text: String) 102 | 103 | /// Event parser found a CDATA block. 104 | case cdata(block: Data) 105 | 106 | /// Event parser found a processing instruction. 107 | case processingInstruction(target: String, data: String?) 108 | 109 | /// Event parser generated a warning during parsing. 110 | case warning(message: String) 111 | 112 | /// Event parser generated an error during parsing. 113 | case error(message: String) 114 | } 115 | 116 | /// An Error enum representing all possible errors that may be thrown by the parser. 117 | public enum Error: Swift.Error { 118 | 119 | /// An unknown error occurred. 120 | case unknown 121 | 122 | /// The character encoding given is not supported by the parser. 123 | case unsupportedCharEncoding 124 | 125 | /// The parser encountered an empty document 126 | case emptyDocument 127 | 128 | /// An error occurred during the parsing process. 129 | case parsingFailure(location: Location, message: String) 130 | } 131 | 132 | public typealias EventHandler = (HTMLSAXParseContext, Event) -> Void 133 | 134 | /// The parse options the html parser was initialised with. 135 | public let parseOptions: ParseOptions 136 | 137 | /** 138 | Initialize an instance of HTMLSAXParser with the given set of options. If no 139 | options are specified a default set of options will be used. For more details 140 | on the default options see HTMLSAXParser.ParseOptions.`default`. Instances 141 | of HTMLSAXParser are immutable and the options may not be changed on an 142 | existing instance. If you require a different set of options then you will 143 | be required to create a new instance. 144 | 145 | - Parameter parseOptions: An option set specifying options for parsing. 146 | */ 147 | public init(parseOptions: ParseOptions = .`default`) { 148 | self.parseOptions = parseOptions 149 | } 150 | 151 | /** 152 | Parse a string containing HTML content, calling the events on the handler 153 | supplied.
Despite the handler being marked as escaping the parse method will 154 | operate synchronously. 155 | 156 | Note that your handler should not retain references to the HTMLSAXParseContext 157 | instance passed to it beyond the scope of the call. Additionally you should only 158 | access the HTMLSAXParseContext instance from the dispatch queue that called your 159 | event handler closure. 160 | 161 | - Parameter string: The string containing the HTML content. 162 | - Parameter handler: The event handler closure that will be called during parsing. 163 | - Throws: `HTMLSAXParser.Error` if a fatal error occurred during parsing. 164 | */ 165 | open func parse(string: String, handler: @escaping EventHandler) throws { 166 | let utf8Data = Data(string.utf8) 167 | try parse(data: utf8Data, encoding: .utf8, handler: handler) 168 | } 169 | 170 | /** 171 | Parse a data representation of HTML content, calling the events on the handler 172 | supplied. The data will be interpreted using the encoding if supplied. If no 173 | encoding is given then the parser will attempt to detect the encoding. Despite 174 | the handler being marked as escaping the parse method will operate synchronously. 175 | 176 | Note that your handler should not retain references to the HTMLSAXParseContext 177 | instance passed to it beyond the scope of the call. Additionally you should only 178 | access the HTMLSAXParseContext instance from the dispatch queue that called your 179 | event handler closure. 180 | 181 | - Parameter data: The data containing the HTML content. 182 | - Parameter encoding: The character encoding to interpret the data. If no encoding 183 | is given then the parser will attempt to detect the encoding. 184 | - Parameter handler: The event handler closure that will be called during parsing. 185 | - Throws: `HTMLSAXParser.Error` if a fatal error occurred during parsing. 186 | */ 187 | open func parse(data: Data, encoding: String.Encoding?
= nil, handler: @escaping EventHandler) throws { 188 | let dataLength = data.count 189 | 190 | guard dataLength > 0 else { 191 | // libxml2 will not parse zero length data 192 | throw Error.emptyDocument 193 | } 194 | 195 | try _parse(data: data, encoding: encoding, handler: handler) 196 | } 197 | 198 | } 199 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Sources/HTMLSAXParser/HTMLSAXParser+libxml2.swift: -------------------------------------------------------------------------------- 1 | // 2 | // HTMLSAXParser+libxml2.swift 3 | // HTMLSAXParser 4 | // 5 | // Created by Raymond Mccrae on 20/07/2017. 6 | // Copyright © 2017 Raymond McCrae. 7 | // 8 | // Licensed under the Apache License, Version 2.0 (the "License"); 9 | // you may not use this file except in compliance with the License. 10 | // You may obtain a copy of the License at 11 | // 12 | // http://www.apache.org/licenses/LICENSE-2.0 13 | // 14 | // Unless required by applicable law or agreed to in writing, software 15 | // distributed under the License is distributed on an "AS IS" BASIS, 16 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | // See the License for the specific language governing permissions and 18 | // limitations under the License. 
//

import Foundation
import CHTMLSAXParser

/// libxml2-backed parsing implementation for `HTMLSAXParser`.
internal extension HTMLSAXParser {

    /// SAX callback table shared by every parse; built once by `createSAXHandler()`.
    private static let libxmlSAXHandler: htmlSAXHandler = createSAXHandler()

    /**
     Parse the given HTML data with libxml2, forwarding each SAX event to `handler`.

     - Parameter data: The raw bytes of the HTML document.
     - Parameter encoding: The character encoding of `data`. When `nil`, the encoding is
       detected from the data by `xmlDetectCharEncoding`.
     - Parameter handler: The closure that receives each parse event.
     - Throws: `Error.unsupportedCharEncoding` if the resolved encoding is
       `XML_CHAR_ENCODING_ERROR`; `Error.unknown` if the data is too large to be described to
       libxml2 or the parser context could not be created; `Error.parsingFailure` if parsing
       ended with a fatal error.
     */
    // swiftlint:disable:next identifier_name
    func _parse(data: Data, encoding: String.Encoding?, handler: @escaping EventHandler) throws {
        // libxml2 takes the buffer size as a C int; report oversized input as an error rather
        // than trapping in the Int32 conversion.
        guard let dataLength = Int32(exactly: data.count) else {
            throw Error.unknown
        }

        var charEncoding: xmlCharEncoding = XML_CHAR_ENCODING_NONE

        if let encoding = encoding {
            charEncoding = convert(from: encoding)
        } else {
            data.withUnsafeBytes { (dataBytes: UnsafePointer<UInt8>) -> Void in
                charEncoding = xmlDetectCharEncoding(dataBytes, dataLength)
            }
        }

        guard charEncoding != XML_CHAR_ENCODING_ERROR else {
            throw Error.unsupportedCharEncoding
        }

        try data.withUnsafeBytes { (dataBytes: UnsafePointer<Int8>) -> Void in
            let handlerContext = HandlerContext(handler: handler)
            // Passed unretained as the SAX user data; `handlerContext` stays referenced until
            // the `defer` block below runs at the end of this closure.
            let handlerContextPtr = Unmanaged<HandlerContext>.passUnretained(handlerContext).toOpaque()
            var libxmlHandler = HTMLSAXParser.libxmlSAXHandler
            guard let parserContext = htmlCreatePushParserCtxt(&libxmlHandler,
                                                               handlerContextPtr,
                                                               dataBytes,
                                                               dataLength,
                                                               nil,
                                                               charEncoding) else {
                throw Error.unknown
            }
            defer {
                // Free the parser context when we exit the scope, and clear the now-dangling
                // pointer held by the handler context.
                htmlFreeParserCtxt(parserContext)
                handlerContext.contextPtr = nil
            }

            handlerContext.contextPtr = parserContext
            let options = CInt(parseOptions.rawValue)
            htmlCtxtUseOptions(parserContext, options)

            let parseResult = htmlParseDocument(parserContext)
            try handleParseResult(parseResult, handlerContext)
        }
    }

    /**
     Map a Swift string encoding onto the corresponding libxml2 character encoding.

     The byte-order-neutral `.utf16` and `.utf32` encodings are mapped to the variant matching
     the byte order reported by `CFByteOrderGetCurrent()`.

     - Parameter swiftEncoding: The Swift encoding to convert.
     - Returns: The matching `xmlCharEncoding`, or `XML_CHAR_ENCODING_NONE` when there is no
       mapping for the given encoding.
     */
    // swiftlint:disable:next function_body_length cyclomatic_complexity
    func convert(from swiftEncoding: String.Encoding) -> xmlCharEncoding {
        switch swiftEncoding {
        case .utf8:
            return XML_CHAR_ENCODING_UTF8
        case .utf16LittleEndian:
            return XML_CHAR_ENCODING_UTF16LE
        case .utf16BigEndian:
            return XML_CHAR_ENCODING_UTF16BE
        case .utf16:
            switch UInt32(CFByteOrderGetCurrent()) {
            case CFByteOrderBigEndian.rawValue:
                return XML_CHAR_ENCODING_UTF16BE

            case CFByteOrderLittleEndian.rawValue:
                return XML_CHAR_ENCODING_UTF16LE

            default:
                return XML_CHAR_ENCODING_NONE
            }
        case .utf32LittleEndian:
            return XML_CHAR_ENCODING_UCS4LE
        case .utf32BigEndian:
            return XML_CHAR_ENCODING_UCS4BE
        case .utf32:
            switch UInt32(CFByteOrderGetCurrent()) {
            case CFByteOrderBigEndian.rawValue:
                return XML_CHAR_ENCODING_UCS4BE

            case CFByteOrderLittleEndian.rawValue:
                return XML_CHAR_ENCODING_UCS4LE

            default:
                return XML_CHAR_ENCODING_NONE
            }
        case .isoLatin1:
            return XML_CHAR_ENCODING_8859_1
        case .isoLatin2:
            return XML_CHAR_ENCODING_8859_2
        case .japaneseEUC:
            return XML_CHAR_ENCODING_EUC_JP
        case .iso2022JP:
            return XML_CHAR_ENCODING_2022_JP
        case .shiftJIS:
            return XML_CHAR_ENCODING_SHIFT_JIS
        case .ascii:
            return XML_CHAR_ENCODING_ASCII

        default:
            return XML_CHAR_ENCODING_NONE
        }
    }

    /**
     Handle the parse result from the underlying libxml2 htmlParseDocument call. Determines if the parse method
     should throw a parsingFailure error. This will check the result did not end with a fatal error. Other less
     serious error levels will be considered a success.

     On success the method returns, otherwise the method throws an `HTMLParser.Error`

     - Parameter parseResult: The result from the libxml2 htmlParseDocument call.
     - Parameter handlerContext: The handler context.
     - Throws: `HTMLParser.Error` if the parsing ended with a fatal error.
     */
    private func handleParseResult(_ parseResult: Int32, _ handlerContext: HTMLSAXParser.HandlerContext) throws {
        // htmlParseDocument returns zero for success, therefore only a non-zero result requires
        // inspecting the last error.
        guard parseResult != 0 else {
            return
        }

        // If no last error was found then just return.
        guard let error = handlerContext.lastError() else {
            return
        }

        // Only a fatal error is reported as a parsingFailure; all other levels of error are
        // considered success.
        guard error.pointee.level == XML_ERR_FATAL else {
            return
        }

        let message: String
        if let messageCString = error.pointee.message {
            message = String(cString: messageCString).trimmingCharacters(in: .whitespacesAndNewlines)
        } else {
            message = ""
        }

        let location = Location(line: Int(error.pointee.line), column: Int(error.pointee.int2))

        throw Error.parsingFailure(location: location, message: message)
    }

    /**
     The per-parse context handed to libxml2 as the SAX user data and passed to the
     `EventHandler` with every event. Every accessor falls back to a neutral value when
     `contextPtr` is `nil`.
     */
    private class HandlerContext: HTMLSAXParseContext {

        /// The closure that receives the parse events.
        let handler: EventHandler

        /// The libxml2 parser context; set by `_parse` for the duration of the parse only.
        var contextPtr: htmlParserCtxtPtr?

        init(handler: @escaping EventHandler) {
            self.handler = handler
        }

        /// The line and column reported by libxml2, or (0, 0) when there is no parser context.
        var location: Location {
            guard let contextPtr = contextPtr else {
                return Location(line: 0, column: 0)
            }
            let lineNumber = Int(xmlSAX2GetLineNumber(contextPtr))
            let columnNumber = Int(xmlSAX2GetColumnNumber(contextPtr))
            return Location(line: lineNumber, column: columnNumber)
        }

        /// The value of `xmlSAX2GetSystemId`, or `nil` when unavailable.
        var systemId: String? {
            guard let contextPtr = contextPtr, let systemId = xmlSAX2GetSystemId(contextPtr) else {
                return nil
            }
            return String(cString: systemId)
        }

        /// The value of `xmlSAX2GetPublicId`, or `nil` when unavailable.
        var publicId: String? {
            guard let contextPtr = contextPtr, let publicId = xmlSAX2GetPublicId(contextPtr) else {
                return nil
            }
            return String(cString: publicId)
        }

        /// Ask libxml2 to stop parsing via `xmlStopParser`; does nothing without a parser context.
        func abortParsing() {
            guard let contextPtr = contextPtr else {
                return
            }

            xmlStopParser(contextPtr)
        }

        /// The last error recorded on the parser context, or `nil` if there is none.
        fileprivate func lastError() -> xmlErrorPtr? {
            guard let contextPtr = contextPtr, let errorPtr = xmlCtxtGetLastError(contextPtr) else {
                return nil
            }
            return errorPtr
        }
    }

    /**
     Create a htmlSAXHandler instance for the libxml2 html parser. The created htmlSAXHandler struct
     will have the various function pointers set to the relevant Swift closures to process the event
     and forward the event to the EventHandler closure within the parsing context.

     Each closure receives the opaque pointer produced in `_parse` as its first argument and
     recovers the `HandlerContext` from it; a callback with a `nil` context (or a `nil`
     mandatory argument) is ignored.

     - Returns: An instance of htmlSAXHandler with the function pointers set.
     */
    // swiftlint:disable:next function_body_length cyclomatic_complexity
    private static func createSAXHandler() -> htmlSAXHandler {
        var handler = htmlSAXHandler()

        handler.startDocument = { (context: UnsafeMutableRawPointer?) in
            guard let context = context else {
                return
            }

            let handlerContext = Unmanaged<HandlerContext>.fromOpaque(context).takeUnretainedValue()
            handlerContext.handler(handlerContext, .startDocument)
        }

        handler.endDocument = { (context: UnsafeMutableRawPointer?) in
            guard let context = context else {
                return
            }

            let handlerContext = Unmanaged<HandlerContext>.fromOpaque(context).takeUnretainedValue()
            handlerContext.handler(handlerContext, .endDocument)
        }

        handler.startElement = { (context: UnsafeMutableRawPointer?,
                                  name: UnsafePointer<xmlChar>?,
                                  attrs: UnsafeMutablePointer<UnsafePointer<xmlChar>?>?) in
            guard let context = context, let name = name else {
                return
            }

            let handlerContext = Unmanaged<HandlerContext>.fromOpaque(context).takeUnretainedValue()
            let elementName = String(cString: name)
            var elementAttributes: [String: String] = [:]

            if var attrPtr = attrs {
                // The array alternates name and value pointers and ends at the first nil name.
                while let attrName = attrPtr.pointee {
                    let attributeName = String(cString: attrName)
                    let valuePtr = attrPtr.advanced(by: 1)

                    if let attrValue = valuePtr.pointee {
                        elementAttributes[attributeName] = String(cString: attrValue)
                    } else {
                        // If the attribute does not have a value then use an empty string for value.
                        elementAttributes[attributeName] = ""
                    }
                    attrPtr = valuePtr.advanced(by: 1)
                }
            }

            handlerContext.handler(handlerContext,
                                   .startElement(name: elementName,
                                                 attributes: elementAttributes))
        }

        handler.endElement = { (context, name) in
            guard let context = context, let name = name else {
                return
            }

            let handlerContext = Unmanaged<HandlerContext>.fromOpaque(context).takeUnretainedValue()
            let elementName = String(cString: name)

            handlerContext.handler(handlerContext, .endElement(name: elementName))
        }

        handler.characters = { (context, characters, length) in
            guard let context = context, let characters = characters else {
                return
            }

            // The buffer is described by an explicit length, so wrap it without copying instead
            // of reading it as a C string. Text that is not valid UTF-8 is dropped.
            let ptr = UnsafeMutableRawPointer(OpaquePointer(characters))
            let data = Data(bytesNoCopy: ptr,
                            count: Int(length),
                            deallocator: .none)
            guard let text = String(data: data, encoding: .utf8) else {
                return
            }

            let handlerContext = Unmanaged<HandlerContext>.fromOpaque(context).takeUnretainedValue()
            handlerContext.handler(handlerContext, .characters(text: text))
        }

        handler.processingInstruction = { (context, target, data) in
            guard let context = context, let target = target else {
                return
            }

            let targetString = String(cString: target)
            let dataString: String?
            if let data = data {
                dataString = String(cString: data)
            } else {
                dataString = nil
            }

            let handlerContext = Unmanaged<HandlerContext>.fromOpaque(context).takeUnretainedValue()
            handlerContext.handler(handlerContext,
                                   .processingInstruction(target: targetString,
                                                          data: dataString))
        }

        handler.comment = { (context, comment) in
            guard let context = context, let comment = comment else {
                return
            }

            let commentString = String(cString: comment)
            let handlerContext = Unmanaged<HandlerContext>.fromOpaque(context).takeUnretainedValue()
            handlerContext.handler(handlerContext, .comment(text: commentString))
        }

        handler.cdataBlock = { (context, block, length) in
            guard let context = context, let block = block else {
                return
            }

            // Copy the block: the bytes are handed to the event handler as an owned Data value.
            let dataBlock = Data(bytes: block, count: Int(length))
            let handlerContext = Unmanaged<HandlerContext>.fromOpaque(context).takeUnretainedValue()
            handlerContext.handler(handlerContext, .cdata(block: dataBlock))
        }

        // Set the global error and warning handler functions. Touching the two static
        // properties forces their one-time initialisers to run before the handler is wired up.
        // NOTE(review): the htmlparser_set_global_* functions live in CHTMLSAXParser.c;
        // presumably they install C trampolines into the handler's error/warning slots — confirm.
        _ = HTMLSAXParser.globalErrorHandler
        _ = HTMLSAXParser.globalWarningHandler
        withUnsafeMutablePointer(to: &handler) { (handlerPtr) in
            htmlparser_set_global_error_handler(handlerPtr)
            htmlparser_set_global_warning_handler(handlerPtr)
        }

        return handler
    }

    /// Assigns the Swift closure that forwards libxml2 error messages as `.error` events.
    private static let globalErrorHandler: HTMLParserWrappedErrorSAXFunc = {
        // We only want to set this global once ever. Regardless of the number of instances of parsers.
        htmlparser_global_error_sax_func = { context, message in
            guard let context = context else {
                return
            }

            let messageString: String
            if let message = message {
                messageString = String(cString: message).trimmingCharacters(in: .whitespacesAndNewlines)
            } else {
                messageString = ""
            }
            let handlerContext = Unmanaged<HandlerContext>.fromOpaque(context).takeUnretainedValue()
            handlerContext.handler(handlerContext, .error(message: messageString))
        }
        return htmlparser_global_error_sax_func
    }()

    /// Assigns the Swift closure that forwards libxml2 warning messages as `.warning` events.
    private static let globalWarningHandler: HTMLParserWrappedWarningSAXFunc = {
        // We only want to set this global once ever. Regardless of the number of instances of parsers.
        htmlparser_global_warning_sax_func = { context, message in
            guard let context = context else {
                return
            }

            let messageString: String
            if let message = message {
                messageString = String(cString: message).trimmingCharacters(in: .whitespacesAndNewlines)
            } else {
                messageString = ""
            }
            let handlerContext = Unmanaged<HandlerContext>.fromOpaque(context).takeUnretainedValue()
            handlerContext.handler(handlerContext, .warning(message: messageString))
        }
        return htmlparser_global_warning_sax_func
    }()
}

--------------------------------------------------------------------------------
/HTMLSAXParser.xcodeproj/project.pbxproj:
--------------------------------------------------------------------------------
1 | // !$*UTF8*$!
2 | { 3 | archiveVersion = 1; 4 | classes = { 5 | }; 6 | objectVersion = 48; 7 | objects = { 8 | 9 | /* Begin PBXBuildFile section */ 10 | 4D161F411F228123002573EF /* HTMLEncodeEntitiesTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4D161F401F228123002573EF /* HTMLEncodeEntitiesTests.swift */; }; 11 | 4D3D6DB11F3EDEFA00EE40CB /* HTMLSAXParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4DDF8C3B1F211F01008C2135 /* HTMLSAXParser.swift */; }; 12 | 4D3D6DB21F3EDF0100EE40CB /* CHTMLSAXParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 4D77F7351F2EFEB100C29ECC /* CHTMLSAXParser.h */; }; 13 | 4D3D6DB31F3EDF0500EE40CB /* CHTMLSAXParser.c in Sources */ = {isa = PBXBuildFile; fileRef = 4D77F7341F2EFEB100C29ECC /* CHTMLSAXParser.c */; }; 14 | 4D3D6DB41F3EDF0800EE40CB /* HTMLSAXParser+libxml2.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4D9BB2241F215C5C0010D5A8 /* HTMLSAXParser+libxml2.swift */; }; 15 | 4D3D6DB51F3EDF0A00EE40CB /* EscapeSpecialCharacters.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4D80C5571F21C84600E2F340 /* EscapeSpecialCharacters.swift */; }; 16 | 4D77F7361F2EFEB100C29ECC /* CHTMLSAXParser.c in Sources */ = {isa = PBXBuildFile; fileRef = 4D77F7341F2EFEB100C29ECC /* CHTMLSAXParser.c */; }; 17 | 4D77F7371F2EFEB100C29ECC /* CHTMLSAXParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 4D77F7351F2EFEB100C29ECC /* CHTMLSAXParser.h */; }; 18 | 4D7F1DA51F3EED470080660A /* HTMLSAXParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 4DDF8C241F211E2E008C2135 /* HTMLSAXParser.h */; settings = {ATTRIBUTES = (Public, ); }; }; 19 | 4D7F1DA71F3EED5B0080660A /* HTMLSAXParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4DDF8C3B1F211F01008C2135 /* HTMLSAXParser.swift */; }; 20 | 4D7F1DA81F3EED5E0080660A /* CHTMLSAXParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 4D77F7351F2EFEB100C29ECC /* CHTMLSAXParser.h */; }; 21 | 4D7F1DA91F3EED640080660A /* CHTMLSAXParser.c in Sources */ = {isa = PBXBuildFile; fileRef = 
4D77F7341F2EFEB100C29ECC /* CHTMLSAXParser.c */; }; 22 | 4D7F1DAA1F3EED670080660A /* HTMLSAXParser+libxml2.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4D9BB2241F215C5C0010D5A8 /* HTMLSAXParser+libxml2.swift */; }; 23 | 4D7F1DAB1F3EED6B0080660A /* EscapeSpecialCharacters.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4D80C5571F21C84600E2F340 /* EscapeSpecialCharacters.swift */; }; 24 | 4D7F1DAD1F3EF59D0080660A /* article_with_images.html in Resources */ = {isa = PBXBuildFile; fileRef = 4D7F1DAC1F3EF5540080660A /* article_with_images.html */; }; 25 | 4D7F1DB21F40493B0080660A /* HTMLSAXParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 4DDF8C241F211E2E008C2135 /* HTMLSAXParser.h */; settings = {ATTRIBUTES = (Public, ); }; }; 26 | 4D80C5581F21C84600E2F340 /* EscapeSpecialCharacters.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4D80C5571F21C84600E2F340 /* EscapeSpecialCharacters.swift */; }; 27 | 4D9286BB1F3516F1001A16FC /* test_uft8.html in Resources */ = {isa = PBXBuildFile; fileRef = 4D9286BA1F3516E6001A16FC /* test_uft8.html */; }; 28 | 4D9286BC1F3516F4001A16FC /* test_uft16le.html in Resources */ = {isa = PBXBuildFile; fileRef = 4D9286B91F3516E6001A16FC /* test_uft16le.html */; }; 29 | 4D9BB2251F215C5C0010D5A8 /* HTMLSAXParser+libxml2.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4D9BB2241F215C5C0010D5A8 /* HTMLSAXParser+libxml2.swift */; }; 30 | 4D9BB22D1F2160450010D5A8 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4D9BB22C1F2160450010D5A8 /* AppDelegate.swift */; }; 31 | 4D9BB22F1F2160450010D5A8 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4D9BB22E1F2160450010D5A8 /* ViewController.swift */; }; 32 | 4D9BB2321F2160450010D5A8 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 4D9BB2301F2160450010D5A8 /* Main.storyboard */; }; 33 | 4D9BB2341F2160450010D5A8 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 4D9BB2331F2160450010D5A8 /* Assets.xcassets 
*/; }; 34 | 4D9BB2371F2160450010D5A8 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 4D9BB2351F2160450010D5A8 /* LaunchScreen.storyboard */; }; 35 | 4D9BB23C1F21605E0010D5A8 /* HTMLSAXParser.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4DDF8C211F211E2D008C2135 /* HTMLSAXParser.framework */; }; 36 | 4D9BB23D1F21605E0010D5A8 /* HTMLSAXParser.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = 4DDF8C211F211E2D008C2135 /* HTMLSAXParser.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; 37 | 4DDF8C2B1F211E2E008C2135 /* HTMLSAXParser.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 4DDF8C211F211E2D008C2135 /* HTMLSAXParser.framework */; }; 38 | 4DDF8C301F211E2E008C2135 /* HTMLParserTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4DDF8C2F1F211E2E008C2135 /* HTMLParserTests.swift */; }; 39 | 4DDF8C321F211E2E008C2135 /* HTMLSAXParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 4DDF8C241F211E2E008C2135 /* HTMLSAXParser.h */; settings = {ATTRIBUTES = (Public, ); }; }; 40 | 4DDF8C3C1F211F01008C2135 /* HTMLSAXParser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4DDF8C3B1F211F01008C2135 /* HTMLSAXParser.swift */; }; 41 | /* End PBXBuildFile section */ 42 | 43 | /* Begin PBXContainerItemProxy section */ 44 | 4D9BB23E1F21605E0010D5A8 /* PBXContainerItemProxy */ = { 45 | isa = PBXContainerItemProxy; 46 | containerPortal = 4DDF8C181F211E2D008C2135 /* Project object */; 47 | proxyType = 1; 48 | remoteGlobalIDString = 4DDF8C201F211E2D008C2135; 49 | remoteInfo = HTMLParser; 50 | }; 51 | 4DDF8C2C1F211E2E008C2135 /* PBXContainerItemProxy */ = { 52 | isa = PBXContainerItemProxy; 53 | containerPortal = 4DDF8C181F211E2D008C2135 /* Project object */; 54 | proxyType = 1; 55 | remoteGlobalIDString = 4DDF8C201F211E2D008C2135; 56 | remoteInfo = HTMLParser; 57 | }; 58 | /* End PBXContainerItemProxy section */ 59 | 60 | /* Begin PBXCopyFilesBuildPhase section */ 61 | 
4D9BB2401F21605E0010D5A8 /* Embed Frameworks */ = { 62 | isa = PBXCopyFilesBuildPhase; 63 | buildActionMask = 2147483647; 64 | dstPath = ""; 65 | dstSubfolderSpec = 10; 66 | files = ( 67 | 4D9BB23D1F21605E0010D5A8 /* HTMLSAXParser.framework in Embed Frameworks */, 68 | ); 69 | name = "Embed Frameworks"; 70 | runOnlyForDeploymentPostprocessing = 0; 71 | }; 72 | /* End PBXCopyFilesBuildPhase section */ 73 | 74 | /* Begin PBXFileReference section */ 75 | 4D161F401F228123002573EF /* HTMLEncodeEntitiesTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HTMLEncodeEntitiesTests.swift; sourceTree = ""; }; 76 | 4D3D6DA91F3EDEDA00EE40CB /* HTMLSAXParser.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = HTMLSAXParser.framework; sourceTree = BUILT_PRODUCTS_DIR; }; 77 | 4D77F7341F2EFEB100C29ECC /* CHTMLSAXParser.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = CHTMLSAXParser.c; sourceTree = ""; }; 78 | 4D77F7351F2EFEB100C29ECC /* CHTMLSAXParser.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = CHTMLSAXParser.h; path = include/CHTMLSAXParser.h; sourceTree = ""; }; 79 | 4D7F1D9D1F3EEC790080660A /* HTMLSAXParser.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = HTMLSAXParser.framework; sourceTree = BUILT_PRODUCTS_DIR; }; 80 | 4D7F1DAC1F3EF5540080660A /* article_with_images.html */ = {isa = PBXFileReference; lastKnownFileType = text.html; path = article_with_images.html; sourceTree = ""; }; 81 | 4D80C5571F21C84600E2F340 /* EscapeSpecialCharacters.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = EscapeSpecialCharacters.swift; sourceTree = ""; }; 82 | 4D9286B91F3516E6001A16FC /* test_uft16le.html */ = {isa = PBXFileReference; lastKnownFileType = text.html; path = test_uft16le.html; sourceTree = ""; }; 83 | 4D9286BA1F3516E6001A16FC /* test_uft8.html */ = {isa = 
PBXFileReference; lastKnownFileType = text.html; path = test_uft8.html; sourceTree = ""; }; 84 | 4D9BB2241F215C5C0010D5A8 /* HTMLSAXParser+libxml2.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "HTMLSAXParser+libxml2.swift"; sourceTree = ""; }; 85 | 4D9BB22A1F2160450010D5A8 /* HTMLParserDemo.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = HTMLParserDemo.app; sourceTree = BUILT_PRODUCTS_DIR; }; 86 | 4D9BB22C1F2160450010D5A8 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; 87 | 4D9BB22E1F2160450010D5A8 /* ViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = ""; }; 88 | 4D9BB2311F2160450010D5A8 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = ""; }; 89 | 4D9BB2331F2160450010D5A8 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; 90 | 4D9BB2361F2160450010D5A8 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = ""; }; 91 | 4D9BB2381F2160450010D5A8 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 92 | 4DDF8C211F211E2D008C2135 /* HTMLSAXParser.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = HTMLSAXParser.framework; sourceTree = BUILT_PRODUCTS_DIR; }; 93 | 4DDF8C241F211E2E008C2135 /* HTMLSAXParser.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = HTMLSAXParser.h; sourceTree = ""; }; 94 | 4DDF8C2A1F211E2E008C2135 /* HTMLSAXParserTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex 
= 0; path = HTMLSAXParserTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; 95 | 4DDF8C2F1F211E2E008C2135 /* HTMLParserTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HTMLParserTests.swift; sourceTree = ""; }; 96 | 4DDF8C311F211E2E008C2135 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 97 | 4DDF8C3B1F211F01008C2135 /* HTMLSAXParser.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HTMLSAXParser.swift; sourceTree = ""; }; 98 | /* End PBXFileReference section */ 99 | 100 | /* Begin PBXFrameworksBuildPhase section */ 101 | 4D3D6DA51F3EDEDA00EE40CB /* Frameworks */ = { 102 | isa = PBXFrameworksBuildPhase; 103 | buildActionMask = 2147483647; 104 | files = ( 105 | ); 106 | runOnlyForDeploymentPostprocessing = 0; 107 | }; 108 | 4D7F1D991F3EEC790080660A /* Frameworks */ = { 109 | isa = PBXFrameworksBuildPhase; 110 | buildActionMask = 2147483647; 111 | files = ( 112 | ); 113 | runOnlyForDeploymentPostprocessing = 0; 114 | }; 115 | 4D9BB2271F2160450010D5A8 /* Frameworks */ = { 116 | isa = PBXFrameworksBuildPhase; 117 | buildActionMask = 2147483647; 118 | files = ( 119 | 4D9BB23C1F21605E0010D5A8 /* HTMLSAXParser.framework in Frameworks */, 120 | ); 121 | runOnlyForDeploymentPostprocessing = 0; 122 | }; 123 | 4DDF8C1D1F211E2D008C2135 /* Frameworks */ = { 124 | isa = PBXFrameworksBuildPhase; 125 | buildActionMask = 2147483647; 126 | files = ( 127 | ); 128 | runOnlyForDeploymentPostprocessing = 0; 129 | }; 130 | 4DDF8C271F211E2E008C2135 /* Frameworks */ = { 131 | isa = PBXFrameworksBuildPhase; 132 | buildActionMask = 2147483647; 133 | files = ( 134 | 4DDF8C2B1F211E2E008C2135 /* HTMLSAXParser.framework in Frameworks */, 135 | ); 136 | runOnlyForDeploymentPostprocessing = 0; 137 | }; 138 | /* End PBXFrameworksBuildPhase section */ 139 | 140 | /* Begin PBXGroup section */ 141 | 4D9286B81F3516C3001A16FC /* testdata */ = { 142 | isa = PBXGroup; 
143 | children = ( 144 | 4D9286BA1F3516E6001A16FC /* test_uft8.html */, 145 | 4D9286B91F3516E6001A16FC /* test_uft16le.html */, 146 | 4D7F1DAC1F3EF5540080660A /* article_with_images.html */, 147 | ); 148 | path = testdata; 149 | sourceTree = ""; 150 | }; 151 | 4D950ABA1F48216A009825FD /* HTMLSAXParser */ = { 152 | isa = PBXGroup; 153 | children = ( 154 | 4DDF8C241F211E2E008C2135 /* HTMLSAXParser.h */, 155 | 4DDF8C3B1F211F01008C2135 /* HTMLSAXParser.swift */, 156 | 4D9BB2241F215C5C0010D5A8 /* HTMLSAXParser+libxml2.swift */, 157 | 4D80C5571F21C84600E2F340 /* EscapeSpecialCharacters.swift */, 158 | ); 159 | path = HTMLSAXParser; 160 | sourceTree = ""; 161 | }; 162 | 4D950ABB1F4821EA009825FD /* CHTMLSAXParser */ = { 163 | isa = PBXGroup; 164 | children = ( 165 | 4D77F7351F2EFEB100C29ECC /* CHTMLSAXParser.h */, 166 | 4D77F7341F2EFEB100C29ECC /* CHTMLSAXParser.c */, 167 | ); 168 | path = CHTMLSAXParser; 169 | sourceTree = ""; 170 | }; 171 | 4D9BB22B1F2160450010D5A8 /* HTMLParserDemo */ = { 172 | isa = PBXGroup; 173 | children = ( 174 | 4D9BB22C1F2160450010D5A8 /* AppDelegate.swift */, 175 | 4D9BB22E1F2160450010D5A8 /* ViewController.swift */, 176 | 4D9BB2301F2160450010D5A8 /* Main.storyboard */, 177 | 4D9BB2331F2160450010D5A8 /* Assets.xcassets */, 178 | 4D9BB2351F2160450010D5A8 /* LaunchScreen.storyboard */, 179 | 4D9BB2381F2160450010D5A8 /* Info.plist */, 180 | ); 181 | path = HTMLParserDemo; 182 | sourceTree = ""; 183 | }; 184 | 4DDF8C171F211E2D008C2135 = { 185 | isa = PBXGroup; 186 | children = ( 187 | 4DDF8C231F211E2D008C2135 /* Sources */, 188 | 4DDF8C2E1F211E2E008C2135 /* Tests */, 189 | 4D9BB22B1F2160450010D5A8 /* HTMLParserDemo */, 190 | 4DDF8C221F211E2D008C2135 /* Products */, 191 | ); 192 | sourceTree = ""; 193 | }; 194 | 4DDF8C221F211E2D008C2135 /* Products */ = { 195 | isa = PBXGroup; 196 | children = ( 197 | 4DDF8C211F211E2D008C2135 /* HTMLSAXParser.framework */, 198 | 4DDF8C2A1F211E2E008C2135 /* HTMLSAXParserTests.xctest */, 199 | 4D9BB22A1F2160450010D5A8 
/* HTMLParserDemo.app */, 200 | 4D3D6DA91F3EDEDA00EE40CB /* HTMLSAXParser.framework */, 201 | 4D7F1D9D1F3EEC790080660A /* HTMLSAXParser.framework */, 202 | ); 203 | name = Products; 204 | sourceTree = ""; 205 | }; 206 | 4DDF8C231F211E2D008C2135 /* Sources */ = { 207 | isa = PBXGroup; 208 | children = ( 209 | 4D950ABA1F48216A009825FD /* HTMLSAXParser */, 210 | 4D950ABB1F4821EA009825FD /* CHTMLSAXParser */, 211 | ); 212 | path = Sources; 213 | sourceTree = ""; 214 | }; 215 | 4DDF8C2E1F211E2E008C2135 /* Tests */ = { 216 | isa = PBXGroup; 217 | children = ( 218 | 4D9286B81F3516C3001A16FC /* testdata */, 219 | 4DDF8C311F211E2E008C2135 /* Info.plist */, 220 | 4DDF8C2F1F211E2E008C2135 /* HTMLParserTests.swift */, 221 | 4D161F401F228123002573EF /* HTMLEncodeEntitiesTests.swift */, 222 | ); 223 | name = Tests; 224 | path = Tests/HTMLSAXParser; 225 | sourceTree = ""; 226 | }; 227 | /* End PBXGroup section */ 228 | 229 | /* Begin PBXHeadersBuildPhase section */ 230 | 4D3D6DA61F3EDEDA00EE40CB /* Headers */ = { 231 | isa = PBXHeadersBuildPhase; 232 | buildActionMask = 2147483647; 233 | files = ( 234 | 4D3D6DB21F3EDF0100EE40CB /* CHTMLSAXParser.h in Headers */, 235 | 4D7F1DB21F40493B0080660A /* HTMLSAXParser.h in Headers */, 236 | ); 237 | runOnlyForDeploymentPostprocessing = 0; 238 | }; 239 | 4D7F1D9A1F3EEC790080660A /* Headers */ = { 240 | isa = PBXHeadersBuildPhase; 241 | buildActionMask = 2147483647; 242 | files = ( 243 | 4D7F1DA51F3EED470080660A /* HTMLSAXParser.h in Headers */, 244 | 4D7F1DA81F3EED5E0080660A /* CHTMLSAXParser.h in Headers */, 245 | ); 246 | runOnlyForDeploymentPostprocessing = 0; 247 | }; 248 | 4DDF8C1E1F211E2D008C2135 /* Headers */ = { 249 | isa = PBXHeadersBuildPhase; 250 | buildActionMask = 2147483647; 251 | files = ( 252 | 4DDF8C321F211E2E008C2135 /* HTMLSAXParser.h in Headers */, 253 | 4D77F7371F2EFEB100C29ECC /* CHTMLSAXParser.h in Headers */, 254 | ); 255 | runOnlyForDeploymentPostprocessing = 0; 256 | }; 257 | /* End PBXHeadersBuildPhase section */ 
258 | 259 | /* Begin PBXNativeTarget section */ 260 | 4D3D6DA81F3EDEDA00EE40CB /* HTMLSAXParser MacOS */ = { 261 | isa = PBXNativeTarget; 262 | buildConfigurationList = 4D3D6DAE1F3EDEDA00EE40CB /* Build configuration list for PBXNativeTarget "HTMLSAXParser MacOS" */; 263 | buildPhases = ( 264 | 4D3D6DA41F3EDEDA00EE40CB /* Sources */, 265 | 4D3D6DA51F3EDEDA00EE40CB /* Frameworks */, 266 | 4D3D6DA61F3EDEDA00EE40CB /* Headers */, 267 | 4D3D6DA71F3EDEDA00EE40CB /* Resources */, 268 | ); 269 | buildRules = ( 270 | ); 271 | dependencies = ( 272 | ); 273 | name = "HTMLSAXParser MacOS"; 274 | productName = "HTMLSAXParser MacOS"; 275 | productReference = 4D3D6DA91F3EDEDA00EE40CB /* HTMLSAXParser.framework */; 276 | productType = "com.apple.product-type.framework"; 277 | }; 278 | 4D7F1D9C1F3EEC790080660A /* HTMLSAXParser AppleTV */ = { 279 | isa = PBXNativeTarget; 280 | buildConfigurationList = 4D7F1DA21F3EEC790080660A /* Build configuration list for PBXNativeTarget "HTMLSAXParser AppleTV" */; 281 | buildPhases = ( 282 | 4D7F1D981F3EEC790080660A /* Sources */, 283 | 4D7F1D991F3EEC790080660A /* Frameworks */, 284 | 4D7F1D9A1F3EEC790080660A /* Headers */, 285 | 4D7F1D9B1F3EEC790080660A /* Resources */, 286 | ); 287 | buildRules = ( 288 | ); 289 | dependencies = ( 290 | ); 291 | name = "HTMLSAXParser AppleTV"; 292 | productName = HTMLSAXParser; 293 | productReference = 4D7F1D9D1F3EEC790080660A /* HTMLSAXParser.framework */; 294 | productType = "com.apple.product-type.framework"; 295 | }; 296 | 4D9BB2291F2160450010D5A8 /* HTMLParserDemo */ = { 297 | isa = PBXNativeTarget; 298 | buildConfigurationList = 4D9BB2391F2160450010D5A8 /* Build configuration list for PBXNativeTarget "HTMLParserDemo" */; 299 | buildPhases = ( 300 | 4D9BB2261F2160450010D5A8 /* Sources */, 301 | 4D9BB2271F2160450010D5A8 /* Frameworks */, 302 | 4D9BB2281F2160450010D5A8 /* Resources */, 303 | 4D9BB2401F21605E0010D5A8 /* Embed Frameworks */, 304 | ); 305 | buildRules = ( 306 | ); 307 | dependencies = ( 308 | 
4D9BB23F1F21605E0010D5A8 /* PBXTargetDependency */, 309 | ); 310 | name = HTMLParserDemo; 311 | productName = HTMLParserDemo; 312 | productReference = 4D9BB22A1F2160450010D5A8 /* HTMLParserDemo.app */; 313 | productType = "com.apple.product-type.application"; 314 | }; 315 | 4DDF8C201F211E2D008C2135 /* HTMLSAXParser iOS */ = { 316 | isa = PBXNativeTarget; 317 | buildConfigurationList = 4DDF8C351F211E2E008C2135 /* Build configuration list for PBXNativeTarget "HTMLSAXParser iOS" */; 318 | buildPhases = ( 319 | 4D63D6581F9CB6F6009EC3CB /* SwiftLint */, 320 | 4DDF8C1C1F211E2D008C2135 /* Sources */, 321 | 4DDF8C1D1F211E2D008C2135 /* Frameworks */, 322 | 4DDF8C1E1F211E2D008C2135 /* Headers */, 323 | 4DDF8C1F1F211E2D008C2135 /* Resources */, 324 | ); 325 | buildRules = ( 326 | ); 327 | dependencies = ( 328 | ); 329 | name = "HTMLSAXParser iOS"; 330 | productName = HTMLParser; 331 | productReference = 4DDF8C211F211E2D008C2135 /* HTMLSAXParser.framework */; 332 | productType = "com.apple.product-type.framework"; 333 | }; 334 | 4DDF8C291F211E2E008C2135 /* HTMLSAXParserTests */ = { 335 | isa = PBXNativeTarget; 336 | buildConfigurationList = 4DDF8C381F211E2E008C2135 /* Build configuration list for PBXNativeTarget "HTMLSAXParserTests" */; 337 | buildPhases = ( 338 | 4DDF8C261F211E2E008C2135 /* Sources */, 339 | 4DDF8C271F211E2E008C2135 /* Frameworks */, 340 | 4DDF8C281F211E2E008C2135 /* Resources */, 341 | ); 342 | buildRules = ( 343 | ); 344 | dependencies = ( 345 | 4DDF8C2D1F211E2E008C2135 /* PBXTargetDependency */, 346 | ); 347 | name = HTMLSAXParserTests; 348 | productName = HTMLParserTests; 349 | productReference = 4DDF8C2A1F211E2E008C2135 /* HTMLSAXParserTests.xctest */; 350 | productType = "com.apple.product-type.bundle.unit-test"; 351 | }; 352 | /* End PBXNativeTarget section */ 353 | 354 | /* Begin PBXProject section */ 355 | 4DDF8C181F211E2D008C2135 /* Project object */ = { 356 | isa = PBXProject; 357 | attributes = { 358 | LastSwiftUpdateCheck = 0900; 359 | 
LastUpgradeCheck = 0900; 360 | ORGANIZATIONNAME = "Raymond Mccrae"; 361 | TargetAttributes = { 362 | 4D3D6DA81F3EDEDA00EE40CB = { 363 | CreatedOnToolsVersion = 9.0; 364 | }; 365 | 4D7F1D9C1F3EEC790080660A = { 366 | CreatedOnToolsVersion = 9.0; 367 | }; 368 | 4D9BB2291F2160450010D5A8 = { 369 | CreatedOnToolsVersion = 9.0; 370 | }; 371 | 4DDF8C201F211E2D008C2135 = { 372 | CreatedOnToolsVersion = 9.0; 373 | LastSwiftMigration = 0900; 374 | }; 375 | 4DDF8C291F211E2E008C2135 = { 376 | CreatedOnToolsVersion = 9.0; 377 | }; 378 | }; 379 | }; 380 | buildConfigurationList = 4DDF8C1B1F211E2D008C2135 /* Build configuration list for PBXProject "HTMLSAXParser" */; 381 | compatibilityVersion = "Xcode 8.0"; 382 | developmentRegion = en; 383 | hasScannedForEncodings = 0; 384 | knownRegions = ( 385 | en, 386 | Base, 387 | ); 388 | mainGroup = 4DDF8C171F211E2D008C2135; 389 | productRefGroup = 4DDF8C221F211E2D008C2135 /* Products */; 390 | projectDirPath = ""; 391 | projectRoot = ""; 392 | targets = ( 393 | 4DDF8C201F211E2D008C2135 /* HTMLSAXParser iOS */, 394 | 4D3D6DA81F3EDEDA00EE40CB /* HTMLSAXParser MacOS */, 395 | 4D7F1D9C1F3EEC790080660A /* HTMLSAXParser AppleTV */, 396 | 4DDF8C291F211E2E008C2135 /* HTMLSAXParserTests */, 397 | 4D9BB2291F2160450010D5A8 /* HTMLParserDemo */, 398 | ); 399 | }; 400 | /* End PBXProject section */ 401 | 402 | /* Begin PBXResourcesBuildPhase section */ 403 | 4D3D6DA71F3EDEDA00EE40CB /* Resources */ = { 404 | isa = PBXResourcesBuildPhase; 405 | buildActionMask = 2147483647; 406 | files = ( 407 | ); 408 | runOnlyForDeploymentPostprocessing = 0; 409 | }; 410 | 4D7F1D9B1F3EEC790080660A /* Resources */ = { 411 | isa = PBXResourcesBuildPhase; 412 | buildActionMask = 2147483647; 413 | files = ( 414 | ); 415 | runOnlyForDeploymentPostprocessing = 0; 416 | }; 417 | 4D9BB2281F2160450010D5A8 /* Resources */ = { 418 | isa = PBXResourcesBuildPhase; 419 | buildActionMask = 2147483647; 420 | files = ( 421 | 4D9BB2371F2160450010D5A8 /* LaunchScreen.storyboard in 
Resources */, 422 | 4D9BB2341F2160450010D5A8 /* Assets.xcassets in Resources */, 423 | 4D9BB2321F2160450010D5A8 /* Main.storyboard in Resources */, 424 | ); 425 | runOnlyForDeploymentPostprocessing = 0; 426 | }; 427 | 4DDF8C1F1F211E2D008C2135 /* Resources */ = { 428 | isa = PBXResourcesBuildPhase; 429 | buildActionMask = 2147483647; 430 | files = ( 431 | ); 432 | runOnlyForDeploymentPostprocessing = 0; 433 | }; 434 | 4DDF8C281F211E2E008C2135 /* Resources */ = { 435 | isa = PBXResourcesBuildPhase; 436 | buildActionMask = 2147483647; 437 | files = ( 438 | 4D9286BC1F3516F4001A16FC /* test_uft16le.html in Resources */, 439 | 4D7F1DAD1F3EF59D0080660A /* article_with_images.html in Resources */, 440 | 4D9286BB1F3516F1001A16FC /* test_uft8.html in Resources */, 441 | ); 442 | runOnlyForDeploymentPostprocessing = 0; 443 | }; 444 | /* End PBXResourcesBuildPhase section */ 445 | 446 | /* Begin PBXShellScriptBuildPhase section */ 447 | 4D63D6581F9CB6F6009EC3CB /* SwiftLint */ = { 448 | isa = PBXShellScriptBuildPhase; 449 | buildActionMask = 2147483647; 450 | files = ( 451 | ); 452 | inputPaths = ( 453 | ); 454 | name = SwiftLint; 455 | outputPaths = ( 456 | ); 457 | runOnlyForDeploymentPostprocessing = 0; 458 | shellPath = /bin/sh; 459 | shellScript = "if which swiftlint >/dev/null; then\n swiftlint\nelse\n\techo \"warning: SwiftLint not installed, download from https://github.com/realm/SwiftLint\"\nfi"; 460 | }; 461 | /* End PBXShellScriptBuildPhase section */ 462 | 463 | /* Begin PBXSourcesBuildPhase section */ 464 | 4D3D6DA41F3EDEDA00EE40CB /* Sources */ = { 465 | isa = PBXSourcesBuildPhase; 466 | buildActionMask = 2147483647; 467 | files = ( 468 | 4D3D6DB31F3EDF0500EE40CB /* CHTMLSAXParser.c in Sources */, 469 | 4D3D6DB11F3EDEFA00EE40CB /* HTMLSAXParser.swift in Sources */, 470 | 4D3D6DB51F3EDF0A00EE40CB /* EscapeSpecialCharacters.swift in Sources */, 471 | 4D3D6DB41F3EDF0800EE40CB /* HTMLSAXParser+libxml2.swift in Sources */, 472 | ); 473 | 
runOnlyForDeploymentPostprocessing = 0; 474 | }; 475 | 4D7F1D981F3EEC790080660A /* Sources */ = { 476 | isa = PBXSourcesBuildPhase; 477 | buildActionMask = 2147483647; 478 | files = ( 479 | 4D7F1DA91F3EED640080660A /* CHTMLSAXParser.c in Sources */, 480 | 4D7F1DA71F3EED5B0080660A /* HTMLSAXParser.swift in Sources */, 481 | 4D7F1DAB1F3EED6B0080660A /* EscapeSpecialCharacters.swift in Sources */, 482 | 4D7F1DAA1F3EED670080660A /* HTMLSAXParser+libxml2.swift in Sources */, 483 | ); 484 | runOnlyForDeploymentPostprocessing = 0; 485 | }; 486 | 4D9BB2261F2160450010D5A8 /* Sources */ = { 487 | isa = PBXSourcesBuildPhase; 488 | buildActionMask = 2147483647; 489 | files = ( 490 | 4D9BB22F1F2160450010D5A8 /* ViewController.swift in Sources */, 491 | 4D9BB22D1F2160450010D5A8 /* AppDelegate.swift in Sources */, 492 | ); 493 | runOnlyForDeploymentPostprocessing = 0; 494 | }; 495 | 4DDF8C1C1F211E2D008C2135 /* Sources */ = { 496 | isa = PBXSourcesBuildPhase; 497 | buildActionMask = 2147483647; 498 | files = ( 499 | 4D77F7361F2EFEB100C29ECC /* CHTMLSAXParser.c in Sources */, 500 | 4D9BB2251F215C5C0010D5A8 /* HTMLSAXParser+libxml2.swift in Sources */, 501 | 4D80C5581F21C84600E2F340 /* EscapeSpecialCharacters.swift in Sources */, 502 | 4DDF8C3C1F211F01008C2135 /* HTMLSAXParser.swift in Sources */, 503 | ); 504 | runOnlyForDeploymentPostprocessing = 0; 505 | }; 506 | 4DDF8C261F211E2E008C2135 /* Sources */ = { 507 | isa = PBXSourcesBuildPhase; 508 | buildActionMask = 2147483647; 509 | files = ( 510 | 4DDF8C301F211E2E008C2135 /* HTMLParserTests.swift in Sources */, 511 | 4D161F411F228123002573EF /* HTMLEncodeEntitiesTests.swift in Sources */, 512 | ); 513 | runOnlyForDeploymentPostprocessing = 0; 514 | }; 515 | /* End PBXSourcesBuildPhase section */ 516 | 517 | /* Begin PBXTargetDependency section */ 518 | 4D9BB23F1F21605E0010D5A8 /* PBXTargetDependency */ = { 519 | isa = PBXTargetDependency; 520 | target = 4DDF8C201F211E2D008C2135 /* HTMLSAXParser iOS */; 521 | targetProxy = 
4D9BB23E1F21605E0010D5A8 /* PBXContainerItemProxy */; 522 | }; 523 | 4DDF8C2D1F211E2E008C2135 /* PBXTargetDependency */ = { 524 | isa = PBXTargetDependency; 525 | target = 4DDF8C201F211E2D008C2135 /* HTMLSAXParser iOS */; 526 | targetProxy = 4DDF8C2C1F211E2E008C2135 /* PBXContainerItemProxy */; 527 | }; 528 | /* End PBXTargetDependency section */ 529 | 530 | /* Begin PBXVariantGroup section */ 531 | 4D9BB2301F2160450010D5A8 /* Main.storyboard */ = { 532 | isa = PBXVariantGroup; 533 | children = ( 534 | 4D9BB2311F2160450010D5A8 /* Base */, 535 | ); 536 | name = Main.storyboard; 537 | sourceTree = ""; 538 | }; 539 | 4D9BB2351F2160450010D5A8 /* LaunchScreen.storyboard */ = { 540 | isa = PBXVariantGroup; 541 | children = ( 542 | 4D9BB2361F2160450010D5A8 /* Base */, 543 | ); 544 | name = LaunchScreen.storyboard; 545 | sourceTree = ""; 546 | }; 547 | /* End PBXVariantGroup section */ 548 | 549 | /* Begin XCBuildConfiguration section */ 550 | 4D3D6DAF1F3EDEDA00EE40CB /* Debug */ = { 551 | isa = XCBuildConfiguration; 552 | buildSettings = { 553 | CODE_SIGN_IDENTITY = "-"; 554 | COMBINE_HIDPI_IMAGES = YES; 555 | DEFINES_MODULE = YES; 556 | DYLIB_COMPATIBILITY_VERSION = 1; 557 | DYLIB_CURRENT_VERSION = 1; 558 | DYLIB_INSTALL_NAME_BASE = "@rpath"; 559 | FRAMEWORK_VERSION = A; 560 | HEADER_SEARCH_PATHS = "$(SDKROOT)/usr/include/libxml2"; 561 | INFOPLIST_FILE = Info_MacOS.plist; 562 | INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; 563 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @loader_path/Frameworks"; 564 | MACOSX_DEPLOYMENT_TARGET = 10.9; 565 | PRODUCT_BUNDLE_IDENTIFIER = "com.yahoo.raymccrae.HTMLSAXParser-MacOS"; 566 | PRODUCT_NAME = HTMLSAXParser; 567 | SDKROOT = macosx; 568 | SKIP_INSTALL = YES; 569 | SWIFT_INCLUDE_PATHS = "$(PROJECT_DIR)/Sources/CHTMLSAXParser/include"; 570 | }; 571 | name = Debug; 572 | }; 573 | 4D3D6DB01F3EDEDA00EE40CB /* Release */ = { 574 | isa = XCBuildConfiguration; 575 | buildSettings = { 576 | CODE_SIGN_IDENTITY = 
"-"; 577 | COMBINE_HIDPI_IMAGES = YES; 578 | DEFINES_MODULE = YES; 579 | DYLIB_COMPATIBILITY_VERSION = 1; 580 | DYLIB_CURRENT_VERSION = 1; 581 | DYLIB_INSTALL_NAME_BASE = "@rpath"; 582 | FRAMEWORK_VERSION = A; 583 | HEADER_SEARCH_PATHS = "$(SDKROOT)/usr/include/libxml2"; 584 | INFOPLIST_FILE = Info_MacOS.plist; 585 | INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; 586 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @loader_path/Frameworks"; 587 | MACOSX_DEPLOYMENT_TARGET = 10.9; 588 | PRODUCT_BUNDLE_IDENTIFIER = "com.yahoo.raymccrae.HTMLSAXParser-MacOS"; 589 | PRODUCT_NAME = HTMLSAXParser; 590 | SDKROOT = macosx; 591 | SKIP_INSTALL = YES; 592 | SWIFT_INCLUDE_PATHS = "$(PROJECT_DIR)/Sources/CHTMLSAXParser/include"; 593 | }; 594 | name = Release; 595 | }; 596 | 4D7F1DA31F3EEC790080660A /* Debug */ = { 597 | isa = XCBuildConfiguration; 598 | buildSettings = { 599 | CODE_SIGN_IDENTITY = ""; 600 | DEFINES_MODULE = YES; 601 | DYLIB_COMPATIBILITY_VERSION = 1; 602 | DYLIB_CURRENT_VERSION = 1; 603 | DYLIB_INSTALL_NAME_BASE = "@rpath"; 604 | HEADER_SEARCH_PATHS = "$(SDKROOT)/usr/include/libxml2"; 605 | INFOPLIST_FILE = Info_AppleTV.plist; 606 | INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; 607 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks"; 608 | PRODUCT_BUNDLE_IDENTIFIER = com.yahoo.raymccrae.HTMLSAXParser; 609 | PRODUCT_NAME = HTMLSAXParser; 610 | SDKROOT = appletvos; 611 | SKIP_INSTALL = YES; 612 | SWIFT_INCLUDE_PATHS = "$(PROJECT_DIR)/Sources/CHTMLSAXParser/include"; 613 | TARGETED_DEVICE_FAMILY = 3; 614 | TVOS_DEPLOYMENT_TARGET = 9.0; 615 | }; 616 | name = Debug; 617 | }; 618 | 4D7F1DA41F3EEC790080660A /* Release */ = { 619 | isa = XCBuildConfiguration; 620 | buildSettings = { 621 | CODE_SIGN_IDENTITY = ""; 622 | DEFINES_MODULE = YES; 623 | DYLIB_COMPATIBILITY_VERSION = 1; 624 | DYLIB_CURRENT_VERSION = 1; 625 | DYLIB_INSTALL_NAME_BASE = "@rpath"; 626 | HEADER_SEARCH_PATHS = 
"$(SDKROOT)/usr/include/libxml2"; 627 | INFOPLIST_FILE = Info_AppleTV.plist; 628 | INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; 629 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks"; 630 | PRODUCT_BUNDLE_IDENTIFIER = com.yahoo.raymccrae.HTMLSAXParser; 631 | PRODUCT_NAME = HTMLSAXParser; 632 | SDKROOT = appletvos; 633 | SKIP_INSTALL = YES; 634 | SWIFT_INCLUDE_PATHS = "$(PROJECT_DIR)/Sources/CHTMLSAXParser/include"; 635 | TARGETED_DEVICE_FAMILY = 3; 636 | TVOS_DEPLOYMENT_TARGET = 9.0; 637 | }; 638 | name = Release; 639 | }; 640 | 4D9BB23A1F2160450010D5A8 /* Debug */ = { 641 | isa = XCBuildConfiguration; 642 | buildSettings = { 643 | ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; 644 | ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 645 | HEADER_SEARCH_PATHS = "$(SDKROOT)/usr/include/libxml2"; 646 | INFOPLIST_FILE = HTMLParserDemo/Info.plist; 647 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; 648 | PRODUCT_BUNDLE_IDENTIFIER = com.yahoo.raymccrae.HTMLParserDemo; 649 | PRODUCT_NAME = "$(TARGET_NAME)"; 650 | TARGETED_DEVICE_FAMILY = "1,2"; 651 | }; 652 | name = Debug; 653 | }; 654 | 4D9BB23B1F2160450010D5A8 /* Release */ = { 655 | isa = XCBuildConfiguration; 656 | buildSettings = { 657 | ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; 658 | ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 659 | HEADER_SEARCH_PATHS = "$(SDKROOT)/usr/include/libxml2"; 660 | INFOPLIST_FILE = HTMLParserDemo/Info.plist; 661 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; 662 | PRODUCT_BUNDLE_IDENTIFIER = com.yahoo.raymccrae.HTMLParserDemo; 663 | PRODUCT_NAME = "$(TARGET_NAME)"; 664 | TARGETED_DEVICE_FAMILY = "1,2"; 665 | }; 666 | name = Release; 667 | }; 668 | 4DDF8C331F211E2E008C2135 /* Debug */ = { 669 | isa = XCBuildConfiguration; 670 | buildSettings = { 671 | ALWAYS_SEARCH_USER_PATHS = NO; 672 | CLANG_ANALYZER_NONNULL = YES; 673 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 674 | 
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; 675 | CLANG_CXX_LIBRARY = "libc++"; 676 | CLANG_ENABLE_MODULES = YES; 677 | CLANG_ENABLE_OBJC_ARC = YES; 678 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 679 | CLANG_WARN_BOOL_CONVERSION = YES; 680 | CLANG_WARN_COMMA = YES; 681 | CLANG_WARN_CONSTANT_CONVERSION = YES; 682 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 683 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 684 | CLANG_WARN_EMPTY_BODY = YES; 685 | CLANG_WARN_ENUM_CONVERSION = YES; 686 | CLANG_WARN_INFINITE_RECURSION = YES; 687 | CLANG_WARN_INT_CONVERSION = YES; 688 | CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; 689 | CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; 690 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 691 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 692 | CLANG_WARN_STRICT_PROTOTYPES = YES; 693 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 694 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 695 | CLANG_WARN_UNREACHABLE_CODE = YES; 696 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 697 | CODE_SIGN_IDENTITY = "iPhone Developer"; 698 | COPY_PHASE_STRIP = NO; 699 | CURRENT_PROJECT_VERSION = 1; 700 | DEBUG_INFORMATION_FORMAT = dwarf; 701 | ENABLE_STRICT_OBJC_MSGSEND = YES; 702 | ENABLE_TESTABILITY = YES; 703 | GCC_C_LANGUAGE_STANDARD = gnu11; 704 | GCC_DYNAMIC_NO_PIC = NO; 705 | GCC_NO_COMMON_BLOCKS = YES; 706 | GCC_OPTIMIZATION_LEVEL = 0; 707 | GCC_PREPROCESSOR_DEFINITIONS = ( 708 | "DEBUG=1", 709 | "$(inherited)", 710 | ); 711 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 712 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 713 | GCC_WARN_UNDECLARED_SELECTOR = YES; 714 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 715 | GCC_WARN_UNUSED_FUNCTION = YES; 716 | GCC_WARN_UNUSED_VARIABLE = YES; 717 | IPHONEOS_DEPLOYMENT_TARGET = 11.0; 718 | MTL_ENABLE_DEBUG_INFO = YES; 719 | ONLY_ACTIVE_ARCH = YES; 720 | SDKROOT = iphoneos; 721 | SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; 722 | SWIFT_OPTIMIZATION_LEVEL = "-Onone"; 723 | SWIFT_VERSION = 4.0; 724 | VERSIONING_SYSTEM = "apple-generic"; 725 | 
VERSION_INFO_PREFIX = ""; 726 | }; 727 | name = Debug; 728 | }; 729 | 4DDF8C341F211E2E008C2135 /* Release */ = { 730 | isa = XCBuildConfiguration; 731 | buildSettings = { 732 | ALWAYS_SEARCH_USER_PATHS = NO; 733 | CLANG_ANALYZER_NONNULL = YES; 734 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 735 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; 736 | CLANG_CXX_LIBRARY = "libc++"; 737 | CLANG_ENABLE_MODULES = YES; 738 | CLANG_ENABLE_OBJC_ARC = YES; 739 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 740 | CLANG_WARN_BOOL_CONVERSION = YES; 741 | CLANG_WARN_COMMA = YES; 742 | CLANG_WARN_CONSTANT_CONVERSION = YES; 743 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 744 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 745 | CLANG_WARN_EMPTY_BODY = YES; 746 | CLANG_WARN_ENUM_CONVERSION = YES; 747 | CLANG_WARN_INFINITE_RECURSION = YES; 748 | CLANG_WARN_INT_CONVERSION = YES; 749 | CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; 750 | CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; 751 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 752 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 753 | CLANG_WARN_STRICT_PROTOTYPES = YES; 754 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 755 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 756 | CLANG_WARN_UNREACHABLE_CODE = YES; 757 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 758 | CODE_SIGN_IDENTITY = "iPhone Developer"; 759 | COPY_PHASE_STRIP = NO; 760 | CURRENT_PROJECT_VERSION = 1; 761 | DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; 762 | ENABLE_NS_ASSERTIONS = NO; 763 | ENABLE_STRICT_OBJC_MSGSEND = YES; 764 | GCC_C_LANGUAGE_STANDARD = gnu11; 765 | GCC_NO_COMMON_BLOCKS = YES; 766 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 767 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 768 | GCC_WARN_UNDECLARED_SELECTOR = YES; 769 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 770 | GCC_WARN_UNUSED_FUNCTION = YES; 771 | GCC_WARN_UNUSED_VARIABLE = YES; 772 | IPHONEOS_DEPLOYMENT_TARGET = 11.0; 773 | MTL_ENABLE_DEBUG_INFO = NO; 774 | SDKROOT = iphoneos; 775 | 
SWIFT_OPTIMIZATION_LEVEL = "-Owholemodule"; 776 | SWIFT_VERSION = 4.0; 777 | VALIDATE_PRODUCT = YES; 778 | VERSIONING_SYSTEM = "apple-generic"; 779 | VERSION_INFO_PREFIX = ""; 780 | }; 781 | name = Release; 782 | }; 783 | 4DDF8C361F211E2E008C2135 /* Debug */ = { 784 | isa = XCBuildConfiguration; 785 | buildSettings = { 786 | CLANG_ENABLE_MODULES = YES; 787 | CODE_SIGN_IDENTITY = ""; 788 | DEFINES_MODULE = YES; 789 | DYLIB_COMPATIBILITY_VERSION = 1; 790 | DYLIB_CURRENT_VERSION = 1; 791 | DYLIB_INSTALL_NAME_BASE = "@rpath"; 792 | HEADER_SEARCH_PATHS = "$(SDKROOT)/usr/include/libxml2"; 793 | INFOPLIST_FILE = Info_iOS.plist; 794 | INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; 795 | IPHONEOS_DEPLOYMENT_TARGET = 8.0; 796 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks"; 797 | PRODUCT_BUNDLE_IDENTIFIER = com.yahoo.raymccrae.HTMLSAXParser; 798 | PRODUCT_NAME = HTMLSAXParser; 799 | SKIP_INSTALL = YES; 800 | SWIFT_INCLUDE_PATHS = "$(PROJECT_DIR)/Sources/CHTMLSAXParser/include"; 801 | SWIFT_OPTIMIZATION_LEVEL = "-Onone"; 802 | TARGETED_DEVICE_FAMILY = "1,2"; 803 | }; 804 | name = Debug; 805 | }; 806 | 4DDF8C371F211E2E008C2135 /* Release */ = { 807 | isa = XCBuildConfiguration; 808 | buildSettings = { 809 | CLANG_ENABLE_MODULES = YES; 810 | CODE_SIGN_IDENTITY = ""; 811 | DEFINES_MODULE = YES; 812 | DYLIB_COMPATIBILITY_VERSION = 1; 813 | DYLIB_CURRENT_VERSION = 1; 814 | DYLIB_INSTALL_NAME_BASE = "@rpath"; 815 | HEADER_SEARCH_PATHS = "$(SDKROOT)/usr/include/libxml2"; 816 | INFOPLIST_FILE = Info_iOS.plist; 817 | INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; 818 | IPHONEOS_DEPLOYMENT_TARGET = 8.0; 819 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks"; 820 | PRODUCT_BUNDLE_IDENTIFIER = com.yahoo.raymccrae.HTMLSAXParser; 821 | PRODUCT_NAME = HTMLSAXParser; 822 | SKIP_INSTALL = YES; 823 | SWIFT_INCLUDE_PATHS = "$(PROJECT_DIR)/Sources/CHTMLSAXParser/include"; 824 | TARGETED_DEVICE_FAMILY 
= "1,2"; 825 | }; 826 | name = Release; 827 | }; 828 | 4DDF8C391F211E2E008C2135 /* Debug */ = { 829 | isa = XCBuildConfiguration; 830 | buildSettings = { 831 | ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; 832 | HEADER_SEARCH_PATHS = "$(SDKROOT)/usr/include/libxml2"; 833 | INFOPLIST_FILE = Tests/HTMLSAXParser/Info.plist; 834 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks"; 835 | PRODUCT_BUNDLE_IDENTIFIER = com.yahoo.raymccrae.HTMLParserTests; 836 | PRODUCT_NAME = "$(TARGET_NAME)"; 837 | TARGETED_DEVICE_FAMILY = "1,2"; 838 | }; 839 | name = Debug; 840 | }; 841 | 4DDF8C3A1F211E2E008C2135 /* Release */ = { 842 | isa = XCBuildConfiguration; 843 | buildSettings = { 844 | ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; 845 | HEADER_SEARCH_PATHS = "$(SDKROOT)/usr/include/libxml2"; 846 | INFOPLIST_FILE = Tests/HTMLSAXParser/Info.plist; 847 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks"; 848 | PRODUCT_BUNDLE_IDENTIFIER = com.yahoo.raymccrae.HTMLParserTests; 849 | PRODUCT_NAME = "$(TARGET_NAME)"; 850 | TARGETED_DEVICE_FAMILY = "1,2"; 851 | }; 852 | name = Release; 853 | }; 854 | /* End XCBuildConfiguration section */ 855 | 856 | /* Begin XCConfigurationList section */ 857 | 4D3D6DAE1F3EDEDA00EE40CB /* Build configuration list for PBXNativeTarget "HTMLSAXParser MacOS" */ = { 858 | isa = XCConfigurationList; 859 | buildConfigurations = ( 860 | 4D3D6DAF1F3EDEDA00EE40CB /* Debug */, 861 | 4D3D6DB01F3EDEDA00EE40CB /* Release */, 862 | ); 863 | defaultConfigurationIsVisible = 0; 864 | defaultConfigurationName = Release; 865 | }; 866 | 4D7F1DA21F3EEC790080660A /* Build configuration list for PBXNativeTarget "HTMLSAXParser AppleTV" */ = { 867 | isa = XCConfigurationList; 868 | buildConfigurations = ( 869 | 4D7F1DA31F3EEC790080660A /* Debug */, 870 | 4D7F1DA41F3EEC790080660A /* Release */, 871 | ); 872 | defaultConfigurationIsVisible = 0; 873 | defaultConfigurationName = Release; 874 | }; 875 | 
4D9BB2391F2160450010D5A8 /* Build configuration list for PBXNativeTarget "HTMLParserDemo" */ = { 876 | isa = XCConfigurationList; 877 | buildConfigurations = ( 878 | 4D9BB23A1F2160450010D5A8 /* Debug */, 879 | 4D9BB23B1F2160450010D5A8 /* Release */, 880 | ); 881 | defaultConfigurationIsVisible = 0; 882 | defaultConfigurationName = Release; 883 | }; 884 | 4DDF8C1B1F211E2D008C2135 /* Build configuration list for PBXProject "HTMLSAXParser" */ = { 885 | isa = XCConfigurationList; 886 | buildConfigurations = ( 887 | 4DDF8C331F211E2E008C2135 /* Debug */, 888 | 4DDF8C341F211E2E008C2135 /* Release */, 889 | ); 890 | defaultConfigurationIsVisible = 0; 891 | defaultConfigurationName = Release; 892 | }; 893 | 4DDF8C351F211E2E008C2135 /* Build configuration list for PBXNativeTarget "HTMLSAXParser iOS" */ = { 894 | isa = XCConfigurationList; 895 | buildConfigurations = ( 896 | 4DDF8C361F211E2E008C2135 /* Debug */, 897 | 4DDF8C371F211E2E008C2135 /* Release */, 898 | ); 899 | defaultConfigurationIsVisible = 0; 900 | defaultConfigurationName = Release; 901 | }; 902 | 4DDF8C381F211E2E008C2135 /* Build configuration list for PBXNativeTarget "HTMLSAXParserTests" */ = { 903 | isa = XCConfigurationList; 904 | buildConfigurations = ( 905 | 4DDF8C391F211E2E008C2135 /* Debug */, 906 | 4DDF8C3A1F211E2E008C2135 /* Release */, 907 | ); 908 | defaultConfigurationIsVisible = 0; 909 | defaultConfigurationName = Release; 910 | }; 911 | /* End XCConfigurationList section */ 912 | }; 913 | rootObject = 4DDF8C181F211E2D008C2135 /* Project object */; 914 | } 915 | --------------------------------------------------------------------------------