├── .editorconfig ├── .github └── workflows │ └── build.yml ├── .gitignore ├── .swiftpm └── xcode │ └── package.xcworkspace │ └── contents.xcworkspacedata ├── LICENSE ├── Package.swift ├── README.md ├── RSParser.podspec ├── Sources ├── ObjC │ ├── FeedParser.h │ ├── NSData+RSParser.h │ ├── NSData+RSParser.m │ ├── NSString+RSParser.h │ ├── NSString+RSParser.m │ ├── ParserData.h │ ├── ParserData.m │ ├── RSAtomParser.h │ ├── RSAtomParser.m │ ├── RSDateParser.h │ ├── RSDateParser.m │ ├── RSHTMLLinkParser.h │ ├── RSHTMLLinkParser.m │ ├── RSHTMLMetadata.h │ ├── RSHTMLMetadata.m │ ├── RSHTMLMetadataParser.h │ ├── RSHTMLMetadataParser.m │ ├── RSHTMLTag.h │ ├── RSHTMLTag.m │ ├── RSOPMLAttributes.h │ ├── RSOPMLAttributes.m │ ├── RSOPMLDocument.h │ ├── RSOPMLDocument.m │ ├── RSOPMLError.h │ ├── RSOPMLError.m │ ├── RSOPMLFeedSpecifier.h │ ├── RSOPMLFeedSpecifier.m │ ├── RSOPMLItem.h │ ├── RSOPMLItem.m │ ├── RSOPMLParser.h │ ├── RSOPMLParser.m │ ├── RSParsedArticle.h │ ├── RSParsedArticle.m │ ├── RSParsedAuthor.h │ ├── RSParsedAuthor.m │ ├── RSParsedEnclosure.h │ ├── RSParsedEnclosure.m │ ├── RSParsedFeed.h │ ├── RSParsedFeed.m │ ├── RSParserInternal.h │ ├── RSParserInternal.m │ ├── RSRSSParser.h │ ├── RSRSSParser.m │ ├── RSSAXHTMLParser.h │ ├── RSSAXHTMLParser.m │ ├── RSSAXParser.h │ ├── RSSAXParser.m │ └── include │ │ └── RSParser.h └── Swift │ ├── Exports.swift │ ├── Feeds │ ├── FeedParser.swift │ ├── FeedParserError.swift │ ├── FeedType.swift │ ├── JSON │ │ ├── JSONFeedParser.swift │ │ └── RSSInJSONParser.swift │ ├── ParsedAttachment.swift │ ├── ParsedAuthor.swift │ ├── ParsedExtension.swift │ ├── ParsedFeed.swift │ ├── ParsedHub.swift │ ├── ParsedItem.swift │ └── XML │ │ ├── AtomParser.swift │ │ ├── RSParsedFeedTransformer.swift │ │ └── RSSParser.swift │ ├── JSON │ ├── JSONTypes.swift │ └── JSONUtilities.swift │ └── Utilities │ └── String+RSParser.swift └── Tests ├── LinuxMain.swift └── RSParserTests ├── AtomParserTests.swift ├── EntityDecodingTests.swift ├── 
FeedParserTypeTests.swift ├── HTMLLinkTests.swift ├── HTMLMetadataTests.swift ├── Info.plist ├── JSONFeedParserTests.swift ├── OPMLTests.swift ├── RSDateParserTests.swift ├── RSSInJSONParserTests.swift ├── RSSParserTests.swift └── Resources ├── 3960.json ├── 489.rss ├── 4fsodonline.atom ├── DaringFireball.atom ├── DaringFireball.html ├── DaringFireball.json ├── DaringFireball.rss ├── EMarley.rss ├── KatieFloyd.rss ├── OneFootTsunami.atom ├── ScriptingNews.json ├── Subs.opml ├── SubsNoTitleAttributes.opml ├── YouTubeTheVolvoRocks.html ├── aktuality.rss ├── allthis-partial.json ├── allthis.atom ├── allthis.json ├── atp.rss ├── authors.json ├── bio.rdf ├── cloudblog.rss ├── coco.html ├── curt.json ├── dcrainmaker.xml ├── donthitsave.xml ├── expertopinionent.atom ├── furbo.html ├── inessential.html ├── inessential.json ├── jsonfeed-extension.json ├── kc0011.rss ├── livemint.xml ├── macworld.rss ├── manton.rss ├── monkeydom.rss ├── natasha.xml ├── phpxml.rss ├── pxlnv.json ├── rose.json ├── rubenerd.rss ├── russcox.atom ├── scriptingNews.rss ├── sixcolors.html └── theomnishow.rss /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*.{swift,m,h,c}] 4 | indent_style = tab 5 | indent_size = 4 6 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: macOS-latest 9 | 10 | steps: 11 | - uses: maxim-lobanov/setup-xcode@v1 12 | with: 13 | xcode-version: latest-stable 14 | 15 | - name: Checkout Project 16 | uses: actions/checkout@v1 17 | 18 | - name: Run Build 19 | run: swift build 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Xcode 2 | # 3 | # gitignore 
contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore 4 | 5 | .DS_Store 6 | 7 | ## Build generated 8 | .build/ 9 | build/ 10 | DerivedData/ 11 | 12 | ## Various settings 13 | *.pbxuser 14 | !default.pbxuser 15 | *.mode1v3 16 | !default.mode1v3 17 | *.mode2v3 18 | !default.mode2v3 19 | *.perspectivev3 20 | !default.perspectivev3 21 | xcuserdata/ 22 | 23 | ## Other 24 | *.moved-aside 25 | *.xccheckout 26 | *.xcscmblueprint 27 | 28 | ## Obj-C/Swift specific 29 | *.hmap 30 | *.ipa 31 | *.dSYM.zip 32 | *.dSYM 33 | 34 | # CocoaPods 35 | # 36 | # We recommend against adding the Pods directory to your .gitignore. However 37 | # you should judge for yourself, the pros and cons are mentioned at: 38 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control 39 | # 40 | # Pods/ 41 | 42 | # Carthage 43 | # 44 | # Add this line if you want to avoid checking in source code from Carthage dependencies. 45 | # Carthage/Checkouts 46 | 47 | Carthage/Build 48 | 49 | # fastlane 50 | # 51 | # It is recommended to not store the screenshots in the git repo. Instead, use fastlane to re-generate the 52 | # screenshots whenever they are needed. 
53 | # For more information about the recommended setup visit: 54 | # https://docs.fastlane.tools/best-practices/source-control/#source-control 55 | 56 | fastlane/report.xml 57 | fastlane/Preview.html 58 | fastlane/screenshots 59 | fastlane/test_output 60 | 61 | # Code Injection 62 | # 63 | # After new code Injection tools there's a generated folder /iOSInjectionProject 64 | # https://github.com/johnno1962/injectionforxcode 65 | 66 | iOSInjectionProject/ 67 | -------------------------------------------------------------------------------- /.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Brent Simmons 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version:5.3 2 | // The swift-tools-version declares the minimum version of Swift required to build this package. 3 | 4 | import PackageDescription 5 | 6 | let package = Package( 7 | name: "RSParser", 8 | platforms: [.macOS(SupportedPlatform.MacOSVersion.v10_15), .iOS(SupportedPlatform.IOSVersion.v13)], 9 | products: [ 10 | // Products define the executables and libraries a package produces, and make them visible to other packages. 11 | .library( 12 | name: "RSParser", 13 | type: .dynamic, 14 | targets: ["RSParser"]), 15 | .library( 16 | name: "RSParserObjC", 17 | type: .dynamic, 18 | targets: ["RSParserObjC"]), 19 | ], 20 | dependencies: [ 21 | // Dependencies declare other packages that this package depends on. 22 | // .package(url: /* package url */, from: "1.0.0"), 23 | ], 24 | targets: [ 25 | // Targets are the basic building blocks of a package. A target can define a module or a test suite. 26 | // Targets can depend on other targets in this package, and on products in packages this package depends on. 
27 | .target( 28 | name: "RSParser", 29 | dependencies: ["RSParserObjC"], 30 | path: "Sources/Swift"), 31 | .target( 32 | name: "RSParserObjC", 33 | dependencies: [], 34 | path: "Sources/ObjC", 35 | cSettings: [ 36 | .headerSearchPath("include") 37 | ]), 38 | .testTarget( 39 | name: "RSParserTests", 40 | dependencies: ["RSParser"], 41 | exclude: ["Info.plist"], 42 | resources: [.copy("Resources")]), 43 | ] 44 | ) 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RSParser 2 | 3 | This framework was developed for [NetNewsWire](https://github.com/brentsimmons/NetNewsWire) and is made available here for developers who just need the parsing code. It has no dependencies that aren’t provided by the system. 4 | 5 | _Update 6 Feb. 2018_: RSParser is now a CocoaPod, with the much-appreciated help of [Silver Fox](https://github.com/dcilia). (We _think_ it worked, anyway. Looked like it did.) 6 | 7 | ## What’s inside 8 | 9 | This framework includes parsers for: 10 | 11 | * [RSS](http://cyber.harvard.edu/rss/rss.html), [Atom](https://tools.ietf.org/html/rfc4287), [JSON Feed](https://jsonfeed.org/), and [RSS-in-JSON](https://github.com/scripting/Scripting-News/blob/master/rss-in-json/README.md) 12 | * [OPML](http://dev.opml.org/) 13 | * Internet dates 14 | * HTML metadata and links 15 | * HTML entities 16 | 17 | It also includes Objective-C wrappers for libXML2’s XML SAX and HTML SAX parsers. You can write your own parsers on top of these. 18 | 19 | This framework builds for macOS and iOS; see the `platforms` list in `Package.swift` for the minimum supported versions. 20 | 21 | ## How to parse feeds 22 | 23 | To get the type of a feed, even with partial data, call `FeedParser.feedType(parserData)`, which will return a `FeedType`. 24 | 25 | To parse a feed, call `FeedParser.parse(parserData)`, which will return a [ParsedFeed](Sources/Swift/Feeds/ParsedFeed.swift). 
Also see related structs: `ParsedAuthor`, `ParsedItem`, `ParsedAttachment`, and `ParsedHub`. 26 | 27 | You do *not* need to know the type of feed when calling `FeedParser.parse` — it will figure it out and use the correct concrete parser. 28 | 29 | However, if you do want to use a concrete parser directly, see [RSSInJSONParser](Sources/Swift/Feeds/JSON/RSSInJSONParser.swift), [JSONFeedParser](Sources/Swift/Feeds/JSON/JSONFeedParser.swift), [RSSParser](Sources/Swift/Feeds/XML/RSSParser.swift), and [AtomParser](Sources/Swift/Feeds/XML/AtomParser.swift). 30 | 31 | (Note: if you want to write a feed reader app, please do! You have my blessing and encouragement. Let me know when it’s shipping so I can check it out.) 32 | 33 | ## How to parse OPML 34 | 35 | Call `+[RSOPMLParser parseOPMLWithParserData:error:]`, which returns an `RSOPMLDocument`. See related objects: `RSOPMLItem`, `RSOPMLAttributes`, `RSOPMLFeedSpecifier`, and `RSOPMLError`. 36 | 37 | ## How to parse dates 38 | 39 | Call `RSDateWithString` or `RSDateWithBytes` (see `RSDateParser`). These handle the common internet date formats. You don’t need to know which format. 
40 | 41 | ## How to parse HTML 42 | 43 | To get an array of ` "MIT", :file => "LICENSE" } 21 | s.author = { "Brent Simmons" => "brent@ranchero.com" } 22 | 23 | # When using multiple platforms 24 | s.ios.deployment_target = "9.3" 25 | s.osx.deployment_target = "10.10" 26 | s.watchos.deployment_target = "2.0" 27 | s.tvos.deployment_target = "9.0" 28 | 29 | s.source = { :git => "https://github.com/brentsimmons/RSParser.git", :tag => "#{s.version}" } 30 | s.source_files = "Sources", "Sources/**/*.{h,m,swift}" 31 | s.library = "xml2" 32 | s.xcconfig = { "HEADER_SEARCH_PATHS" => "$(SDKROOT)/usr/include/libxml2" } 33 | 34 | end 35 | -------------------------------------------------------------------------------- /Sources/ObjC/FeedParser.h: -------------------------------------------------------------------------------- 1 | // 2 | // FeedParser.h 3 | // RSXML 4 | // 5 | // Created by Brent Simmons on 7/12/15. 6 | // Copyright © 2015 Ranchero Software, LLC. All rights reserved. 7 | // 8 | 9 | @import Foundation; 10 | 11 | NS_ASSUME_NONNULL_BEGIN 12 | 13 | @class RSParsedFeed; 14 | @class RSXMLData; 15 | 16 | 17 | @protocol FeedParser 18 | 19 | + (BOOL)canParseFeed:(RSXMLData *)xmlData; 20 | 21 | - (instancetype)initWithXMLData:(RSXMLData *)xmlData; 22 | 23 | - (nullable RSParsedFeed *)parseFeed:(NSError **)error; 24 | 25 | 26 | @end 27 | 28 | NS_ASSUME_NONNULL_END 29 | -------------------------------------------------------------------------------- /Sources/ObjC/NSData+RSParser.h: -------------------------------------------------------------------------------- 1 | // 2 | // NSData+RSParser.h 3 | // RSParser 4 | // 5 | // Created by Brent Simmons on 6/24/17. 6 | // Copyright © 2017 Ranchero Software, LLC. All rights reserved. 
7 | // 8 | 9 | @import Foundation; 10 | 11 | NS_ASSUME_NONNULL_BEGIN 12 | 13 | @interface NSData (RSParser) 14 | 15 | - (BOOL)isProbablyHTML; 16 | - (BOOL)isProbablyXML; 17 | - (BOOL)isProbablyJSON; 18 | 19 | - (BOOL)isProbablyJSONFeed; 20 | - (BOOL)isProbablyRSSInJSON; 21 | - (BOOL)isProbablyRSS; 22 | - (BOOL)isProbablyAtom; 23 | 24 | @end 25 | 26 | NS_ASSUME_NONNULL_END 27 | 28 | 29 | -------------------------------------------------------------------------------- /Sources/ObjC/NSData+RSParser.m: -------------------------------------------------------------------------------- 1 | // 2 | // NSData+RSParser.m 3 | // RSParser 4 | // 5 | // Created by Brent Simmons on 6/24/17. 6 | // Copyright © 2017 Ranchero Software, LLC. All rights reserved. 7 | // 8 | 9 | #import "NSData+RSParser.h" 10 | 11 | 12 | 13 | 14 | /* TODO: find real-world cases where the isProbably* cases fail when they should succeed, and add them to tests.*/ 15 | 16 | static BOOL bytesAreProbablyHTML(const char *bytes, NSUInteger numberOfBytes); 17 | static BOOL bytesAreProbablyXML(const char *bytes, NSUInteger numberOfBytes); 18 | static BOOL bytesStartWithStringIgnoringWhitespace(const char *string, const char *bytes, NSUInteger numberOfBytes); 19 | static BOOL didFindString(const char *string, const char *bytes, NSUInteger numberOfBytes); 20 | static BOOL bytesStartWithRSS(const char *bytes, NSUInteger numberOfBytes); 21 | static BOOL bytesStartWithRDF(const char *bytes, NSUInteger numberOfBytes); 22 | static BOOL bytesStartWithAtom(const char *bytes, NSUInteger numberOfBytes); 23 | 24 | @implementation NSData (RSParser) 25 | 26 | - (BOOL)isProbablyHTML { 27 | 28 | return bytesAreProbablyHTML(self.bytes, self.length); 29 | } 30 | 31 | - (BOOL)isProbablyXML { 32 | 33 | return bytesAreProbablyXML(self.bytes, self.length); 34 | } 35 | 36 | - (BOOL)isProbablyJSON { 37 | 38 | return bytesStartWithStringIgnoringWhitespace("{", self.bytes, self.length); 39 | } 40 | 41 | - (BOOL)isProbablyJSONFeed { 42 | 
43 | if (![self isProbablyJSON]) { 44 | return NO; 45 | } 46 | return didFindString("://jsonfeed.org/version/", self.bytes, self.length) || didFindString(":\\/\\/jsonfeed.org\\/version\\/", self.bytes, self.length); 47 | } 48 | 49 | - (BOOL)isProbablyRSSInJSON { 50 | 51 | if (![self isProbablyJSON]) { 52 | return NO; 53 | } 54 | const char *bytes = self.bytes; 55 | NSUInteger length = self.length; 56 | return didFindString("rss", bytes, length) && didFindString("channel", bytes, length) && didFindString("item", bytes, length); 57 | } 58 | 59 | - (BOOL)isProbablyRSS { 60 | 61 | if (didFindString(" tag, but it should be parsed anyway. It does have some other distinct RSS markers we can find. 66 | return (didFindString("", self.bytes, self.length) && didFindString("", self.bytes, self.length)); 67 | } 68 | 69 | - (BOOL)isProbablyAtom { 70 | 71 | return didFindString(", and & entity-encoded. 18 | @property (readonly, copy) NSString *rsparser_stringByEncodingRequiredEntities; 19 | 20 | - (NSString *)rsparser_md5Hash; 21 | 22 | - (BOOL)rsparser_contains:(NSString *)s; 23 | 24 | @end 25 | 26 | NS_ASSUME_NONNULL_END 27 | -------------------------------------------------------------------------------- /Sources/ObjC/NSString+RSParser.m: -------------------------------------------------------------------------------- 1 | // 2 | // NSString+RSParser.m 3 | // RSParser 4 | // 5 | // Created by Brent Simmons on 9/25/15. 6 | // Copyright © 2015 Ranchero Software, LLC. All rights reserved. 
7 | //
8 |
9 | #import "NSString+RSParser.h"
10 | #import <CommonCrypto/CommonDigest.h>
11 |
12 |
13 |
14 |
15 | @interface NSScanner (RSParser)
16 |
17 | /// Scans one HTML entity starting at the current scan location (the caller positions the scanner on the `&`).
18 | /// Returns YES and stores the decoded text in `decodedEntity` when the entity is terminated by `;` and can be decoded.
19 | - (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity;
20 |
21 | @end
22 |
23 |
24 | @implementation NSString (RSParser)
25 |
26 | /// Returns YES when `s` occurs anywhere in the receiver.
27 | - (BOOL)rsparser_contains:(NSString *)s {
28 |
29 | 	return [self rangeOfString:s].location != NSNotFound;
30 | }
31 |
32 | /// Returns the receiver with named and numeric HTML entities decoded.
33 | /// Entities that cannot be decoded are left as written. Returns the receiver itself when nothing changed.
34 | - (NSString *)rsparser_stringByDecodingHTMLEntities {
35 |
36 | 	@autoreleasepool {
37 |
38 | 		NSScanner *scanner = [[NSScanner alloc] initWithString:self];
39 | 		scanner.charactersToBeSkipped = nil;
40 | 		NSMutableString *result = [[NSMutableString alloc] init];
41 |
42 | 		while (true) {
43 |
44 | 			NSString *scannedString = nil;
45 | 			if ([scanner scanUpToString:@"&" intoString:&scannedString]) {
46 | 				[result appendString:scannedString];
47 | 			}
48 | 			if (scanner.isAtEnd) {
49 | 				break;
50 | 			}
51 | 			NSUInteger savedScanLocation = scanner.scanLocation;
52 |
53 | 			NSString *decodedEntity = nil;
54 | 			if ([scanner rs_scanEntityValue:&decodedEntity]) {
55 | 				[result appendString:decodedEntity];
56 | 			}
57 | 			else {
58 | 				// Not a decodable entity: keep the literal & and resume scanning right after it.
59 | 				[result appendString:@"&"];
60 | 				scanner.scanLocation = savedScanLocation + 1;
61 | 			}
62 |
63 | 			if (scanner.isAtEnd) {
64 | 				break;
65 | 			}
66 | 		}
67 |
68 | 		if ([self isEqualToString:result]) {
69 | 			return self;
70 | 		}
71 | 		return [result copy];
72 | 	}
73 | }
74 |
75 |
76 | static NSDictionary *RSEntitiesDictionary(void);
77 | static NSString *RSParserStringWithValue(uint32_t value);
78 |
79 | /// Decodes a single entity: a named entity, `#NNN` (decimal) or `#xHHH` (hexadecimal).
80 | /// Returns nil when the entity is not recognized or its numeric value is out of range.
81 | - (NSString * _Nullable)rs_stringByDecodingEntity {
82 |
83 | 	// self may or may not have outer & and ; characters.
84 |
85 | 	NSMutableString *s = [self mutableCopy];
86 |
87 | 	if ([s hasPrefix:@"&"]) {
88 | 		[s deleteCharactersInRange:NSMakeRange(0, 1)];
89 | 	}
90 | 	if ([s hasSuffix:@";"]) {
91 | 		[s deleteCharactersInRange:NSMakeRange(s.length - 1, 1)];
92 | 	}
93 |
94 | 	NSDictionary *entitiesDictionary = RSEntitiesDictionary();
95 |
96 | 	// Look up the stripped name: the table's keys carry no & or ; delimiters.
97 | 	NSString *decodedEntity = entitiesDictionary[s];
98 | 	if (decodedEntity) {
99 | 		return decodedEntity;
100 | 	}
101 |
102 | 	if ([s hasPrefix:@"#x"] || [s hasPrefix:@"#X"]) { // Hex
103 | 		NSScanner *scanner = [[NSScanner alloc] initWithString:s];
104 | 		scanner.charactersToBeSkipped = [NSCharacterSet characterSetWithCharactersInString:@"#xX"];
105 | 		unsigned int hexValue = 0;
106 | 		if ([scanner scanHexInt:&hexValue]) {
107 | 			return RSParserStringWithValue((uint32_t)hexValue);
108 | 		}
109 | 		return nil;
110 | 	}
111 |
112 | 	else if ([s hasPrefix:@"#"]) {
113 | 		[s deleteCharactersInRange:NSMakeRange(0, 1)];
114 | 		NSInteger value = s.integerValue;
115 | 		// Reject zero/negative values and anything past the last Unicode code point (U+10FFFF);
116 | 		// values above UINT32_MAX would otherwise be silently truncated by the cast below.
117 | 		if (value < 1 || value > 0x10FFFF) {
118 | 			return nil;
119 | 		}
120 | 		return RSParserStringWithValue((uint32_t)value);
121 | 	}
122 |
123 | 	return nil;
124 | }
125 |
126 | /// Returns a copy of the receiver with <, > and & replaced by &lt;, &gt; and &amp;.
127 | - (NSString *)rsparser_stringByEncodingRequiredEntities {
128 | 	NSMutableString *result = [NSMutableString string];
129 |
130 | 	for (NSUInteger i = 0; i < self.length; ++i) {
131 | 		unichar c = [self characterAtIndex:i];
132 |
133 | 		switch (c) {
134 | 			case '<':
135 | 				[result appendString:@"&lt;"];
136 | 				break;
137 | 			case '>':
138 | 				[result appendString:@"&gt;"];
139 | 				break;
140 | 			case '&':
141 | 				[result appendString:@"&amp;"];
142 | 				break;
143 | 			default:
144 | 				[result appendFormat:@"%C", c];
145 | 				break;
146 | 		}
147 | 	}
148 |
149 | 	return [result copy];
150 | }
151 |
152 | #pragma GCC diagnostic push
153 | #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
154 | /// Raw MD5 digest (CC_MD5_DIGEST_LENGTH bytes) of the receiver's UTF-8 encoding.
155 | - (NSData *)_rsparser_md5HashData {
156 |
157 | 	NSData *data = [self dataUsingEncoding:NSUTF8StringEncoding];
158 | 	unsigned char hash[CC_MD5_DIGEST_LENGTH];
159 | 	CC_MD5(data.bytes, (CC_LONG)data.length, hash);
160 |
161 | 	return [NSData dataWithBytes:(const void *)hash length:CC_MD5_DIGEST_LENGTH];
162 | }
163 | #pragma GCC diagnostic pop
164 |
165 | /// MD5 digest of the receiver's UTF-8 encoding as 32 lowercase hexadecimal characters.
166 | - (NSString *)rsparser_md5Hash {
167 |
168 | 	NSData *md5Data = [self _rsparser_md5HashData];
169 | 	const Byte *bytes = md5Data.bytes;
170 | 	return [NSString stringWithFormat:@"%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x", bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7], bytes[8], bytes[9], bytes[10], bytes[11], bytes[12], bytes[13], bytes[14], bytes[15]];
171 | }
172 |
173 |
174 | @end
175 |
176 | @implementation NSScanner (RSParser)
177 |
178 | - (BOOL)rs_scanEntityValue:(NSString * _Nullable * _Nullable)decodedEntity {
179 |
180 | 	NSString *s = self.string;
181 | 	NSUInteger initialScanLocation = self.scanLocation;
182 | 	static const NSUInteger maxEntityLength = 20; // It’s probably smaller, but this is just for sanity.
183 |
184 | 	while (true) {
185 |
186 | 		unichar ch = [s characterAtIndex:self.scanLocation];
187 | 		if ([NSCharacterSet.whitespaceAndNewlineCharacterSet characterIsMember:ch]) {
188 | 			break;
189 | 		}
190 | 		if (ch == ';') {
191 | 			if (!decodedEntity) {
192 | 				return YES;
193 | 			}
194 | 			NSString *rawEntity = [s substringWithRange:NSMakeRange(initialScanLocation + 1, (self.scanLocation - initialScanLocation) - 1)];
195 | 			*decodedEntity = [rawEntity rs_stringByDecodingEntity];
196 | 			self.scanLocation = self.scanLocation + 1;
197 | 			return *decodedEntity != nil;
198 | 		}
199 |
200 | 		self.scanLocation = self.scanLocation + 1;
201 | 		if (self.scanLocation - initialScanLocation > maxEntityLength) {
202 | 			break;
203 | 		}
204 | 		if (self.isAtEnd) {
205 | 			break;
206 | 		}
207 | 	}
208 |
209 | 	return NO;
210 | }
211 |
212 | @end
213 |
214 | /// Builds a string from a single code point supplied as a UTF-32 value.
215 | /// Values 0x80–0x9F are first remapped through the Windows Latin-1 extension table below.
216 | static NSString *RSParserStringWithValue(uint32_t value) {
217 | 	// From WebCore's HTMLEntityParser
218 | 	static const uint32_t windowsLatin1ExtensionArray[32] = {
219 | 		0x20AC, 0x0081, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021, // 80-87
220 | 		0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008D, 0x017D, 0x008F, // 88-8F
221 | 		0x0090, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, // 90-97
222 | 		0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178 // 98-9F
223 | 	};
224 |
225 | 	if ((value & ~0x1Fu) == 0x80u) { // value >= 128 && value < 160
226 | 		value = windowsLatin1ExtensionArray[value - 0x80];
227 | 	}
228 |
229 | 	value = CFSwapInt32HostToLittle(value);
230 |
231 | 	return [[NSString alloc] initWithBytes:&value length:sizeof(value) encoding:NSUTF32LittleEndianStringEncoding];
232 | }
233 |
234 | /// Lazily built (dispatch_once) table mapping entity names, without & and ;, to their replacement text.
235 | static NSDictionary *RSEntitiesDictionary(void) {
236 |
237 | 	static NSDictionary *entitiesDictionary = nil;
238 |
239 | 	static dispatch_once_t onceToken;
240 | 	dispatch_once(&onceToken, ^{
241 |
242 | 		entitiesDictionary = @{
243 | 			// Named entities
244 | 			@"AElig": @"Æ",
245 | 			@"Aacute": @"Á",
246 | 			@"Acirc": @"Â",
247 | 			@"Agrave": @"À",
248 | 			@"Aring": @"Å",
249 | 			@"Atilde": @"Ã",
250 | 			@"Auml": @"Ä",
251 | 			@"Ccedil": @"Ç",
252 | 			@"Dstrok": @"Ð",
253 | 			@"ETH": @"Ð",
254 | 			@"Eacute": @"É",
255 | 			@"Ecirc": @"Ê",
256 | 			@"Egrave": @"È",
257 | 			@"Euml": @"Ë",
258 | 			@"Iacute": @"Í",
259 | 			@"Icirc": @"Î",
260 | 			@"Igrave": @"Ì",
261 | 			@"Iuml": @"Ï",
262 | 			@"Ntilde": @"Ñ",
263 | 			@"Oacute": @"Ó",
264 | 			@"Ocirc": @"Ô",
265 | 			@"Ograve": @"Ò",
266 | 			@"Oslash": @"Ø",
267 | 			@"Otilde": @"Õ",
268 | 			@"Ouml": @"Ö",
269 | 			@"Pi": @"Π",
270 | 			@"THORN": @"Þ",
271 | 			@"Uacute": @"Ú",
272 | 			@"Ucirc": @"Û",
273 | 			@"Ugrave": @"Ù",
274 | 			@"Uuml": @"Ü",
275 | 			@"Yacute": @"Ý",
276 | 			@"aacute": @"á",
277 | 			@"acirc": @"â",
278 | 			@"acute": @"´",
279 | 			@"aelig": @"æ",
280 | 			@"agrave": @"à",
281 | 			@"amp": @"&",
282 | 			@"apos": @"'",
283 | 			@"aring": @"å",
284 | 			@"atilde": @"ã",
285 | 			@"auml": @"ä",
286 | 			@"brkbar": @"¦",
287 | 			@"brvbar": @"¦",
288 | 			@"ccedil": @"ç",
289 | 			@"cedil": @"¸",
290 | 			@"cent": @"¢",
291 | 			@"copy": @"©",
292 | 			@"curren": @"¤",
293 | 			@"deg": @"°",
294 | 			@"die": @"¨",
295 | 			@"divide": @"÷",
296 | 			@"eacute": @"é",
297 | 			@"ecirc": @"ê",
298 | 			@"egrave": @"è",
299 | 			@"eth": @"ð",
300 | 			@"euml": @"ë",
301 | 			@"euro": @"€",
302 | 			@"frac12": @"½",
303 | 			@"frac14": @"¼",
304 | 			@"frac34": @"¾",
305 | 			@"gt": @">",
306 | 			@"hearts": @"♥",
307 | 			@"hellip": @"…",
308 | 			@"iacute": @"í",
309 | 			@"icirc": @"î",
310 | 			@"iexcl": @"¡",
311 | 			@"igrave": @"ì",
312 | 			@"iquest": @"¿",
313 | 			@"iuml": @"ï",
314 | 			@"laquo": @"«",
315 | 			@"ldquo": @"“",
316 | 			@"lsquo": @"‘",
317 | 			@"lt": @"<",
318 | 			@"macr": @"¯",
319 | 			@"mdash": @"—",
320 | 			@"micro": @"µ",
321 | 			@"middot": @"·",
322 | 			@"ndash": @"–",
323 | 			@"not": @"¬",
324 | 			@"ntilde": @"ñ",
325 | 			@"oacute": @"ó",
326 | 			@"ocirc": @"ô",
327 | 			@"ograve": @"ò",
328 | 			@"ordf": @"ª",
329 | 			@"ordm": @"º",
330 | 			@"oslash": @"ø",
331 | 			@"otilde": @"õ",
332 | 			@"ouml": @"ö",
333 | 			@"para": @"¶",
334 | 			@"pi": @"π",
335 | 			@"plusmn": @"±",
336 | 			@"pound": @"£",
337 | 			@"quot": @"\"",
338 | 			@"raquo": @"»",
339 | 			@"rdquo": @"”",
340 | 			@"reg": @"®",
341 | 			@"rsquo": @"’",
342 | 			@"sect": @"§",
343 | 			@"shy": RSParserStringWithValue(173),
344 | 			@"sup1": @"¹",
345 | 			@"sup2": @"²",
346 | 			@"sup3": @"³",
347 | 			@"szlig": @"ß",
348 | 			@"thorn": @"þ",
349 | 			@"times": @"×",
350 | 			@"trade": @"™",
351 | 			@"uacute": @"ú",
352 | 			@"ucirc": @"û",
353 | 			@"ugrave": @"ù",
354 | 			@"uml": @"¨",
355 | 			@"uuml": @"ü",
356 | 			@"yacute": @"ý",
357 | 			@"yen": @"¥",
358 | 			@"yuml": @"ÿ",
359 | 			@"infin": @"∞",
360 | 			@"nbsp": RSParserStringWithValue(160)
361 | 		};
362 | 	});
363 |
364 | 	return entitiesDictionary;
365 | }
366 |
--------------------------------------------------------------------------------
/Sources/ObjC/ParserData.h:
--------------------------------------------------------------------------------
1 | //
2 | // ParserData.h
3 | // RSParser
4 | //
5 | // Created by Brent Simmons on 10/4/17.
6 | // Copyright © 2017 Ranchero Software, LLC. All rights reserved.
@implementation ParserData

// Designated initializer: keeps references to the URL string and the data exactly as passed in.
- (instancetype)initWithURL:(NSString *)url data:(NSData *)data {

	self = [super init];
	if (self) {
		_url = url;
		_data = data;
	}

	return self;
}

@end
// Common web date formats -- RFC 822 and ISO 8601 -- are handled here: the formats you find in JSON and XML feeds.
// Both functions may return nil. They may also return garbage, given bad input.

// Parses a date from an NSString.
NSDate * _Nullable RSDateWithString(NSString *dateString);

// If you're using a SAX parser, you have the bytes and don't need to convert to a string first.
// It's faster and uses less memory.
// (Assumes bytes are UTF-8 or ASCII. If you're using the libxml SAX parser, this will work.)
// numberOfBytes is the count of bytes to read from `bytes`.

NSDate * _Nullable RSDateWithBytes(const char *bytes, NSUInteger numberOfBytes);
/* Returns every <a href="…">text</a> anchor found in the HTML as an array of RSHTMLLink objects. */

@class ParserData;
@class RSHTMLLink;

@interface RSHTMLLinkParser : NSObject

// Parses parserData.data as HTML and returns one RSHTMLLink per anchor tag, in document order.
// parserData.url is used as the base for resolving relative hrefs.
+ (NSArray *)htmlLinksWithParserData:(ParserData *)parserData;

@end


// One anchor tag found in the document.
@interface RSHTMLLink : NSObject

// Any of these, even urlString, may be nil, because HTML can be bad.

@property (nonatomic, nullable, readonly) NSString *urlString; //absolute
@property (nonatomic, nullable, readonly) NSString *text; // text content of the anchor, whitespace-trimmed
@property (nonatomic, nullable, readonly) NSString *title; //title attribute inside anchor tag

@end
@implementation RSHTMLLinkParser


#pragma mark - Class Methods

// Convenience entry point: parses the HTML and hands back the collected links.
+ (NSArray *)htmlLinksWithParserData:(ParserData *)parserData {

	// The initializer runs the whole parse, so the links are complete once it returns.
	RSHTMLLinkParser *linkParser = [[self alloc] initWithParserData:parserData];
	return linkParser.links;
}


#pragma mark - Init

// Prepares storage, derives the base URL from parserData.url, and parses immediately.
- (instancetype)initWithParserData:(ParserData *)parserData {

	NSParameterAssert(parserData.data);
	NSParameterAssert(parserData.url);

	self = [super init];
	if (self) {
		_links = [NSMutableArray new];
		_parserData = parserData;
		_dictionaries = [NSMutableArray new];
		_baseURL = [NSURL URLWithString:parserData.url];

		[self parse];
	}

	return self;
}


#pragma mark - Parse

// Feeds the HTML bytes through the SAX parser; results arrive via the delegate callbacks below.
- (void)parse {

	RSSAXHTMLParser *htmlParser = [[RSSAXHTMLParser alloc] initWithDelegate:self];
	[htmlParser parseData:self.parserData.data];
	[htmlParser finishParsing];
}


// The link most recently appended, i.e. the anchor currently being filled in.
- (RSHTMLLink *)currentLink {

	return self.links.lastObject;
}


static NSString *kHrefKey = @"href";

// Resolves the href attribute against the base URL; nil when there is no href.
- (NSString *)urlStringFromDictionary:(NSDictionary *)d {

	NSString *href = [d rsparser_objectForCaseInsensitiveKey:kHrefKey];
	if (href) {
		NSURL *resolvedURL = [NSURL URLWithString:href relativeToURL:self.baseURL];
		return resolvedURL.absoluteString;
	}

	return nil;
}


static NSString *kTitleKey = @"title";

// Looks up the title attribute, ignoring the case of the attribute name.
- (NSString *)titleFromDictionary:(NSDictionary *)d {

	NSString *title = [d rsparser_objectForCaseInsensitiveKey:kTitleKey];
	return title;
}


// Copies the URL and title attributes onto the anchor currently being built.
- (void)handleLinkAttributes:(NSDictionary *)d {

	RSHTMLLink *anchor = self.currentLink;
	anchor.urlString = [self urlStringFromDictionary:d];
	anchor.title = [self titleFromDictionary:d];
}


static const char *kAnchor = "a";
// NOTE(review): 2 presumably counts the terminating NUL so that only an exact "a" matches,
// not every tag beginning with "a" — depends on how RSSAXEqualTags compares; confirm there.
static const NSInteger kAnchorLength = 2;

// Start of an element: for <a>, append a new link, record its attributes, and start capturing text.
- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes {

	if (RSSAXEqualTags(localName, kAnchor, kAnchorLength)) {

		[self.links addObject:[RSHTMLLink new]];

		NSDictionary *attributesDictionary = [SAXParser attributesDictionary:attributes];
		if (!RSParserObjectIsEmpty(attributesDictionary)) {
			[self handleLinkAttributes:attributesDictionary];
		}

		[SAXParser beginStoringCharacters];
	}
}


// End of an element: for </a>, store the captured characters as the link text.
- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName {

	if (RSSAXEqualTags(localName, kAnchor, kAnchorLength)) {
		self.currentLink.text = SAXParser.currentStringWithTrimmedWhitespace;
	}
}

@end
// Metadata extracted from an HTML page's <link> and <meta> tags.
@interface RSHTMLMetadata : NSObject

- (instancetype)init NS_UNAVAILABLE;
// urlString is the URL of the page the tags came from; tags are the RSHTMLTag objects
// collected by RSHTMLMetadataParser.
- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray *)tags NS_DESIGNATED_INITIALIZER;

@property (nonatomic, readonly) NSString *baseURLString;
@property (nonatomic, readonly) NSArray *tags;

// NOTE(review): element types of the arrays below are not visible in this header —
// presumably RSHTMLMetadataFavicon, RSHTMLMetadataAppleTouchIcon and RSHTMLMetadataFeedLink
// respectively; confirm in RSHTMLMetadata.m.
@property (nonatomic, readonly) NSArray *faviconLinks DEPRECATED_MSG_ATTRIBUTE("Use the favicons property instead.");
@property (nonatomic, readonly) NSArray *favicons;
@property (nonatomic, readonly) NSArray *appleTouchIcons;
@property (nonatomic, readonly) NSArray *feedLinks;

@property (nonatomic, readonly) RSHTMLOpenGraphProperties *openGraphProperties;
@property (nonatomic, readonly) RSHTMLTwitterProperties *twitterProperties;

@end


// An Apple touch icon reference.
@interface RSHTMLMetadataAppleTouchIcon : NSObject

@property (nonatomic, nullable, readonly) NSString *rel;
@property (nonatomic, nullable, readonly) NSString *sizes; // presumably the raw "sizes" attribute — TODO confirm
@property (nonatomic, readonly) CGSize size; // presumably parsed from `sizes` — TODO confirm
@property (nonatomic, nullable, readonly) NSString *urlString; // Absolute.

@end


// A link to a feed advertised by the page.
@interface RSHTMLMetadataFeedLink : NSObject

@property (nonatomic, nullable, readonly) NSString *title;
@property (nonatomic, nullable, readonly) NSString *type;
@property (nonatomic, nullable, readonly) NSString *urlString; // Absolute.

@end

// A favicon reference.
@interface RSHTMLMetadataFavicon : NSObject

@property (nonatomic, nullable, readonly) NSString *type;
@property (nonatomic, nullable, readonly) NSString *urlString;

@end

// Open Graph properties found on the page.
@interface RSHTMLOpenGraphProperties : NSObject

// TODO: the rest. At this writing (Nov. 26, 2017) I just care about og:image.
// See http://ogp.me/

- (instancetype)init NS_UNAVAILABLE;
- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray *)tags NS_DESIGNATED_INITIALIZER;

@property (nonatomic, readonly) NSArray *images; // presumably RSHTMLOpenGraphImage objects — TODO confirm

@end

// One Open Graph image and its optional details.
@interface RSHTMLOpenGraphImage : NSObject

@property (nonatomic, nullable, readonly) NSString *url;
@property (nonatomic, nullable, readonly) NSString *secureURL;
@property (nonatomic, nullable, readonly) NSString *mimeType;
@property (nonatomic, readonly) CGFloat width;
@property (nonatomic, readonly) CGFloat height;
@property (nonatomic, nullable, readonly) NSString *altText;

@end

// Twitter card properties found on the page.
@interface RSHTMLTwitterProperties : NSObject

// TODO: the rest. At this writing (Nov. 26, 2017) I just care about twitter:image:src.

- (instancetype)initWithURLString:(NSString *)urlString tags:(NSArray *)tags;

@property (nonatomic, nullable, readonly) NSString *imageURL; // twitter:image:src

@end
@implementation RSHTMLMetadataParser


#pragma mark - Class Methods

// Convenience entry point: parses the HTML and returns the resulting metadata object.
+ (RSHTMLMetadata *)HTMLMetadataWithParserData:(ParserData *)parserData {

	// Parsing happens inside the initializer, so metadata is already set when it returns.
	RSHTMLMetadataParser *metadataParser = [[self alloc] initWithParserData:parserData];
	return metadataParser.metadata;
}


#pragma mark - Init

- (instancetype)initWithParserData:(ParserData *)parserData {

	NSParameterAssert(parserData.data);
	NSParameterAssert(parserData.url);

	self = [super init];
	if (self) {
		_parserData = parserData;
		_tags = [NSMutableArray new];

		// Some YouTube pages place the feed link tag in the body instead of the head.
		// For URLs that mention "youtube" we therefore keep scanning past the head section.
		// (The substring match can produce false positives, but scanning further is harmless.)
		NSRange youTubeRange = [parserData.url rangeOfString:@"youtube" options:NSCaseInsensitiveSearch];
		_shouldScanPastHeadSection = (youTubeRange.location != NSNotFound);

		[self parse];
	}

	return self;
}


#pragma mark - Parse

// Runs the SAX parser over the data, then builds the metadata from the tags collected by the delegate callback.
- (void)parse {

	RSSAXHTMLParser *htmlParser = [[RSSAXHTMLParser alloc] initWithDelegate:self];
	[htmlParser parseData:self.parserData.data];
	[htmlParser finishParsing];

	RSHTMLMetadata *parsedMetadata = [[RSHTMLMetadata alloc] initWithURLString:self.parserData.url tags:self.tags];
	self.metadata = parsedMetadata;
}


static NSString *kHrefKey = @"href";
static NSString *kSrcKey = @"src";
static NSString *kRelKey = @"rel";

// The tag's target: href when present, otherwise src.
- (NSString *)linkForDictionary:(NSDictionary *)d {

	NSString *href = [d rsparser_objectForCaseInsensitiveKey:kHrefKey];
	return href ? href : [d rsparser_objectForCaseInsensitiveKey:kSrcKey];
}

// Records a <link> tag, but only when it has both a rel value and a target.
- (void)handleLinkAttributes:(NSDictionary *)d {

	NSString *rel = [d rsparser_objectForCaseInsensitiveKey:kRelKey];
	if (RSParserStringIsEmpty(rel) || RSParserStringIsEmpty([self linkForDictionary:d])) {
		return;
	}

	[self.tags addObject:[RSHTMLTag linkTagWithAttributes:d]];
}

// Records a <meta> tag unconditionally.
- (void)handleMetaAttributes:(NSDictionary *)d {

	[self.tags addObject:[RSHTMLTag metaTagWithAttributes:d]];
}

#pragma mark - RSSAXHTMLParserDelegate

// NOTE(review): each length is one more than the visible characters, presumably to include
// the terminating NUL in the comparison done by RSSAXEqualTags — confirm there.
static const char *kBody = "body";
static const NSInteger kBodyLength = 5;
static const char *kLink = "link";
static const NSInteger kLinkLength = 5;
static const char *kMeta = "meta";
static const NSInteger kMetaLength = 5;

// Collects <link> and <meta> tags; stops at <body> unless scanning past the head is enabled.
- (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes {

	if (self.didFinishParsing) {
		return;
	}

	if (RSSAXEqualTags(localName, kBody, kBodyLength) && !self.shouldScanPastHeadSection) {
		self.didFinishParsing = YES;
		return;
	}

	BOOL isLinkTag = RSSAXEqualTags(localName, kLink, kLinkLength);
	BOOL isMetaTag = !isLinkTag && RSSAXEqualTags(localName, kMeta, kMetaLength);
	if (!isLinkTag && !isMetaTag) {
		return;
	}

	NSDictionary *attributesDictionary = [SAXParser attributesDictionary:attributes];
	if (RSParserObjectIsEmpty(attributesDictionary)) {
		return;
	}

	if (isLinkTag) {
		[self handleLinkAttributes:attributesDictionary];
	}
	else {
		[self handleMetaAttributes:attributesDictionary];
	}
}

@end
extern NSString *RSHTMLTagNameLink; // @"link"
extern NSString *RSHTMLTagNameMeta; // @"meta"

// Which kind of HTML tag an RSHTMLTag represents.
typedef NS_ENUM(NSInteger, RSHTMLTagType) {
	RSHTMLTagTypeLink,
	RSHTMLTagTypeMeta
};

// A single <link> or <meta> tag together with its attributes.
@interface RSHTMLTag : NSObject

- (instancetype)init NS_UNAVAILABLE;
// Stores the type and the attributes dictionary as given.
- (instancetype)initWithType:(RSHTMLTagType)type attributes:(NSDictionary *)attributes NS_DESIGNATED_INITIALIZER;

// Convenience constructors that fix the type to RSHTMLTagTypeLink / RSHTMLTagTypeMeta.
+ (RSHTMLTag *)linkTagWithAttributes:(NSDictionary *)attributes;
+ (RSHTMLTag *)metaTagWithAttributes:(NSDictionary *)attributes;

@property (nonatomic, readonly) RSHTMLTagType type;
@property (nonatomic, readonly) NSDictionary *attributes;

@end
@implementation RSHTMLTag

// Designated initializer: records the tag type and keeps the attributes dictionary as passed.
- (instancetype)initWithType:(RSHTMLTagType)type attributes:(NSDictionary *)attributes {

	self = [super init];
	if (self) {
		_type = type;
		_attributes = attributes;
	}

	return self;
}

// Builds a tag of type RSHTMLTagTypeLink.
+ (RSHTMLTag *)linkTagWithAttributes:(NSDictionary *)attributes {

	RSHTMLTag *linkTag = [[self alloc] initWithType:RSHTMLTagTypeLink attributes:attributes];
	return linkTag;
}

// Builds a tag of type RSHTMLTagTypeMeta.
+ (RSHTMLTag *)metaTagWithAttributes:(NSDictionary *)attributes {

	RSHTMLTag *metaTag = [[self alloc] initWithType:RSHTMLTagTypeMeta attributes:attributes];
	return metaTag;
}

// Debug description: class name, pointer, numeric type, and the attributes dictionary.
- (NSString *)description {

	NSString *className = NSStringFromClass([self class]);
	long typeValue = (long)self.type;
	return [NSString stringWithFormat:@"<%@: %p> type: %ld attributes: %@", className, self, typeValue, self.attributes];
}

@end
// Accessors for the common OPML attributes on an outline's attribute dictionary.
@interface NSDictionary (RSOPMLAttributes)

// A frequent error in OPML files is to mess up the capitalization,
// so these do a case-insensitive lookup.
// Each accessor returns nil when the attribute is absent.

@property (nonatomic, nullable, readonly) NSString *opml_text; // "text"
@property (nonatomic, nullable, readonly) NSString *opml_title; // "title"
@property (nonatomic, nullable, readonly) NSString *opml_description; // "description"
@property (nonatomic, nullable, readonly) NSString *opml_type; // "type"
@property (nonatomic, nullable, readonly) NSString *opml_version; // "version"
@property (nonatomic, nullable, readonly) NSString *opml_htmlUrl; // "htmlUrl" (key constant is spelled OPMLHMTLURLKey)
@property (nonatomic, nullable, readonly) NSString *opml_xmlUrl; // "xmlUrl"

@end
@implementation NSDictionary (RSOPMLAttributes)

// Every accessor performs a case-insensitive lookup of its OPML attribute name.

- (NSString *)opml_text {

	NSString *text = [self rsparser_objectForCaseInsensitiveKey:OPMLTextKey];
	return text;
}


- (NSString *)opml_title {

	NSString *title = [self rsparser_objectForCaseInsensitiveKey:OPMLTitleKey];
	return title;
}


- (NSString *)opml_description {

	NSString *descriptionValue = [self rsparser_objectForCaseInsensitiveKey:OPMLDescriptionKey];
	return descriptionValue;
}


- (NSString *)opml_type {

	NSString *type = [self rsparser_objectForCaseInsensitiveKey:OPMLTypeKey];
	return type;
}


- (NSString *)opml_version {

	NSString *version = [self rsparser_objectForCaseInsensitiveKey:OPMLVersionKey];
	return version;
}


- (NSString *)opml_htmlUrl {

	// The constant's name is spelled "HMTL"; its value is @"htmlUrl".
	NSString *htmlUrl = [self rsparser_objectForCaseInsensitiveKey:OPMLHMTLURLKey];
	return htmlUrl;
}


- (NSString *)opml_xmlUrl {

	NSString *xmlUrl = [self rsparser_objectForCaseInsensitiveKey:OPMLXMLURLKey];
	return xmlUrl;
}


@end
// The root of a parsed OPML file: an RSOPMLItem that additionally carries a title and a URL.
@interface RSOPMLDocument : RSOPMLItem

@property (nonatomic, nullable) NSString *title;
// RSOPMLParser sets this to the url of the ParserData the document was parsed from.
@property (nonatomic, nullable) NSString *url;

@end
// Builds the NSError reported when a file turns out not to be OPML.
// fileName is interpolated into the localized description shown to the user.
NSError *RSOPMLWrongFormatError(NSString *fileName) {

	NSString *descriptionFormat = NSLocalizedString(@"The file ‘%@’ can’t be parsed because it’s not an OPML file.", @"OPML wrong format");
	NSString *description = [NSString stringWithFormat:descriptionFormat, fileName];

	NSString *failureReason = NSLocalizedString(@"The file is not an OPML file.", @"OPML wrong format");

	NSDictionary *info = @{
		NSLocalizedDescriptionKey: description,
		NSLocalizedFailureReasonErrorKey: failureReason
	};

	return [NSError errorWithDomain:RSOPMLErrorDomain code:RSOPMLErrorCodeDataIsWrongFormat userInfo:info];
}
// Describes one feed listed in an OPML file.
@interface RSOPMLFeedSpecifier : NSObject

- (instancetype)init NS_UNAVAILABLE;
// feedURL must be non-empty (the implementation asserts this).
// Empty strings passed for the nullable parameters are stored as nil.
- (instancetype)initWithTitle:(NSString * _Nullable)title feedDescription:(NSString * _Nullable)feedDescription homePageURL:(NSString * _Nullable)homePageURL feedURL:(NSString *)feedURL NS_DESIGNATED_INITIALIZER;

@property (nonatomic, nullable, readonly) NSString *title;
@property (nonatomic, nullable, readonly) NSString *feedDescription;
@property (nonatomic, nullable, readonly) NSString *homePageURL;
@property (nonatomic, readonly) NSString *feedURL;

@end
@implementation RSOPMLFeedSpecifier

// Designated initializer. feedURL is required; the optional strings are normalized so that
// an empty value is stored as nil.
- (instancetype)initWithTitle:(NSString *)title feedDescription:(NSString *)feedDescription homePageURL:(NSString *)homePageURL feedURL:(NSString *)feedURL {

	NSParameterAssert(!RSParserStringIsEmpty(feedURL));

	self = [super init];
	if (self) {
		_title = RSParserStringIsEmpty(title) ? nil : title;
		_feedDescription = RSParserStringIsEmpty(feedDescription) ? nil : feedDescription;
		_homePageURL = RSParserStringIsEmpty(homePageURL) ? nil : homePageURL;
		_feedURL = feedURL;
	}

	return self;
}

@end
@implementation RSOPMLItem

@synthesize children = _children;
@synthesize feedSpecifier = _feedSpecifier;


// Returns an immutable snapshot of the current children (nil when none were ever set or added).
- (NSArray *)children {

	NSArray *snapshot = [self.mutableChildren copy];
	return snapshot;
}


// Replaces the children and rebuilds the mutable backing array from them.
- (void)setChildren:(NSArray *)children {

	_children = children;
	NSMutableArray *backingArray = [children mutableCopy];
	self.mutableChildren = backingArray;
}


// Appends a child, creating the backing array on first use.
- (void)addChild:(RSOPMLItem *)child {

	if (self.mutableChildren == nil) {
		self.mutableChildren = [NSMutableArray new];
	}

	[self.mutableChildren addObject:child];
}


// Lazily builds, and then caches, the feed specifier from this item's attributes.
// Returns nil when the item has no xmlUrl attribute value.
- (RSOPMLFeedSpecifier *)feedSpecifier {

	if (!_feedSpecifier) {

		NSString *feedURL = self.attributes.opml_xmlUrl;
		if (RSParserObjectIsEmpty(feedURL)) {
			return nil;
		}

		_feedSpecifier = [[RSOPMLFeedSpecifier alloc] initWithTitle:self.titleFromAttributes feedDescription:self.attributes.opml_description homePageURL:self.attributes.opml_htmlUrl feedURL:feedURL];
	}

	return _feedSpecifier;
}

// Prefers the title attribute and falls back to the text attribute; nil when neither exists.
- (NSString *)titleFromAttributes {

	NSString *title = self.attributes.opml_title;
	if (!title) {
		title = self.attributes.opml_text;
	}

	return title;
}

// An item counts as a folder when it has at least one child.
- (BOOL)isFolder {

	NSUInteger childCount = self.mutableChildren.count;
	return childCount > 0;
}

@end
7 | //
8 |
9 | #import "RSOPMLParser.h"
10 | #import "RSSAXParser.h"
11 | #import "RSOPMLItem.h"
12 | #import "RSOPMLDocument.h"
13 | #import "RSOPMLAttributes.h"
14 | #import "RSOPMLError.h"
15 | #import "RSOPMLParser.h"
16 | #import "ParserData.h"
17 |
18 | #import
19 |
20 |
21 |
22 | @interface RSOPMLParser ()
23 |
24 | @property (nonatomic, readwrite) RSOPMLDocument *OPMLDocument;
25 | @property (nonatomic, readwrite) NSError *error;
26 | @property (nonatomic) NSMutableArray *itemStack;
27 |
28 | @end
29 | // Runs the synchronous parse on a global background queue and delivers document/error on the main queue.
30 | void RSParseOPML(ParserData *parserData, OPMLParserCallback callback) {
31 |
32 | 	dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{
33 |
34 | 		@autoreleasepool {
35 | 			NSError *error = nil;
36 | 			RSOPMLDocument *opmlDocument = [RSOPMLParser parseOPMLWithParserData:parserData error:&error];
37 |
38 | 			dispatch_async(dispatch_get_main_queue(), ^{
39 | 				callback(opmlDocument, error);
40 | 			});
41 | 		}
42 | 	});
43 | }
44 |
45 | @implementation RSOPMLParser
46 |
47 | #pragma mark - Class Methods
48 | // Returns the parsed document, or nil on failure. Failure is reported by the nil return even when error is NULL.
49 | + (RSOPMLDocument *)parseOPMLWithParserData:(ParserData *)parserData error:(NSError **)error {
50 |
51 | 	RSOPMLParser *parser = [[RSOPMLParser alloc] initWithParserData:parserData];
52 |
53 | 	RSOPMLDocument *document = parser.OPMLDocument;
54 | 	document.url = parserData.url;
55 | 	if (parser.error) {
56 | 		if (error) { *error = parser.error; }
57 | 		return nil;
58 | 	}
59 | 	return document;
60 | }
61 |
62 | #pragma mark - Init
63 | // Parsing happens inside init; the results are read afterwards from OPMLDocument and error.
64 | - (instancetype)initWithParserData:(ParserData *)parserData {
65 |
66 | 	self = [super init];
67 | 	if (!self) {
68 | 		return nil;
69 | 	}
70 |
71 | 	[self parse:parserData];
72 |
73 | 	return self;
74 | }
75 |
76 |
77 | #pragma mark - Private
78 | // Rejects data that fails canParseData: with a wrong-format error; otherwise feeds the data to an RSSAXParser with self as delegate.
79 | - (void)parse:(ParserData *)parserData {
80 |
81 | 	@autoreleasepool {
82 |
83 | 		if (![self canParseData:parserData.data]) {
84 | 			// Pick a name for the error message: last path component for file URLs, the full URL string otherwise.
85 | 			NSString *filename = nil;
86 | 			NSURL *url = [NSURL URLWithString:parserData.url];
87 | 			if (url && url.isFileURL) {
88 | 				filename = url.path.lastPathComponent;
89 | 			}
90 | 			if ([parserData.url hasPrefix:@"http"]) {
91 | 				filename = parserData.url;
92 | 			}
93 | 			if (!filename) {
94 | 				filename = parserData.url;
95 | 			}
96 | 			self.error = RSOPMLWrongFormatError(filename);
97 | 			return;
98 | 		}
99 |
100 | 		RSSAXParser *parser = [[RSSAXParser alloc] initWithDelegate:self];
101 | 		// The document itself is the bottom of the item stack, so top-level outlines become its children.
102 | 		self.itemStack = [NSMutableArray new];
103 | 		self.OPMLDocument = [RSOPMLDocument new];
104 | 		[self pushItem:self.OPMLDocument];
105 |
106 | 		[parser parseData:parserData.data];
107 | 		[parser finishParsing];
108 | 	}
109 | }
110 | // NOTE(review): the gutter jumps from 113 to 151 below. The rest of canParseData:, pushItem: and the start of popItem are missing from this dump and are left exactly as found.
111 | - (BOOL)canParseData:(NSData *)d {
112 |
113 | // Check for 0, nil);
151 |
152 | 	/*If itemStack is empty, bad things are happening.
153 | 	But we still shouldn't crash in production.*/
154 |
155 | 	if (self.itemStack.count > 0) {
156 | 		[self.itemStack removeLastObject];
157 | 	}
158 | }
159 |
160 | // The item currently being filled: the top of the stack, or nil when the stack is empty.
161 | - (RSOPMLItem *)currentItem {
162 |
163 | 	return self.itemStack.lastObject;
164 | }
165 |
166 |
167 | #pragma mark - RSSAXParserDelegate
168 |
169 | static const char *kOutline = "outline";
170 | static const NSInteger kOutlineLength = 8;
171 | // <title>: start collecting its text. <outline>: create an item from the attributes, attach it to the current item and make it current.
172 | - (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const xmlChar **)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const xmlChar **)attributes {
173 |
174 | 	if (RSSAXEqualTags(localName, kTitle, kTitleLength)) {
175 | 		[SAXParser beginStoringCharacters];
176 | 		return;
177 | 	}
178 |
179 | 	if (!RSSAXEqualTags(localName, kOutline, kOutlineLength)) {
180 | 		return;
181 | 	}
182 |
183 | 	RSOPMLItem *item = [RSOPMLItem new];
184 | 	item.attributes = [SAXParser attributesDictionary:attributes numberOfAttributes:numberOfAttributes];
185 |
186 | 	[[self currentItem] addChild:item];
187 | 	[self pushItem:item];
188 | }
189 |
190 | // </title>: becomes the document title only while the document itself is the current item. </outline>: pop the item.
191 | - (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const xmlChar *)localName prefix:(const xmlChar *)prefix uri:(const xmlChar *)uri {
192 |
193 | 	if (RSSAXEqualTags(localName, kTitle, kTitleLength)) {
194 | 		RSOPMLItem* item = [self currentItem];
195 | 		if ([item isKindOfClass:[RSOPMLDocument class]]) {
196 | 			((RSOPMLDocument *)item).title = SAXParser.currentStringWithTrimmedWhitespace;
197 | 		}
198 | 		return;
199 | 	}
200 |
201 | 	if (RSSAXEqualTags(localName, kOutline, kOutlineLength)) {
202 | 		[self popItem];
203 | 	}
204 | }
205 |
206 | // Attribute names and their lengths. Each length is strlen + 1, which is why the lookups below compare against length - 1.
207 | static const char *kText = "text";
208 | static const NSInteger kTextLength = 5;
209 |
210 | static const char *kTitle = "title";
211 | static const NSInteger kTitleLength = 6;
212 |
213 | static const char *kDescription = "description";
214 | static const NSInteger kDescriptionLength = 12;
215 |
216 | static const char *kType = "type";
217 | static const NSInteger kTypeLength = 5;
218 |
219 | static const char *kVersion = "version";
220 | static const NSInteger kVersionLength = 8;
221 |
222 | static const char *kHTMLURL = "htmlUrl";
223 | static const NSInteger kHTMLURLLength = 8;
224 |
225 | static const char *kXMLURL = "xmlUrl";
226 | static const NSInteger kXMLURLLength = 7;
227 | // Maps well-known, unprefixed attribute names to shared key constants; nil for anything else. Names are bucketed by length first.
228 | - (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const xmlChar *)name prefix:(const xmlChar *)prefix {
229 | 	// name is declared nullable in the delegate protocol; strlen(NULL) would crash.
230 | 	if (prefix || name == NULL) {
231 | 		return nil;
232 | 	}
233 |
234 | 	size_t nameLength = strlen((const char *)name);
235 |
236 | 	if (nameLength == kTextLength - 1) {
237 | 		if (RSSAXEqualTags(name, kText, kTextLength)) {
238 | 			return OPMLTextKey;
239 | 		}
240 | 		if (RSSAXEqualTags(name, kType, kTypeLength)) {
241 | 			return OPMLTypeKey;
242 | 		}
243 | 	}
244 |
245 | 	else if (nameLength == kTitleLength - 1) {
246 | 		if (RSSAXEqualTags(name, kTitle, kTitleLength)) {
247 | 			return OPMLTitleKey;
248 | 		}
249 | 	}
250 |
251 | 	else if (nameLength == kXMLURLLength - 1) {
252 | 		if (RSSAXEqualTags(name, kXMLURL, kXMLURLLength)) {
253 | 			return OPMLXMLURLKey;
254 | 		}
255 | 	}
256 |
257 | 	else if (nameLength == kVersionLength - 1) {
258 | 		if (RSSAXEqualTags(name, kVersion, kVersionLength)) {
259 | 			return OPMLVersionKey;
260 | 		}
261 | 		if (RSSAXEqualTags(name, kHTMLURL, kHTMLURLLength)) {
262 | 			return OPMLHMTLURLKey;
263 | 		}
264 | 	}
265 |
266 | 	else if (nameLength == kDescriptionLength - 1) {
267 | 		if (RSSAXEqualTags(name, kDescription, kDescriptionLength)) {
268 | 			return OPMLDescriptionKey;
269 | 		}
270 | 	}
271 |
272 | 	return nil;
273 | }
274 |
275 |
276 | static const char *kRSSUppercase = "RSS";
277 | static const char *kRSSLowercase = "rss";
278 | static const NSUInteger kRSSLength = 3;
279 | static NSString *RSSUppercaseValue = @"RSS";
280 | static NSString *RSSLowercaseValue = @"rss";
281 | static NSString *emptyString = @"";
282 | // YES when the first length bytes of both buffers are identical.
283 | static BOOL equalBytes(const void *bytes1, const void *bytes2, NSUInteger length) {
284 |
285 | 	return memcmp(bytes1, bytes2, length) == 0;
286 | }
287 | // Returns shared strings for empty values and for the exact values "RSS"/"rss"; nil for everything else.
288 | - (NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void *)bytes length:(NSUInteger)length {
289 |
290 |
291 | 	if (length < 1) {
292 | 		return emptyString;
293 | 	}
294 |
295 | 	if (length == kRSSLength) {
296 |
297 | 		if (equalBytes(bytes, kRSSUppercase, kRSSLength)) {
298 | 			return RSSUppercaseValue;
299 | 		}
300 | 		else if (equalBytes(bytes, kRSSLowercase, kRSSLength)) {
301 | 			return RSSLowercaseValue;
302 | 		}
303 |
304 | 	}
305 |
306 | 	return nil;
307 | }
308 |
309 |
310 | @end
311 |
--------------------------------------------------------------------------------
/Sources/ObjC/RSParsedArticle.h:
--------------------------------------------------------------------------------
1 | //
2 | // RSParsedArticle.h
3 | // RSParser
4 | //
5 | // Created by Brent Simmons on 12/6/14.
6 | // Copyright (c) 2014 Ranchero Software LLC. All rights reserved.
7 | //
8 |
9 | @import Foundation;
10 |
11 | @class RSParsedEnclosure;
12 | @class RSParsedAuthor;
13 |
14 | NS_ASSUME_NONNULL_BEGIN
15 | // Mutable model for one article produced while parsing a feed.
16 | @interface RSParsedArticle : NSObject
17 | // feedURL must not be nil (asserted by the initializer); dateParsed is set to the creation time.
18 | - (instancetype)init NS_UNAVAILABLE;
19 | - (instancetype)initWithFeedURL:(NSString *)feedURL NS_DESIGNATED_INITIALIZER;
20 |
21 | @property (nonatomic, readonly) NSString *feedURL;
22 | @property (nonatomic) NSString *articleID; //guid, if present, or calculated from other attributes. Should be unique to the feed, but not necessarily unique across different feeds. (Not suitable for a database ID.)
23 |
24 | @property (nonatomic, nullable) NSString *guid;
25 | @property (nonatomic, nullable) NSString *title;
26 | @property (nonatomic, nullable) NSString *body;
27 | @property (nonatomic, nullable) NSString *link;
28 | @property (nonatomic, nullable) NSString *permalink;
29 | @property (nonatomic, nullable) NSSet *authors;
30 | @property (nonatomic, nullable) NSSet *enclosures;
31 | @property (nonatomic, nullable) NSDate *datePublished;
32 | @property (nonatomic, nullable) NSDate *dateModified;
33 | @property (nonatomic) NSDate *dateParsed;
34 | @property (nonatomic, nullable) NSString *language;
35 | // Both add methods create the corresponding set on first use.
36 | - (void)addEnclosure:(RSParsedEnclosure *)enclosure;
37 | - (void)addAuthor:(RSParsedAuthor *)author;
38 |
39 | @end
40 |
41 | NS_ASSUME_NONNULL_END
42 |
--------------------------------------------------------------------------------
/Sources/ObjC/RSParsedArticle.m:
--------------------------------------------------------------------------------
1 | //
2 | // RSParsedArticle.m
3 | // RSParser
4 | //
5 | // Created by Brent Simmons on 12/6/14.
6 | // Copyright (c) 2014 Ranchero Software LLC. All rights reserved.
7 | //
8 |
9 |
10 | #import "RSParsedArticle.h"
11 | #import "RSParserInternal.h"
12 | #import "NSString+RSParser.h"
13 | #import "RSParsedAuthor.h"
14 | #import "RSParsedEnclosure.h"
15 |
16 |
17 |
18 | @implementation RSParsedArticle
19 |
20 |
21 | #pragma mark - Init
22 |
23 | // Creates an article for the feed at feedURL and records the current date as dateParsed.
24 | - (instancetype)initWithFeedURL:(NSString *)feedURL {
25 |
26 | 	NSParameterAssert(feedURL != nil);
27 |
28 | 	if ((self = [super init]) == nil) {
29 | 		return nil;
30 | 	}
31 |
32 | 	_feedURL = feedURL;
33 | 	_dateParsed = [NSDate date];
34 | 	return self;
35 | }
36 |
37 |
38 | #pragma mark - Enclosures
39 |
40 | // Adds enclosure to the enclosures set, creating the set when there is none yet.
41 | - (void)addEnclosure:(RSParsedEnclosure *)enclosure {
42 |
43 | 	NSSet *existing = self.enclosures;
44 | 	self.enclosures = existing ? [existing setByAddingObject:enclosure] : [NSSet setWithObject:enclosure];
45 | }
46 |
47 | #pragma mark - Authors
48 |
49 | // Adds author to the authors set, creating the set when there is none yet.
50 | - (void)addAuthor:(RSParsedAuthor *)author {
51 |
52 | 	NSSet *existing = self.authors;
53 | 	self.authors = existing ? [existing setByAddingObject:author] : [NSSet setWithObject:author];
54 | }
55 |
56 | #pragma mark - articleID
57 |
58 | // The guid always wins when present; otherwise a calculated ID is produced once and cached.
59 | - (NSString *)articleID {
60 |
61 | 	NSString *guid = self.guid;
62 | 	if (guid) {
63 | 		return guid;
64 | 	}
65 |
66 | 	if (_articleID == nil) {
67 | 		_articleID = [self calculatedArticleID];
68 | 	}
69 | 	return _articleID;
70 | }
71 |
72 |
73 | // Fallback identity for articles without a guid: a hash of whichever properties are available.
74 | // Feeds really should supply guids; without one there is no fully reliable identity, so
75 | // duplicates ("re-runs") remain likely. The result is meant to be unique within one feed only
76 | // and is not suitable as a database ID.
77 | - (NSString *)calculatedArticleID {
78 |
79 | 	NSString *timestamp = nil;
80 | 	if (self.datePublished) {
81 | 		timestamp = [NSString stringWithFormat:@"%.0f", self.datePublished.timeIntervalSince1970];
82 | 	}
83 |
84 | 	// Best textual identity available, in order of preference: permalink, link, title.
85 | 	NSString *identity = nil;
86 | 	if (!RSParserStringIsEmpty(self.permalink)) {
87 | 		identity = self.permalink;
88 | 	}
89 | 	else if (!RSParserStringIsEmpty(self.link)) {
90 | 		identity = self.link;
91 | 	}
92 | 	else if (!RSParserStringIsEmpty(self.title)) {
93 | 		identity = self.title;
94 | 	}
95 |
96 | 	NSMutableString *s = [NSMutableString string];
97 |
98 | 	if (identity) {
99 | 		[s appendString:identity];
100 | 	}
101 | 	else if (!timestamp && !RSParserStringIsEmpty(self.body)) {
102 | 		// The body is only a last resort: used when there is neither an identity string nor a date.
103 | 		[s appendString:self.body];
104 | 	}
105 |
106 | 	// A publication date, when known, is appended to (or stands in for) the identity.
107 | 	if (timestamp) {
108 | 		[s appendString:timestamp];
109 | 	}
110 |
111 | 	return [s rsparser_md5Hash];
112 | }
113 |
114 | @end
115 |
116 |
--------------------------------------------------------------------------------
/Sources/ObjC/RSParsedAuthor.h:
--------------------------------------------------------------------------------
1 | //
2 | // RSParsedAuthor.h
3 | // RSParserTests
4 | //
5 | // Created by Brent Simmons on 12/19/17.
6 | // Copyright © 2017 Ranchero Software, LLC. All rights reserved.
7 | //
8 |
9 | @import Foundation;
10 |
11 | NS_ASSUME_NONNULL_BEGIN
12 |
13 | @interface RSParsedAuthor : NSObject
14 |
15 | @property (nonatomic, nullable) NSString *name;
16 | @property (nonatomic, nullable) NSString *emailAddress;
17 | @property (nonatomic, nullable) NSString *url;
18 |
19 | + (instancetype)authorWithSingleString:(NSString *)s; // Don’t know which property it is. Guess based on contents of the string. Common with RSS.
20 |
21 | @end
22 |
23 | NS_ASSUME_NONNULL_END
24 |
--------------------------------------------------------------------------------
/Sources/ObjC/RSParsedAuthor.m:
--------------------------------------------------------------------------------
1 | //
2 | // RSParsedAuthor.m
3 | // RSParserTests
4 | //
5 | // Created by Brent Simmons on 12/19/17.
6 | // Copyright © 2017 Ranchero Software, LLC. All rights reserved.
7 | //
8 |
9 | #import "NSString+RSParser.h"
10 |
11 | #import "RSParsedAuthor.h"
12 |
13 | @implementation RSParsedAuthor
14 |
15 | // Builds an author from a single free-form string by guessing what it holds.
16 | // RSS says the author element is an email address, but in practice it is often
17 | // a name and sometimes a URL. Exactly one of the three properties is set.
18 | + (instancetype)authorWithSingleString:(NSString *)s {
19 |
20 | 	RSParsedAuthor *result = [[self alloc] init];
21 |
22 | 	// Checked in this order: anything containing "@" is an email address,
23 | 	// anything starting with "http" (case-insensitive) is a URL, the rest is a name.
24 | 	BOOL looksLikeEmail = [s rsparser_contains:@"@"];
25 | 	BOOL looksLikeURL = !looksLikeEmail && [s.lowercaseString hasPrefix:@"http"];
26 |
27 | 	if (looksLikeEmail) {
28 | 		result.emailAddress = s;
29 | 	}
30 | 	else if (looksLikeURL) {
31 | 		result.url = s;
32 | 	}
33 | 	else {
34 | 		result.name = s;
35 | 	}
36 |
37 | 	return result;
38 | }
39 |
40 | @end
41 |
--------------------------------------------------------------------------------
/Sources/ObjC/RSParsedEnclosure.h:
--------------------------------------------------------------------------------
1 | //
2 | // RSParsedEnclosure.h
3 | // RSParser
4 | //
5 | // Created by Brent Simmons on 12/18/17.
6 | // Copyright © 2017 Ranchero Software, LLC. All rights reserved.
7 | //
8 |
9 | @import Foundation;
10 |
11 | NS_ASSUME_NONNULL_BEGIN
12 |
13 | @interface RSParsedEnclosure : NSObject
14 |
15 | @property (nonatomic) NSString *url;
16 | @property (nonatomic) NSInteger length;
17 | @property (nonatomic, nullable) NSString *mimeType;
18 | @property (nonatomic, nullable) NSString *title;
19 |
20 | - (instancetype)init NS_UNAVAILABLE;
21 | - (instancetype)initWithURLString:(NSString *)urlString NS_DESIGNATED_INITIALIZER;
22 |
23 | @end
24 |
25 | NS_ASSUME_NONNULL_END
26 |
--------------------------------------------------------------------------------
/Sources/ObjC/RSParsedEnclosure.m:
--------------------------------------------------------------------------------
1 | //
2 | // RSParsedEnclosure.m
3 | // RSParser
4 | //
5 | // Created by Brent Simmons on 12/18/17.
6 | // Copyright © 2017 Ranchero Software, LLC. All rights reserved.
7 | //
8 |
9 | #import "RSParsedEnclosure.h"
10 |
11 | @implementation RSParsedEnclosure
12 |
13 | // Creates an enclosure for urlString. length, mimeType and title are left for the caller to fill in.
14 | - (instancetype)initWithURLString:(NSString *)urlString {
15 |
16 | 	if ((self = [super init]) != nil) {
17 | 		_url = urlString;
18 | 	}
19 |
20 | 	return self;
21 | }
22 |
23 | @end
24 |
--------------------------------------------------------------------------------
/Sources/ObjC/RSParsedFeed.h:
--------------------------------------------------------------------------------
1 | //
2 | // RSParsedFeed.h
3 | // RSParser
4 | //
5 | // Created by Brent Simmons on 7/12/15.
6 | // Copyright © 2015 Ranchero Software, LLC. All rights reserved.
7 | //
8 |
9 | @import Foundation;
10 |
11 | @class RSParsedArticle;
12 |
13 | NS_ASSUME_NONNULL_BEGIN
14 |
15 | // Immutable result of parsing a feed: feed-level metadata plus the parsed articles.
16 | @interface RSParsedFeed : NSObject
17 |
18 | - (instancetype)init NS_UNAVAILABLE;
19 | // articles may be passed as an NSArray (as declared) or as an NSSet; either way it is stored as an immutable NSSet.
20 | - (instancetype)initWithURLString:(NSString *)urlString title:(NSString * _Nullable)title link:(NSString * _Nullable)link language:(NSString * _Nullable)language articles:(NSArray *)articles NS_DESIGNATED_INITIALIZER;
21 |
22 | @property (nonatomic, readonly) NSString *urlString;
23 | @property (nonatomic, readonly, nullable) NSString *title;
24 | @property (nonatomic, readonly, nullable) NSString *link;
25 | @property (nonatomic, readonly, nullable) NSString *language;
26 | @property (nonatomic, readonly) NSSet *articles;
27 |
28 | @end
29 |
30 | NS_ASSUME_NONNULL_END
31 |
--------------------------------------------------------------------------------
/Sources/ObjC/RSParsedFeed.m:
--------------------------------------------------------------------------------
1 | //
2 | // RSParsedFeed.m
3 | // RSParser
4 | //
5 | // Created by Brent Simmons on 7/12/15.
6 | // Copyright © 2015 Ranchero Software, LLC. All rights reserved.
7 | //
8 |
9 | #import "RSParsedFeed.h"
10 |
11 |
12 |
13 | @implementation RSParsedFeed
14 |
15 | // Stores the feed metadata and its articles.
16 | // The header declares the articles parameter as NSArray while the articles property is an
17 | // NSSet, so the argument is normalized here instead of being stored as-is: an NSSet is
18 | // copied, an NSArray is converted, and anything else (including nil) becomes an empty set.
19 | - (instancetype)initWithURLString:(NSString *)urlString title:(NSString *)title link:(NSString *)link language:(NSString *)language articles:(NSArray *)articles {
20 |
21 | 	self = [super init];
22 | 	if (!self) {
23 | 		return nil;
24 | 	}
25 |
26 | 	_urlString = urlString;
27 | 	_title = title;
28 | 	_link = link;
29 | 	_language = language;
30 |
31 | 	// Inspect the runtime class: existing callers may pass either collection type.
32 | 	id collection = articles;
33 | 	if ([collection isKindOfClass:[NSSet class]]) {
34 | 		_articles = [(NSSet *)collection copy];
35 | 	}
36 | 	else if ([collection isKindOfClass:[NSArray class]]) {
37 | 		_articles = [NSSet setWithArray:(NSArray *)collection];
38 | 	}
39 | 	else {
40 | 		_articles = [NSSet set];
41 | 	}
42 |
43 | 	return self;
44 | }
45 |
46 |
47 | @end
48 |
--------------------------------------------------------------------------------
/Sources/ObjC/RSParserInternal.h:
--------------------------------------------------------------------------------
1 | //
2 | // RSParserInternal.h
3 | // RSParser
4 | //
5 | // Created by Brent Simmons on 12/26/16.
6 | // Copyright © 2016 Ranchero Software, LLC.
All rights reserved.
7 | //
8 |
9 | @import Foundation;
10 |
11 | NS_ASSUME_NONNULL_BEGIN
12 |
13 | BOOL RSParserObjectIsEmpty(id _Nullable obj);
14 | BOOL RSParserStringIsEmpty(NSString * _Nullable s);
15 |
16 |
17 | @interface NSDictionary (RSParserInternal)
18 |
19 | - (nullable id)rsparser_objectForCaseInsensitiveKey:(NSString *)key;
20 |
21 | @end
22 |
23 | NS_ASSUME_NONNULL_END
24 |
25 |
--------------------------------------------------------------------------------
/Sources/ObjC/RSParserInternal.m:
--------------------------------------------------------------------------------
1 | //
2 | // RSParserInternal.m
3 | // RSParser
4 | //
5 | // Created by Brent Simmons on 12/26/16.
6 | // Copyright © 2016 Ranchero Software, LLC. All rights reserved.
7 | //
8 |
9 |
10 | #import "RSParserInternal.h"
11 | #import
12 |
13 |
14 | // nil and the NSNull singleton both count as "no value".
15 | static BOOL RSParserIsNil(id obj) {
16 |
17 | 	if (obj == nil) {
18 | 		return YES;
19 | 	}
20 | 	return obj == [NSNull null];
21 | }
22 |
23 | // YES for nil/NSNull, for collections with a count of 0 and for objects with a length of 0.
24 | // Objects that respond to neither count nor length are considered non-empty.
25 | BOOL RSParserObjectIsEmpty(id obj) {
26 |
27 | 	if (RSParserIsNil(obj)) {
28 | 		return YES;
29 | 	}
30 |
31 | 	// count is checked before length, so an object responding to both is judged by count.
32 | 	if ([obj respondsToSelector:@selector(count)]) {
33 | 		return [obj count] == 0;
34 | 	}
35 | 	if ([obj respondsToSelector:@selector(length)]) {
36 | 		return [obj length] == 0;
37 | 	}
38 |
39 | 	return NO;
40 | }
41 |
42 | // YES for nil, NSNull and the empty string.
43 | BOOL RSParserStringIsEmpty(NSString *s) {
44 |
45 | 	if (RSParserIsNil(s)) {
46 | 		return YES;
47 | 	}
48 | 	return s.length == 0;
49 | }
50 |
51 |
52 | @implementation NSDictionary (RSParserInternal)
53 |
54 | // Looks key up exactly first; only when that misses are the keys scanned for a
55 | // string key that matches case-insensitively. Non-string keys are skipped.
56 | - (nullable id)rsparser_objectForCaseInsensitiveKey:(NSString *)key {
57 |
58 | 	id exactMatch = self[key];
59 | 	if (exactMatch != nil) {
60 | 		return exactMatch;
61 | 	}
62 |
63 | 	for (NSString *candidate in self.allKeys) {
64 |
65 | 		if (![candidate isKindOfClass:[NSString class]]) {
66 | 			continue;
67 | 		}
68 | 		if ([key caseInsensitiveCompare:candidate] != NSOrderedSame) {
69 | 			continue;
70 | 		}
71 | 		return self[candidate];
72 | 	}
73 |
74 | 	return nil;
75 | }
76 |
77 | @end
78 |
--------------------------------------------------------------------------------
/Sources/ObjC/RSRSSParser.h:
--------------------------------------------------------------------------------
1 | //
2 | // RSRSSParser.h
3 | // RSParser
4 | //
5 | // Created by Brent Simmons on 1/6/15.
6 | // Copyright (c) 2015 Ranchero Software LLC. All rights reserved.
7 | //
8 |
9 | @import Foundation;
10 |
11 | @class ParserData;
12 | @class RSParsedFeed;
13 |
14 | NS_ASSUME_NONNULL_BEGIN
15 | // NOTE(review): presumably turns RSS feed data into an RSParsedFeed; the implementation is not part of this excerpt, so confirm there.
16 | @interface RSRSSParser : NSObject
17 |
18 | + (RSParsedFeed *)parseFeedWithData:(ParserData *)parserData;
19 |
20 |
21 | @end
22 |
23 | NS_ASSUME_NONNULL_END
24 |
--------------------------------------------------------------------------------
/Sources/ObjC/RSSAXHTMLParser.h:
--------------------------------------------------------------------------------
1 | //
2 | // RSSAXHTMLParser.h
3 | // RSParser
4 | //
5 | // Created by Brent Simmons on 3/6/16.
6 | // Copyright © 2016 Ranchero Software, LLC. All rights reserved.
7 | //
8 |
9 | @import Foundation;
10 |
11 | NS_ASSUME_NONNULL_BEGIN
12 |
13 | @class RSSAXHTMLParser;
14 | // Callbacks from the HTML parser. Every method is optional; the parser records which ones the delegate implements when it is initialized.
15 | @protocol RSSAXHTMLParserDelegate
16 |
17 | @optional
18 | // attributes is the raw libxml2 array; pass it to -attributesDictionary: to get name/value pairs.
19 | - (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLStartElement:(const unsigned char * _Nullable)localName attributes:(const unsigned char * _Nullable * _Nullable)attributes;
20 |
21 | - (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLEndElement:(const unsigned char * _Nullable)localName;
22 |
23 | // Length is guaranteed to be greater than 0.
24 | - (void)saxParser:(RSSAXHTMLParser *)SAXParser XMLCharactersFound:(const unsigned char * _Nullable)characters length:(NSUInteger)length;
25 |
26 | - (void)saxParserDidReachEndOfDocument:(RSSAXHTMLParser *)SAXParser; // If canceled, may not get called (but might).
27 |
28 | @end
29 |
30 | // Streaming HTML parser: a wrapper around libxml2's HTML push parser that reports events to a delegate.
31 | @interface RSSAXHTMLParser : NSObject
32 |
33 |
34 | - (instancetype)initWithDelegate:(id)delegate;
35 | // Feed data in one or more chunks, then call finishParsing, which flushes the parser and frees the libxml2 context.
36 | - (void)parseData:(NSData *)data;
37 | - (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes;
38 | - (void)finishParsing;
39 | - (void)cancel;
40 |
41 | @property (nullable, nonatomic, strong, readonly) NSData *currentCharacters; // nil if not storing characters. UTF-8 encoded.
42 | @property (nullable, nonatomic, strong, readonly) NSString *currentString; // Convenience to get string version of currentCharacters.
43 | @property (nullable, nonatomic, strong, readonly) NSString *currentStringWithTrimmedWhitespace;
44 |
45 | - (void)beginStoringCharacters; // Delegate can call from XMLStartElement. Characters will be available in XMLEndElement as currentCharacters property. Storing characters is stopped after each XMLEndElement.
46 |
47 | // Delegate can call from within XMLStartElement.
48 | // Returns nil when attributes is NULL; an attribute without a value maps to @"".
49 | - (nullable NSDictionary *)attributesDictionary:(const unsigned char * _Nullable * _Nullable)attributes;
50 |
51 |
52 | @end
53 |
54 | NS_ASSUME_NONNULL_END
55 |
56 |
--------------------------------------------------------------------------------
/Sources/ObjC/RSSAXHTMLParser.m:
--------------------------------------------------------------------------------
1 | //
2 | // RSSAXHTMLParser.m
3 | // RSParser
4 | //
5 | // Created by Brent Simmons on 3/6/16.
6 | // Copyright © 2016 Ranchero Software, LLC. All rights reserved.
7 | // 8 | 9 | #import "RSSAXHTMLParser.h" 10 | #import "RSSAXParser.h" 11 | #import "RSParserInternal.h" 12 | 13 | #import 14 | #import 15 | #import 16 | 17 | 18 | 19 | @interface RSSAXHTMLParser () 20 | 21 | @property (nonatomic) id delegate; 22 | @property (nonatomic, assign) htmlParserCtxtPtr context; 23 | @property (nonatomic, assign) BOOL storingCharacters; 24 | @property (nonatomic) NSMutableData *characters; 25 | @property (nonatomic) BOOL delegateRespondsToStartElementMethod; 26 | @property (nonatomic) BOOL delegateRespondsToEndElementMethod; 27 | @property (nonatomic) BOOL delegateRespondsToCharactersFoundMethod; 28 | @property (nonatomic) BOOL delegateRespondsToEndOfDocumentMethod; 29 | 30 | @end 31 | 32 | 33 | @implementation RSSAXHTMLParser 34 | 35 | 36 | + (void)initialize { 37 | 38 | RSSAXInitLibXMLParser(); 39 | } 40 | 41 | 42 | #pragma mark - Init 43 | 44 | - (instancetype)initWithDelegate:(id)delegate { 45 | 46 | self = [super init]; 47 | if (self == nil) 48 | return nil; 49 | 50 | _delegate = delegate; 51 | 52 | if ([_delegate respondsToSelector:@selector(saxParser:XMLStartElement:attributes:)]) { 53 | _delegateRespondsToStartElementMethod = YES; 54 | } 55 | if ([_delegate respondsToSelector:@selector(saxParser:XMLEndElement:)]) { 56 | _delegateRespondsToEndElementMethod = YES; 57 | } 58 | if ([_delegate respondsToSelector:@selector(saxParser:XMLCharactersFound:length:)]) { 59 | _delegateRespondsToCharactersFoundMethod = YES; 60 | } 61 | if ([_delegate respondsToSelector:@selector(saxParserDidReachEndOfDocument:)]) { 62 | _delegateRespondsToEndOfDocumentMethod = YES; 63 | } 64 | 65 | return self; 66 | } 67 | 68 | 69 | #pragma mark - Dealloc 70 | 71 | - (void)dealloc { 72 | 73 | if (_context != nil) { 74 | htmlFreeParserCtxt(_context); 75 | _context = nil; 76 | } 77 | _delegate = nil; 78 | } 79 | 80 | 81 | #pragma mark - API 82 | 83 | static xmlSAXHandler saxHandlerStruct; 84 | 85 | - (void)parseData:(NSData *)data { 86 | 87 | [self 
parseBytes:data.bytes numberOfBytes:data.length]; 88 | } 89 | 90 | 91 | - (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes { 92 | 93 | if (self.context == nil) { 94 | 95 | xmlCharEncoding characterEncoding = xmlDetectCharEncoding(bytes, (int)numberOfBytes); 96 | self.context = htmlCreatePushParserCtxt(&saxHandlerStruct, (__bridge void *)self, nil, 0, nil, characterEncoding); 97 | htmlCtxtUseOptions(self.context, XML_PARSE_RECOVER | XML_PARSE_NONET | HTML_PARSE_COMPACT); 98 | } 99 | 100 | @autoreleasepool { 101 | htmlParseChunk(self.context, (const char *)bytes, (int)numberOfBytes, 0); 102 | } 103 | } 104 | 105 | 106 | - (void)finishParsing { 107 | 108 | NSAssert(self.context != nil, nil); 109 | if (self.context == nil) 110 | return; 111 | 112 | @autoreleasepool { 113 | htmlParseChunk(self.context, nil, 0, 1); 114 | htmlFreeParserCtxt(self.context); 115 | self.context = nil; 116 | self.characters = nil; 117 | } 118 | } 119 | 120 | 121 | - (void)cancel { 122 | 123 | @autoreleasepool { 124 | xmlStopParser(self.context); 125 | } 126 | } 127 | 128 | 129 | 130 | - (void)beginStoringCharacters { 131 | self.storingCharacters = YES; 132 | self.characters = [NSMutableData new]; 133 | } 134 | 135 | 136 | - (void)endStoringCharacters { 137 | self.storingCharacters = NO; 138 | self.characters = nil; 139 | } 140 | 141 | 142 | - (NSData *)currentCharacters { 143 | 144 | if (!self.storingCharacters) { 145 | return nil; 146 | } 147 | 148 | return self.characters; 149 | } 150 | 151 | 152 | - (NSString *)currentString { 153 | 154 | NSData *d = self.currentCharacters; 155 | if (RSParserObjectIsEmpty(d)) { 156 | return nil; 157 | } 158 | 159 | return [[NSString alloc] initWithData:d encoding:NSUTF8StringEncoding]; 160 | } 161 | 162 | 163 | - (NSString *)currentStringWithTrimmedWhitespace { 164 | 165 | return [self.currentString stringByTrimmingCharactersInSet:[NSCharacterSet whitespaceAndNewlineCharacterSet]]; 166 | } 167 | 168 | 169 | #pragma mark - 
Attributes Dictionary 170 | 171 | - (NSDictionary *)attributesDictionary:(const xmlChar **)attributes { 172 | 173 | if (!attributes) { 174 | return nil; 175 | } 176 | 177 | NSMutableDictionary *d = [NSMutableDictionary new]; 178 | 179 | NSInteger ix = 0; 180 | NSString *currentKey = nil; 181 | while (true) { 182 | 183 | const xmlChar *oneAttribute = attributes[ix]; 184 | ix++; 185 | 186 | if (!currentKey && !oneAttribute) { 187 | break; 188 | } 189 | 190 | if (!currentKey) { 191 | currentKey = [NSString stringWithUTF8String:(const char *)oneAttribute]; 192 | } 193 | else { 194 | NSString *value = nil; 195 | if (oneAttribute) { 196 | value = [NSString stringWithUTF8String:(const char *)oneAttribute]; 197 | } 198 | 199 | d[currentKey] = value ? value : @""; 200 | currentKey = nil; 201 | } 202 | } 203 | 204 | return [d copy]; 205 | } 206 | 207 | 208 | #pragma mark - Callbacks 209 | 210 | - (void)xmlEndDocument { 211 | 212 | @autoreleasepool { 213 | if (self.delegateRespondsToEndOfDocumentMethod) { 214 | [self.delegate saxParserDidReachEndOfDocument:self]; 215 | } 216 | 217 | [self endStoringCharacters]; 218 | } 219 | } 220 | 221 | 222 | - (void)xmlCharactersFound:(const xmlChar *)ch length:(NSUInteger)length { 223 | 224 | if (length < 1) { 225 | return; 226 | } 227 | 228 | @autoreleasepool { 229 | if (self.storingCharacters) { 230 | [self.characters appendBytes:(const void *)ch length:length]; 231 | } 232 | 233 | if (self.delegateRespondsToCharactersFoundMethod) { 234 | [self.delegate saxParser:self XMLCharactersFound:ch length:length]; 235 | } 236 | } 237 | } 238 | 239 | 240 | - (void)xmlStartElement:(const xmlChar *)localName attributes:(const xmlChar **)attributes { 241 | 242 | @autoreleasepool { 243 | if (self.delegateRespondsToStartElementMethod) { 244 | 245 | [self.delegate saxParser:self XMLStartElement:localName attributes:attributes]; 246 | } 247 | } 248 | } 249 | 250 | 251 | - (void)xmlEndElement:(const xmlChar *)localName { 252 | 253 | @autoreleasepool { 
254 | if (self.delegateRespondsToEndElementMethod) { 255 | [self.delegate saxParser:self XMLEndElement:localName]; 256 | } 257 | 258 | [self endStoringCharacters]; 259 | } 260 | } 261 | 262 | 263 | @end 264 | 265 | 266 | static void startElementSAX(void *context, const xmlChar *localname, const xmlChar **attributes) { 267 | 268 | [(__bridge RSSAXHTMLParser *)context xmlStartElement:localname attributes:attributes]; 269 | } 270 | 271 | 272 | static void endElementSAX(void *context, const xmlChar *localname) { 273 | [(__bridge RSSAXHTMLParser *)context xmlEndElement:localname]; 274 | } 275 | 276 | 277 | static void charactersFoundSAX(void *context, const xmlChar *ch, int len) { 278 | [(__bridge RSSAXHTMLParser *)context xmlCharactersFound:ch length:(NSUInteger)len]; 279 | } 280 | 281 | 282 | static void endDocumentSAX(void *context) { 283 | [(__bridge RSSAXHTMLParser *)context xmlEndDocument]; 284 | } 285 | 286 | 287 | static htmlSAXHandler saxHandlerStruct = { 288 | nil, /* internalSubset */ 289 | nil, /* isStandalone */ 290 | nil, /* hasInternalSubset */ 291 | nil, /* hasExternalSubset */ 292 | nil, /* resolveEntity */ 293 | nil, /* getEntity */ 294 | nil, /* entityDecl */ 295 | nil, /* notationDecl */ 296 | nil, /* attributeDecl */ 297 | nil, /* elementDecl */ 298 | nil, /* unparsedEntityDecl */ 299 | nil, /* setDocumentLocator */ 300 | nil, /* startDocument */ 301 | endDocumentSAX, /* endDocument */ 302 | startElementSAX, /* startElement*/ 303 | endElementSAX, /* endElement */ 304 | nil, /* reference */ 305 | charactersFoundSAX, /* characters */ 306 | nil, /* ignorableWhitespace */ 307 | nil, /* processingInstruction */ 308 | nil, /* comment */ 309 | nil, /* warning */ 310 | nil, /* error */ 311 | nil, /* fatalError //: unused error() get all the errors */ 312 | nil, /* getParameterEntity */ 313 | nil, /* cdataBlock */ 314 | nil, /* externalSubset */ 315 | XML_SAX2_MAGIC, 316 | nil, 317 | nil, /* startElementNs */ 318 | nil, /* endElementNs */ 319 | nil /* serror 
/*Callbacks sent by RSSAXParser while it parses.

 Every method is optional. The parser checks which ones the delegate implements once, in initWithDelegate:, and only sends those.
 All string arguments are raw UTF-8 bytes as reported by libxml2 — no Foundation objects are created for them.*/
@protocol RSSAXParserDelegate

@optional

/*Sent when an element starts. The arguments are passed through from libxml2's startElementNs callback unchanged.
 attributes and numberOfAttributes can be handed to -attributesDictionary:numberOfAttributes: to get an NSDictionary.
 Call -beginStoringCharacters from here to have the element's text available in XMLEndElement.*/
- (void)saxParser:(RSSAXParser *)SAXParser XMLStartElement:(const unsigned char * _Nullable)localName prefix:(const unsigned char * _Nullable)prefix uri:(const unsigned char * _Nullable)uri numberOfNamespaces:(NSInteger)numberOfNamespaces namespaces:(const unsigned char * _Nullable * _Nullable)namespaces numberOfAttributes:(NSInteger)numberOfAttributes numberDefaulted:(int)numberDefaulted attributes:(const unsigned char * _Nullable * _Nullable)attributes;

/*Sent when an element ends. Once this method returns, the parser stops storing characters and discards what it had stored, so read currentCharacters / currentString before returning.*/
- (void)saxParser:(RSSAXParser *)SAXParser XMLEndElement:(const unsigned char * _Nullable)localName prefix:(const unsigned char * _Nullable)prefix uri:(const unsigned char * _Nullable)uri;

// Length is guaranteed to be greater than 0.
// If the parser is storing characters, these bytes have already been appended to its buffer when this is sent.
- (void)saxParser:(RSSAXParser *)SAXParser XMLCharactersFound:(const unsigned char * _Nullable)characters length:(NSUInteger)length;

- (void)saxParserDidReachEndOfDocument:(RSSAXParser *)SAXParser; /*If canceled, may not get called (but might).*/

/*Asked by -attributesDictionary:numberOfAttributes: for each attribute name. If nil is returned, the parser builds the string itself ("name", or "prefix:name" when there is a prefix).*/
- (nullable NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForName:(const unsigned char * _Nullable)name prefix:(const unsigned char * _Nullable)prefix; /*Okay to return nil. Prefix may be nil.*/

/*Asked by -attributesDictionary:numberOfAttributes: for each attribute value. bytes points at length bytes of value data. If nil is returned, the parser creates a new NSString from those bytes, decoding them as UTF-8.*/
- (nullable NSString *)saxParser:(RSSAXParser *)SAXParser internedStringForValue:(const void * _Nullable)bytes length:(NSUInteger)length;

@end
/*Private class extension: parsing state, plus cached answers about which optional delegate methods are implemented.*/
@interface RSSAXParser ()

@property (nonatomic, weak) id delegate; /*Weak: the parser does not keep its delegate alive.*/
@property (nonatomic, assign) xmlParserCtxtPtr context; /*libxml2 push-parser context. Created on the first parseBytes:numberOfBytes: call; freed in finishParsing and in dealloc.*/
@property (nonatomic, assign) BOOL storingCharacters; /*YES between beginStoringCharacters and endStoringCharacters.*/
@property (nonatomic) NSMutableData *characters; /*Bytes accumulated while storingCharacters is YES; nil otherwise.*/
/*The flags below are set once in initWithDelegate: via respondsToSelector:, so the callbacks don't have to ask the delegate each time.*/
@property (nonatomic) BOOL delegateRespondsToInternedStringMethod;
@property (nonatomic) BOOL delegateRespondsToInternedStringForValueMethod;
@property (nonatomic) BOOL delegateRespondsToStartElementMethod;
@property (nonatomic) BOOL delegateRespondsToEndElementMethod;
@property (nonatomic) BOOL delegateRespondsToCharactersFoundMethod;
@property (nonatomic) BOOL delegateRespondsToEndOfDocumentMethod;

@end
/*Feeds one chunk of the document to the libxml2 push parser, creating the parser context on first use.

 bytes / numberOfBytes: the next portion of the document. May be called repeatedly; call finishParsing after the last chunk.

 xmlParseChunk takes its length as an int, so a buffer larger than INT_MAX is fed in several slices rather than being truncated by a cast.
 If the parser context cannot be created, the call returns without parsing.*/
- (void)parseBytes:(const void *)bytes numberOfBytes:(NSUInteger)numberOfBytes {

	if (self.context == nil) {

		self.context = xmlCreatePushParserCtxt(&saxHandlerStruct, (__bridge void *)self, nil, 0, nil);
		if (self.context == nil) {
			return; // libxml2 could not allocate a context; there is nothing to feed.
		}
		xmlCtxtUseOptions(self.context, XML_PARSE_RECOVER | XML_PARSE_NOENT);
	}

	const char *cursor = (const char *)bytes;
	NSUInteger remaining = numberOfBytes;

	@autoreleasepool {
		// do/while so that a zero-length chunk is still passed to libxml2 once, as before.
		do {
			int sliceLength = (int)MIN(remaining, (NSUInteger)INT_MAX);
			xmlParseChunk(self.context, cursor, sliceLength, 0);
			if (cursor != NULL) {
				cursor += sliceLength;
			}
			remaining -= (NSUInteger)sliceLength;
		} while (remaining > 0);
	}
}
/*Builds a name -> value dictionary from the attribute array received in XMLStartElement.

 Each attribute occupies five consecutive pointers in the array. This method reads the name, the prefix, and the start and end of the value bytes.
 The delegate is given the chance to supply interned strings for the value and then the name; when it declines (or doesn't implement the method) the strings are created here.
 Prefixed attributes are keyed as "prefix:name".

 Returns nil if attributes is NULL or numberOfAttributes < 1.*/
- (NSDictionary *)attributesDictionary:(const xmlChar **)attributes numberOfAttributes:(NSInteger)numberOfAttributes {

	if (attributes == NULL || numberOfAttributes < 1) {
		return nil;
	}

	// Positions within one attribute's group of five pointers. Position 2 is not read here.
	enum {
		kNameSlot = 0,
		kPrefixSlot = 1,
		kValueStartSlot = 3,
		kValueEndSlot = 4,
		kSlotsPerAttribute = 5
	};

	NSMutableDictionary *result = [NSMutableDictionary new];

	@autoreleasepool {
		const xmlChar **slots = attributes;

		for (NSInteger remaining = numberOfAttributes; remaining > 0; remaining--, slots += kSlotsPerAttribute) {

			const xmlChar *nameBytes = slots[kNameSlot];
			const xmlChar *prefixBytes = slots[kPrefixSlot];
			const xmlChar *valueStart = slots[kValueStartSlot];
			NSUInteger valueLength = (NSUInteger)(slots[kValueEndSlot] - valueStart);

			// Value: the delegate's interned string if it offers one, otherwise a fresh UTF-8 string.
			NSString *attributeValue = nil;
			if (self.delegateRespondsToInternedStringForValueMethod) {
				attributeValue = [self.delegate saxParser:self internedStringForValue:(const void *)valueStart length:valueLength];
			}
			if (attributeValue == nil) {
				attributeValue = [[NSString alloc] initWithBytes:(const void *)valueStart length:valueLength encoding:NSUTF8StringEncoding];
			}

			// Name: same arrangement, falling back to "name" or "prefix:name".
			NSString *attributeKey = nil;
			if (self.delegateRespondsToInternedStringMethod) {
				attributeKey = [self.delegate saxParser:self internedStringForName:nameBytes prefix:prefixBytes];
			}
			if (attributeKey == nil) {
				attributeKey = [NSString stringWithUTF8String:(const char *)nameBytes];
				if (prefixBytes != NULL) {
					NSString *prefixString = [NSString stringWithUTF8String:(const char *)prefixBytes];
					attributeKey = [NSString stringWithFormat:@"%@:%@", prefixString, attributeKey];
				}
			}

			if (attributeKey != nil && attributeValue != nil) {
				result[attributeKey] = attributeValue;
			}
		}
	}

	return result;
}
/*libxml2 characters callback. context is bridged back to the RSSAXParser instance and the text run is forwarded to it.

 len arrives as a signed int. It is checked before the cast to NSUInteger, so a zero or negative length can never turn into a huge unsigned length.
 (xmlCharactersFound:length: already ignores lengths below 1, so nothing changes for zero.)*/
static void charactersFoundSAX(void *context, const xmlChar *ch, int len) {
	if (len < 1) {
		return;
	}
	[(__bridge RSSAXParser *)context xmlCharactersFound:ch length:(NSUInteger)len];
}
/*Runs libxml2's xmlInitParser() once per process. Later calls do nothing, because the call sits inside dispatch_once.
 RSSAXParser calls this from +initialize.*/
void RSSAXInitLibXMLParser(void) {

	static dispatch_once_t onceToken;
	dispatch_once(&onceToken, ^{
		xmlInitParser();
	});
}
30 | // But they’re available if you want them. 31 | 32 | #import "../RSRSSParser.h" 33 | #import "../RSAtomParser.h" 34 | #import "../RSParsedFeed.h" 35 | #import "../RSParsedArticle.h" 36 | #import "../RSParsedEnclosure.h" 37 | #import "../RSParsedAuthor.h" 38 | 39 | // HTML 40 | 41 | #import "../RSHTMLMetadataParser.h" 42 | #import "../RSHTMLMetadata.h" 43 | #import "../RSHTMLLinkParser.h" 44 | #import "../RSSAXHTMLParser.h" // For writing your own HTML parser. 45 | #import "../RSHTMLTag.h" 46 | 47 | // Utilities 48 | 49 | #import "../NSData+RSParser.h" 50 | #import "../NSString+RSParser.h" 51 | 52 | 53 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /Sources/Swift/Exports.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Exports.swift 3 | // 4 | // 5 | // Created by Stuart Breckenridge on 29/7/20. 6 | // 7 | 8 | import Foundation 9 | @_exported import RSParserObjC 10 | -------------------------------------------------------------------------------- /Sources/Swift/Feeds/FeedParser.swift: -------------------------------------------------------------------------------- 1 | // 2 | // FeedParser.swift 3 | // RSParser 4 | // 5 | // Created by Brent Simmons on 6/20/17. 6 | // Copyright © 2017 Ranchero Software, LLC. All rights reserved. 7 | // 8 | 9 | import Foundation 10 | import RSParserObjC 11 | 12 | // FeedParser handles RSS, Atom, JSON Feed, and RSS-in-JSON. 13 | // You don’t need to know the type of feed. 14 | 15 | public typealias FeedParserCallback = (_ parsedFeed: ParsedFeed?, _ error: Error?) 
public struct FeedParser {

	// Serial queue used by the callback-based parse function.
	private static let parseQueue = DispatchQueue(label: "FeedParser parse queue")

	// True when the data is recognized as one of the four supported feed formats.
	public static func canParse(_ parserData: ParserData) -> Bool {

		switch feedType(parserData) {
		case .unknown, .notAFeed:
			return false
		case .jsonFeed, .rssInJSON, .rss, .atom:
			return true
		}
	}

	// For use while data is still arriving: false only when the data is definitely not a feed.
	// An undetermined type (.unknown) counts as “might be parseable.”
	public static func mightBeAbleToParseBasedOnPartialData(_ parserData: ParserData) -> Bool {

		switch feedType(parserData, isPartialData: true) {
		case .notAFeed:
			return false
		case .jsonFeed, .rssInJSON, .rss, .atom, .unknown:
			return true
		}
	}

	// Detects the feed type and hands the data to the matching parser.
	// Returns nil when the type is unknown or the data is not a feed.
	// Errors thrown by the JSON parsers are passed on to the caller.
	//
	// This is generally fast enough to call on the main thread —
	// but it’s probably a good idea to use a background queue if
	// you might be doing a lot of parsing. (Such as in a feed reader.)
	public static func parse(_ parserData: ParserData) throws -> ParsedFeed? {

		switch feedType(parserData) {
		case .rss:
			return RSSParser.parse(parserData)
		case .atom:
			return AtomParser.parse(parserData)
		case .jsonFeed:
			return try JSONFeedParser.parse(parserData)
		case .rssInJSON:
			return try RSSInJSONParser.parse(parserData)
		case .unknown, .notAFeed:
			return nil
		}
	}

	// Parses on the parse queue, then calls completion on the main queue
	// with either the parsed feed or the error — never both.
	public static func parse(_ parserData: ParserData, _ completion: @escaping FeedParserCallback) {

		parseQueue.async {
			let feed: ParsedFeed?
			let failure: Error?

			do {
				feed = try parse(parserData)
				failure = nil
			}
			catch {
				feed = nil
				failure = error
			}

			DispatchQueue.main.async {
				completion(feed, failure)
			}
		}
	}
}
// Looks at the bytes in parserData and reports what kind of feed they appear to be.
//
// Can be called with partial data — while still downloading, for instance.
// With fewer than minNumberOfBytesRequired bytes the answer is .unknown: ask again when there’s more data.
// If it’s definitely not a feed, the answer is .notAFeed.
//
// The checks run in this order: JSON Feed, RSS-in-JSON, RSS, Atom.
//
// This is fast enough to call on the main thread.
public func feedType(_ parserData: ParserData, isPartialData: Bool = false) -> FeedType {

	let data = parserData.data
	guard data.count >= minNumberOfBytesRequired else {
		return .unknown
	}

	let bytes = data as NSData

	if bytes.isProbablyJSONFeed() {
		return .jsonFeed
	}
	if bytes.isProbablyRSSInJSON() {
		return .rssInJSON
	}
	if bytes.isProbablyRSS() {
		return .rss
	}
	if bytes.isProbablyAtom() {
		return .atom
	}

	// Partial JSON might still turn out to be a JSON Feed once all the data is in.
	// Dr. Drang’s JSON Feed (see allthis.json and allthis-partial.json in tests)
	// has, at this writing, the JSON version element at the end of the feed,
	// which is totally legal — but it means the feed can’t be recognized
	// as a JSON Feed without all the data.
	// So partial JSON gets .unknown rather than .notAFeed.
	guard isPartialData, bytes.isProbablyJSON() else {
		return .notAFeed
	}
	return .unknown
}
public struct JSONFeedParser {

	// Names of the JSON keys read by this parser, for both feed-level and item-level objects.
	struct Key {
		static let version = "version"
		static let items = "items"
		static let title = "title"
		static let homePageURL = "home_page_url"
		static let feedURL = "feed_url"
		static let feedDescription = "description"
		static let nextURL = "next_url"
		static let icon = "icon"
		static let favicon = "favicon"
		static let expired = "expired"
		static let author = "author"
		static let authors = "authors"
		static let name = "name"
		static let url = "url"
		static let avatar = "avatar"
		static let hubs = "hubs"
		static let type = "type"
		static let contentHTML = "content_html"
		static let contentText = "content_text"
		static let externalURL = "external_url"
		static let summary = "summary"
		static let image = "image"
		static let bannerImage = "banner_image"
		static let datePublished = "date_published"
		static let dateModified = "date_modified"
		static let tags = "tags"
		static let uniqueID = "id"
		static let attachments = "attachments"
		static let mimeType = "mime_type"
		static let sizeInBytes = "size_in_bytes"
		static let durationInSeconds = "duration_in_seconds"
		static let language = "language"
	}

	static let jsonFeedVersionMarker = "://jsonfeed.org/version/" // Allow for the mistake of not getting the scheme exactly correct.

	// Parses JSON Feed data into a ParsedFeed.
	//
	// Throws a FeedParserError when the data isn’t a JSON dictionary (.invalidJSON),
	// when the version string is missing or lacks the version marker (.jsonFeedVersionNotFound),
	// when there’s no items array (.jsonFeedItemsNotFound),
	// or when there’s no title string (.jsonFeedTitleNotFound).
	//
	// The feed URL comes from the feed itself when present, otherwise from parserData.url.
	// Items are always given parserData.url as their feed URL.
	public static func parse(_ parserData: ParserData) throws -> ParsedFeed? {

		guard let feedObject = JSONUtilities.dictionary(with: parserData.data) else {
			throw FeedParserError(.invalidJSON)
		}

		let versionString = feedObject[Key.version] as? String
		if versionString?.range(of: JSONFeedParser.jsonFeedVersionMarker) == nil {
			throw FeedParserError(.jsonFeedVersionNotFound)
		}

		guard let itemObjects = feedObject[Key.items] as? JSONArray else {
			throw FeedParserError(.jsonFeedItemsNotFound)
		}

		guard let feedTitle = feedObject[Key.title] as? String else {
			throw FeedParserError(.jsonFeedTitleNotFound)
		}

		return ParsedFeed(
			type: .jsonFeed,
			title: feedTitle,
			homePageURL: feedObject[Key.homePageURL] as? String,
			feedURL: feedObject[Key.feedURL] as? String ?? parserData.url,
			language: feedObject[Key.language] as? String,
			feedDescription: feedObject[Key.feedDescription] as? String,
			nextURL: feedObject[Key.nextURL] as? String,
			iconURL: feedObject[Key.icon] as? String,
			faviconURL: feedObject[Key.favicon] as? String,
			authors: parseAuthors(feedObject),
			expired: feedObject[Key.expired] as? Bool ?? false,
			hubs: parseHubs(feedObject),
			items: parseItems(itemObjects, parserData.url),
			extensions: parseExtensions(feedObject))
	}
}
String 113 | let url = dictionary[Key.url] as? String 114 | let avatar = dictionary[Key.avatar] as? String 115 | if name == nil && url == nil && avatar == nil { 116 | return nil 117 | } 118 | return ParsedAuthor(name: name, url: url, avatarURL: avatar, emailAddress: nil) 119 | } 120 | 121 | static func parseHubs(_ dictionary: JSONDictionary) -> Set? { 122 | 123 | guard let hubsArray = dictionary[Key.hubs] as? JSONArray else { 124 | return nil 125 | } 126 | 127 | let hubs = hubsArray.compactMap { (hubDictionary) -> ParsedHub? in 128 | guard let hubURL = hubDictionary[Key.url] as? String, let hubType = hubDictionary[Key.type] as? String else { 129 | return nil 130 | } 131 | return ParsedHub(type: hubType, url: hubURL) 132 | } 133 | return hubs.isEmpty ? nil : Set(hubs) 134 | } 135 | 136 | static func parseItems(_ itemsArray: JSONArray, _ feedURL: String) -> Set { 137 | 138 | return Set(itemsArray.compactMap { (oneItemDictionary) -> ParsedItem? in 139 | return parseItem(oneItemDictionary, feedURL) 140 | }) 141 | } 142 | 143 | static func parseItem(_ itemDictionary: JSONDictionary, _ feedURL: String) -> ParsedItem? { 144 | 145 | guard let uniqueID = parseUniqueID(itemDictionary) else { 146 | return nil 147 | } 148 | 149 | let contentHTML = itemDictionary[Key.contentHTML] as? String 150 | let contentText = itemDictionary[Key.contentText] as? String 151 | if contentHTML == nil && contentText == nil { 152 | return nil 153 | } 154 | 155 | let url = itemDictionary[Key.url] as? String 156 | let externalURL = itemDictionary[Key.externalURL] as? String 157 | let title = parseTitle(itemDictionary, feedURL) 158 | let language = itemDictionary[Key.language] as? String 159 | let summary = itemDictionary[Key.summary] as? String 160 | let imageURL = itemDictionary[Key.image] as? String 161 | let bannerImageURL = itemDictionary[Key.bannerImage] as? String 162 | 163 | let datePublished = parseDate(itemDictionary[Key.datePublished] as? 
String) 164 | let dateModified = parseDate(itemDictionary[Key.dateModified] as? String) 165 | 166 | let authors = parseAuthors(itemDictionary) 167 | var tags: Set? = nil 168 | if let tagsArray = itemDictionary[Key.tags] as? [String] { 169 | tags = Set(tagsArray) 170 | } 171 | let attachments = parseAttachments(itemDictionary) 172 | let extensions = parseExtensions(itemDictionary) 173 | 174 | return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: feedURL, url: url, externalURL: externalURL, title: title, language: language, contentHTML: contentHTML, contentText: contentText, summary: summary, imageURL: imageURL, bannerImageURL: bannerImageURL, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: tags, attachments: attachments, extensions: extensions) 175 | } 176 | 177 | static func parseTitle(_ itemDictionary: JSONDictionary, _ feedURL: String) -> String? { 178 | 179 | guard let title = itemDictionary[Key.title] as? String else { 180 | return nil 181 | } 182 | 183 | if isSpecialCaseTitleWithEntitiesFeed(feedURL) { 184 | return (title as NSString).rsparser_stringByDecodingHTMLEntities() 185 | } 186 | 187 | return title 188 | } 189 | 190 | static func isSpecialCaseTitleWithEntitiesFeed(_ feedURL: String) -> Bool { 191 | 192 | // As of 16 Feb. 2018, Kottke’s and Heer’s feeds includes HTML entities in the title elements. 193 | // If we find more feeds like this, we’ll add them here. If these feeds get fixed, we’ll remove them. 194 | 195 | let lowerFeedURL = feedURL.lowercased() 196 | let matchStrings = ["kottke.org", "pxlnv.com", "macstories.net", "macobserver.com"] 197 | for matchString in matchStrings { 198 | if lowerFeedURL.contains(matchString) { 199 | return true 200 | } 201 | } 202 | 203 | return false 204 | } 205 | 206 | static func parseUniqueID(_ itemDictionary: JSONDictionary) -> String? { 207 | 208 | if let uniqueID = itemDictionary[Key.uniqueID] as? 
String { 209 | return uniqueID // Spec says it must be a string 210 | } 211 | // Version 1 spec also says that if it’s a number, even though that’s incorrect, it should be coerced to a string. 212 | if let uniqueID = itemDictionary[Key.uniqueID] as? Int { 213 | return "\(uniqueID)" 214 | } 215 | if let uniqueID = itemDictionary[Key.uniqueID] as? Double { 216 | return "\(uniqueID)" 217 | } 218 | return nil 219 | } 220 | 221 | static func parseDate(_ dateString: String?) -> Date? { 222 | 223 | guard let dateString = dateString, !dateString.isEmpty else { 224 | return nil 225 | } 226 | return RSDateWithString(dateString) 227 | } 228 | 229 | static func parseAttachments(_ itemDictionary: JSONDictionary) -> Set? { 230 | 231 | guard let attachmentsArray = itemDictionary[Key.attachments] as? JSONArray else { 232 | return nil 233 | } 234 | return Set(attachmentsArray.compactMap { parseAttachment($0) }) 235 | } 236 | 237 | static func parseAttachment(_ attachmentObject: JSONDictionary) -> ParsedAttachment? { 238 | 239 | guard let url = attachmentObject[Key.url] as? String else { 240 | return nil 241 | } 242 | guard let mimeType = attachmentObject[Key.mimeType] as? String else { 243 | return nil 244 | } 245 | 246 | let title = attachmentObject[Key.title] as? String 247 | let sizeInBytes = attachmentObject[Key.sizeInBytes] as? Int 248 | let durationInSeconds = attachmentObject[Key.durationInSeconds] as? Int 249 | 250 | return ParsedAttachment(url: url, mimeType: mimeType, title: title, sizeInBytes: sizeInBytes, durationInSeconds: durationInSeconds) 251 | } 252 | 253 | static func parseExtensions(_ dictionary: JSONDictionary) -> Set? { 254 | 255 | let extensions = dictionary 256 | .filter { $0.key.hasPrefix("_") && $0.value is JSONDictionary } 257 | .map { parseExtensionWithName($0.key, dictionary: $0.value as! 
public struct RSSInJSONParser {

	// Parses RSS-in-JSON data into a ParsedFeed.
	//
	// Throws whatever JSONSerialization throws for unreadable JSON, and a FeedParserError when
	// the top level isn’t a dictionary (.invalidJSON), when "rss" or its "channel" is missing
	// (.rssChannelNotFound), or when no items array can be found (.rssItemsNotFound).
	//
	// The feed URL of the result, and of every item, is parserData.url.
	public static func parse(_ parserData: ParserData) throws -> ParsedFeed? {

		let jsonObject = try JSONSerialization.jsonObject(with: parserData.data)
		guard let rootObject = jsonObject as? JSONDictionary else {
			throw FeedParserError(.invalidJSON)
		}

		guard let rssObject = rootObject["rss"] as? JSONDictionary,
			let channelObject = rssObject["channel"] as? JSONDictionary else {
			throw FeedParserError(.rssChannelNotFound)
		}

		// I’d bet money that in practice the items array won’t always appear correctly inside the channel object.
		// I’d also bet that sometimes it gets called "items" instead of "item".
		// So look in four places, in this order, and take the first array found.
		let foundItems = (channelObject["item"] as? JSONArray)
			?? (rootObject["item"] as? JSONArray)
			?? (channelObject["items"] as? JSONArray)
			?? (rootObject["items"] as? JSONArray)

		guard let itemObjects = foundItems else {
			throw FeedParserError(.rssItemsNotFound)
		}

		return ParsedFeed(
			type: .rssInJSON,
			title: channelObject["title"] as? String,
			homePageURL: channelObject["link"] as? String,
			feedURL: parserData.url,
			language: channelObject["language"] as? String,
			feedDescription: channelObject["description"] as? String,
			nextURL: nil,
			iconURL: nil,
			faviconURL: nil,
			authors: nil,
			expired: false,
			hubs: nil,
			items: parseItems(itemObjects, parserData.url),
			extensions: nil)
	}
}
String { 90 | datePublished = RSDateWithString(datePublishedString) 91 | } 92 | 93 | let authors = parseAuthors(itemDictionary) 94 | let tags = parseTags(itemDictionary) 95 | let attachments = parseAttachments(itemDictionary) 96 | 97 | var uniqueID: String? = itemDictionary["guid"] as? String 98 | if uniqueID == nil { 99 | 100 | // Calculate a uniqueID based on a combination of non-empty elements. Then hash the result. 101 | // Items should have guids. When they don't, re-runs are very likely 102 | // because there's no other 100% reliable way to determine identity. 103 | // This calculated uniqueID is valid only for this particular feed. (Just like ids in JSON Feed.) 104 | 105 | var s = "" 106 | if let datePublished = datePublished { 107 | s += "\(datePublished.timeIntervalSince1970)" 108 | } 109 | if let title = title { 110 | s += title 111 | } 112 | if let externalURL = externalURL { 113 | s += externalURL 114 | } 115 | if let authorEmailAddress = authors?.first?.emailAddress { 116 | s += authorEmailAddress 117 | } 118 | if let oneAttachmentURL = attachments?.first?.url { 119 | s += oneAttachmentURL 120 | } 121 | if s.isEmpty { 122 | // Sheesh. Tough case. 123 | if let _ = contentHTML { 124 | s = contentHTML! 125 | } 126 | if let _ = contentText { 127 | s = contentText! 128 | } 129 | } 130 | uniqueID = (s as NSString).rsparser_md5Hash() 131 | } 132 | 133 | if let uniqueID = uniqueID { 134 | return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: feedURL, url: nil, externalURL: externalURL, title: title, language: nil, contentHTML: contentHTML, contentText: contentText, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: nil, authors: authors, tags: tags, attachments: attachments, extensions: nil) 135 | } 136 | return nil 137 | } 138 | 139 | static func parseAuthors(_ itemDictionary: JSONDictionary) -> Set? { 140 | 141 | guard let authorEmailAddress = itemDictionary["author"] as? 
String else { 142 | return nil 143 | } 144 | let parsedAuthor = ParsedAuthor(name: nil, url: nil, avatarURL: nil, emailAddress: authorEmailAddress) 145 | return Set([parsedAuthor]) 146 | } 147 | 148 | static func parseTags(_ itemDictionary: JSONDictionary) -> Set? { 149 | 150 | if let categoryObject = itemDictionary["category"] as? JSONDictionary { 151 | if let oneTag = categoryObject["#value"] as? String { 152 | return Set([oneTag]) 153 | } 154 | return nil 155 | } 156 | else if let categoryArray = itemDictionary["category"] as? JSONArray { 157 | return Set(categoryArray.compactMap{ $0["#value"] as? String }) 158 | } 159 | return nil 160 | } 161 | 162 | static func parseAttachments(_ itemDictionary: JSONDictionary) -> Set? { 163 | 164 | guard let enclosureObject = itemDictionary["enclosure"] as? JSONDictionary else { 165 | return nil 166 | } 167 | guard let attachmentURL = enclosureObject["url"] as? String else { 168 | return nil 169 | } 170 | 171 | var attachmentSize = enclosureObject["length"] as? Int 172 | if attachmentSize == nil { 173 | if let attachmentSizeString = enclosureObject["length"] as? String { 174 | attachmentSize = (attachmentSizeString as NSString).integerValue 175 | } 176 | } 177 | 178 | let type = enclosureObject["type"] as? String 179 | if let attachment = ParsedAttachment(url: attachmentURL, mimeType: type, title: nil, sizeInBytes: attachmentSize, durationInSeconds: nil) { 180 | return Set([attachment]) 181 | } 182 | return nil 183 | } 184 | } 185 | -------------------------------------------------------------------------------- /Sources/Swift/Feeds/ParsedAttachment.swift: -------------------------------------------------------------------------------- 1 | // 2 | // ParsedAttachment.swift 3 | // RSParser 4 | // 5 | // Created by Brent Simmons on 6/20/17. 6 | // Copyright © 2017 Ranchero Software, LLC. All rights reserved. 
//

import Foundation

/// A file attached to a feed item (for example an RSS enclosure or a JSON Feed attachment).
public struct ParsedAttachment: Hashable, Codable {

	public let url: String
	public let mimeType: String?
	public let title: String?
	public let sizeInBytes: Int?
	public let durationInSeconds: Int?

	/// Fails (returns nil) when `url` is empty.
	public init?(url: String, mimeType: String?, title: String?, sizeInBytes: Int?, durationInSeconds: Int?) {
		if url.isEmpty {
			return nil
		}

		self.url = url
		self.mimeType = mimeType
		self.title = title
		self.sizeInBytes = sizeInBytes
		self.durationInSeconds = durationInSeconds
	}

	// MARK: - Hashable

	/// Hashes on `url` only.
	public func hash(into hasher: inout Hasher) {
		hasher.combine(url)
	}
}

// --------------------------------------------------------------------------------
// /Sources/Swift/Feeds/ParsedAuthor.swift:
// --------------------------------------------------------------------------------
//
// ParsedAuthor.swift
// RSParser
//
// Created by Brent Simmons on 6/20/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//

import Foundation

/// An author of a feed or of a feed item. Every field is optional.
public struct ParsedAuthor: Hashable, Codable {

	public let name: String?
	public let url: String?
	public let avatarURL: String?
	public let emailAddress: String?

	public init(name: String?, url: String?, avatarURL: String?, emailAddress: String?) {
		self.name = name
		self.url = url
		self.avatarURL = avatarURL
		self.emailAddress = emailAddress
	}

	// MARK: - Hashable

	/// Hashes the first non-nil value among name, url, emailAddress and avatarURL (in that order),
	/// or the empty string when all four are nil.
	public func hash(into hasher: inout Hasher) {
		hasher.combine(name ?? url ?? emailAddress ?? avatarURL ?? "")
	}
}

// --------------------------------------------------------------------------------
// /Sources/Swift/Feeds/ParsedExtension.swift:
// --------------------------------------------------------------------------------
//
// ParsedExtension.swift
//
//
// Created by Jaanus Kase on 07.03.2021.
//

import Foundation

/// JSON Feed extension.
public struct ParsedExtension: Hashable {

	/// Name of extension, beginning with “_” according to JSON Feed spec. RSParser doesn’t transform the name in any way.
	public let name: String

	/*
	 JSON Feed spec isn’t exhaustive about what types of values an extension can contain.
	 We keep this ambiguity in implementation, only requiring values to be hashable,
	 so that these values can be stored in sets.

	 Public access to content values happens through subscript.
	 */
	private let content: Dictionary<String, AnyHashable>

	public init(name: String, content: Dictionary<String, AnyHashable>) {
		self.name = name
		self.content = content
	}

	/// Access content items directly with subscript.
	public subscript(s: String) -> AnyHashable? {
		return self.content[s]
	}

}

extension Set where Element == ParsedExtension {

	/// Access individual extensions by name.
	public subscript(s: String) -> ParsedExtension? {
		return first { $0.name == s }
	}
}

// --------------------------------------------------------------------------------
// /Sources/Swift/Feeds/ParsedFeed.swift:
// --------------------------------------------------------------------------------
//
// ParsedFeed.swift
// RSParser
//
// Created by Brent Simmons on 6/20/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//

import Foundation

/// The result of parsing a feed: feed-level metadata plus its items.
public struct ParsedFeed {

	public let type: FeedType
	public let title: String?
	public let homePageURL: String?
	public let feedURL: String?
	public let language: String?
	public let feedDescription: String?
	public let nextURL: String?
	public let iconURL: String?
	public let faviconURL: String?
	public let authors: Set<ParsedAuthor>?
	public let expired: Bool
	public let hubs: Set<ParsedHub>?
	public let items: Set<ParsedItem>
	public let extensions: Set<ParsedExtension>?

	/// Stores every argument as given, except `homePageURL`, which is passed through `nilIfEmptyOrWhitespace`.
	public init(type: FeedType, title: String?, homePageURL: String?, feedURL: String?, language: String?, feedDescription: String?, nextURL: String?, iconURL: String?, faviconURL: String?, authors: Set<ParsedAuthor>?, expired: Bool, hubs: Set<ParsedHub>?, items: Set<ParsedItem>, extensions: Set<ParsedExtension>?) {
		self.type = type
		self.title = title
		self.homePageURL = homePageURL?.nilIfEmptyOrWhitespace
		self.feedURL = feedURL
		self.language = language
		self.feedDescription = feedDescription
		self.nextURL = nextURL
		self.iconURL = iconURL
		self.faviconURL = faviconURL
		self.authors = authors
		self.expired = expired
		self.hubs = hubs
		self.items = items
		self.extensions = extensions
	}
}

// --------------------------------------------------------------------------------
// /Sources/Swift/Feeds/ParsedHub.swift:
// --------------------------------------------------------------------------------
//
// ParsedHub.swift
// RSParser
//
// Created by Brent Simmons on 6/20/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//

import Foundation

/// A hub reference for a feed: a hub type and its URL.
public struct ParsedHub: Hashable {

	public let type: String
	public let url: String
}

// --------------------------------------------------------------------------------
// /Sources/Swift/Feeds/ParsedItem.swift:
// --------------------------------------------------------------------------------
//
// ParsedItem.swift
// RSParser
//
// Created by Brent Simmons on 6/20/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//

import Foundation

/// One item (article/entry) of a parsed feed.
public struct ParsedItem: Hashable {

	public let syncServiceID: String? //Nil when not syncing
	public let uniqueID: String //RSS guid, for instance; may be calculated
	public let feedURL: String
	public let url: String?
	public let externalURL: String?
	public let title: String?
	public let language: String?
	public let contentHTML: String?
	public let contentText: String?
	public let summary: String?
	public let imageURL: String?
	public let bannerImageURL: String?
	public let datePublished: Date?
	public let dateModified: Date?
	public let authors: Set<ParsedAuthor>?
	public let tags: Set<String>?
	public let attachments: Set<ParsedAttachment>?
	public let extensions: Set<ParsedExtension>?

	public init(syncServiceID: String?, uniqueID: String, feedURL: String, url: String?, externalURL: String?, title: String?,
				language: String?, contentHTML: String?, contentText: String?, summary: String?, imageURL: String?,
				bannerImageURL: String?, datePublished: Date?, dateModified: Date?, authors: Set<ParsedAuthor>?,
				tags: Set<String>?, attachments: Set<ParsedAttachment>?, extensions: Set<ParsedExtension>?) {

		self.syncServiceID = syncServiceID
		self.uniqueID = uniqueID
		self.feedURL = feedURL
		self.url = url
		self.externalURL = externalURL
		self.title = title
		self.language = language
		self.contentHTML = contentHTML
		self.contentText = contentText
		self.summary = summary
		self.imageURL = imageURL
		self.bannerImageURL = bannerImageURL
		self.datePublished = datePublished
		self.dateModified = dateModified
		self.authors = authors
		self.tags = tags
		self.attachments = attachments
		self.extensions = extensions
	}

	// MARK: - Hashable

	/// Hashes `syncServiceID` when present; otherwise hashes `uniqueID` together with `feedURL`.
	public func hash(into hasher: inout Hasher) {
		if let syncServiceID = syncServiceID {
			hasher.combine(syncServiceID)
		}
		else {
			hasher.combine(uniqueID)
			hasher.combine(feedURL)
		}
	}
}


// --------------------------------------------------------------------------------
// /Sources/Swift/Feeds/XML/AtomParser.swift:
// --------------------------------------------------------------------------------
//
// AtomParser.swift
// RSParser
//
// Created by Brent Simmons on 6/25/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//

import Foundation

#if SWIFT_PACKAGE
import RSParserObjC
#endif

// AtomParser wraps the Objective-C RSAtomParser.
//
// The Objective-C parser creates RSParsedFeed, RSParsedArticle, etc.
// This wrapper then creates ParsedFeed, ParsedItem, etc. so that it creates
// the same things that JSONFeedParser and RSSInJSONParser create.
//
// In general, you should see FeedParser.swift for all your feed-parsing needs.

public struct AtomParser {

	/// Parses Atom data with `RSAtomParser` and converts the result with `RSParsedFeedTransformer`.
	public static func parse(_ parserData: ParserData) -> ParsedFeed? {

		let rsParsedFeed = RSAtomParser.parseFeed(with: parserData)
		return RSParsedFeedTransformer.parsedFeed(rsParsedFeed)
	}
}

// --------------------------------------------------------------------------------
// /Sources/Swift/Feeds/XML/RSParsedFeedTransformer.swift:
// --------------------------------------------------------------------------------
//
// RSParsedFeedTransformer.swift
// RSParser
//
// Created by Brent Simmons on 6/25/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
//

import Foundation
#if SWIFT_PACKAGE
import RSParserObjC
#endif

// RSRSSParser and RSAtomParser were written in Objective-C quite a while ago.
// They create an RSParsedFeed object and related Objective-C objects.
// These functions take an RSParsedFeed and return a Swift-y ParsedFeed,
// which is part of providing a single API for feed parsing.

struct RSParsedFeedTransformer {

	/// Converts an `RSParsedFeed` into a `ParsedFeed`.
	///
	/// Only title, link, URL string, language and articles are carried over; every other
	/// feed-level field is passed as nil (and `expired` as false).
	static func parsedFeed(_ rsParsedFeed: RSParsedFeed) -> ParsedFeed {

		let items = parsedItems(rsParsedFeed.articles)
		// NOTE(review): the type is always `.rss`, even when the caller is AtomParser — confirm this is intended.
		return ParsedFeed(type: .rss, title: rsParsedFeed.title, homePageURL: rsParsedFeed.link, feedURL: rsParsedFeed.urlString, language: rsParsedFeed.language, feedDescription: nil, nextURL: nil, iconURL: nil, faviconURL: nil, authors: nil, expired: false, hubs: nil, items: items, extensions: nil)
	}
}

private extension RSParsedFeedTransformer {

	static func parsedItems(_ parsedArticles: Set<RSParsedArticle>) -> Set<ParsedItem> {

		// Create Set<ParsedItem> from Set<RSParsedArticle>

		return Set(parsedArticles.map(parsedItem))
	}

	/// Converts one `RSParsedArticle` into a `ParsedItem`.
	///
	/// The article's permalink becomes `url`, its link becomes `externalURL`, and its body becomes `contentHTML`.
	static func parsedItem(_ parsedArticle: RSParsedArticle) -> ParsedItem {

		let uniqueID = parsedArticle.articleID
		let url = parsedArticle.permalink
		let externalURL = parsedArticle.link
		let title = parsedArticle.title
		let language = parsedArticle.language
		let contentHTML = parsedArticle.body
		let datePublished = parsedArticle.datePublished
		let dateModified = parsedArticle.dateModified
		let authors = parsedAuthors(parsedArticle.authors)
		let attachments = parsedAttachments(parsedArticle.enclosures)

		return ParsedItem(syncServiceID: nil, uniqueID: uniqueID, feedURL: parsedArticle.feedURL, url: url, externalURL: externalURL, title: title, language: language, contentHTML: contentHTML, contentText: nil, summary: nil, imageURL: nil, bannerImageURL: nil, datePublished: datePublished, dateModified: dateModified, authors: authors, tags: nil, attachments: attachments, extensions: nil)
	}

	/// Converts Objective-C authors into `ParsedAuthor` values; returns nil for nil or empty input.
	static func parsedAuthors(_ authors: Set<RSParsedAuthor>?) -> Set<ParsedAuthor>? {

		guard let authors = authors, !authors.isEmpty else {
			return nil
		}

		// ParsedAuthor.init is not failable, so a plain map is enough.
		let transformedAuthors = authors.map { author in
			ParsedAuthor(name: author.name, url: author.url, avatarURL: nil, emailAddress: author.emailAddress)
		}

		return transformedAuthors.isEmpty ? nil : Set(transformedAuthors)
	}

	/// Converts Objective-C enclosures into `ParsedAttachment` values.
	///
	/// Enclosures rejected by `ParsedAttachment.init` are dropped, and a non-positive length is reported as nil.
	///
	/// - Returns: nil for nil or empty input, or when no enclosure could be converted.
	static func parsedAttachments(_ enclosures: Set<RSParsedEnclosure>?) -> Set<ParsedAttachment>? {

		guard let enclosures = enclosures, !enclosures.isEmpty else {
			return nil
		}

		let attachments = enclosures.compactMap { (enclosure) -> ParsedAttachment? in

			let sizeInBytes = enclosure.length > 0 ? enclosure.length : nil
			return ParsedAttachment(url: enclosure.url, mimeType: enclosure.mimeType, title: nil, sizeInBytes: sizeInBytes, durationInSeconds: nil)
		}

		return attachments.isEmpty ? nil : Set(attachments)
	}
}

// --------------------------------------------------------------------------------
// /Sources/Swift/Feeds/XML/RSSParser.swift:
// --------------------------------------------------------------------------------
//
// RSSParser.swift
// RSParser
//
// Created by Brent Simmons on 6/25/17.
// Copyright © 2017 Ranchero Software, LLC. All rights reserved.
7 | // 8 | 9 | import Foundation 10 | 11 | #if SWIFT_PACKAGE 12 | import RSParserObjC 13 | #endif 14 | 15 | // RSSParser wraps the Objective-C RSRSSParser. 16 | // 17 | // The Objective-C parser creates RSParsedFeed, RSParsedArticle, etc. 18 | // This wrapper then creates ParsedFeed, ParsedItem, etc. so that it creates 19 | // the same things that JSONFeedParser and RSSInJSONParser create. 20 | // 21 | // In general, you should see FeedParser.swift for all your feed-parsing needs. 22 | 23 | public struct RSSParser { 24 | 25 | public static func parse(_ parserData: ParserData) -> ParsedFeed? { 26 | 27 | let rsParsedFeed = RSRSSParser.parseFeed(with: parserData) 28 | return RSParsedFeedTransformer.parsedFeed(rsParsedFeed) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /Sources/Swift/JSON/JSONTypes.swift: -------------------------------------------------------------------------------- 1 | // 2 | // JSONDictionary.swift 3 | // RSParser 4 | // 5 | // Created by Brent Simmons on 6/24/17. 6 | // Copyright © 2017 Ranchero Software, LLC. All rights reserved. 7 | // 8 | 9 | import Foundation 10 | 11 | public typealias JSONDictionary = [String: Any] 12 | public typealias JSONArray = [JSONDictionary] 13 | -------------------------------------------------------------------------------- /Sources/Swift/JSON/JSONUtilities.swift: -------------------------------------------------------------------------------- 1 | // 2 | // JSONUtilities.swift 3 | // RSParser 4 | // 5 | // Created by Brent Simmons on 12/10/17. 6 | // Copyright © 2017 Ranchero Software, LLC. All rights reserved. 7 | // 8 | 9 | import Foundation 10 | 11 | public struct JSONUtilities { 12 | 13 | public static func object(with data: Data) -> Any? { 14 | 15 | return try? JSONSerialization.jsonObject(with: data) 16 | } 17 | 18 | public static func dictionary(with data: Data) -> JSONDictionary? { 19 | 20 | return object(with: data) as? 
JSONDictionary 21 | } 22 | 23 | public static func array(with data: Data) -> JSONArray? { 24 | 25 | return object(with: data) as? JSONArray 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /Sources/Swift/Utilities/String+RSParser.swift: -------------------------------------------------------------------------------- 1 | // 2 | // String+RSParser.swift 3 | // RSParser 4 | // 5 | // Created by Nate Weaver on 2020-01-19. 6 | // Copyright © 2020 Ranchero Software, LLC. All rights reserved. 7 | // 8 | 9 | import Foundation 10 | 11 | extension String { 12 | 13 | var nilIfEmptyOrWhitespace: String? { 14 | return self.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty ? nil : self 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /Tests/LinuxMain.swift: -------------------------------------------------------------------------------- 1 | import XCTest 2 | 3 | import RSParserTests 4 | 5 | var tests = [XCTestCaseEntry]() 6 | tests += RSParserTests.allTests() 7 | XCTMain(tests) 8 | -------------------------------------------------------------------------------- /Tests/RSParserTests/AtomParserTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // AtomParserTests.swift 3 | // RSParser 4 | // 5 | // Created by Brent Simmons on 6/26/17. 6 | // Copyright © 2017 Ranchero Software, LLC. All rights reserved. 7 | // 8 | 9 | import XCTest 10 | import RSParser 11 | 12 | class AtomParserTests: XCTestCase { 13 | 14 | func testDaringFireballPerformance() { 15 | 16 | // 0.009 sec on my 2012 iMac. 17 | let d = parserData("DaringFireball", "atom", "http://daringfireball.net/") //It’s actually an Atom feed 18 | self.measure { 19 | let _ = try! FeedParser.parse(d) 20 | } 21 | } 22 | 23 | func testAllThisPerformance() { 24 | 25 | // 0.003 sec on my 2012 iMac. 
26 | let d = parserData("allthis", "atom", "http://leancrew.com/all-this") 27 | self.measure { 28 | let _ = try! FeedParser.parse(d) 29 | } 30 | } 31 | 32 | func testGettingHomePageLink() { 33 | 34 | let d = parserData("allthis", "atom", "http://leancrew.com/all-this") 35 | let parsedFeed = try! FeedParser.parse(d)! 36 | 37 | XCTAssertTrue(parsedFeed.homePageURL == "http://leancrew.com/all-this") 38 | } 39 | 40 | func testDaringFireball() { 41 | 42 | let d = parserData("DaringFireball", "atom", "http://daringfireball.net/") //It’s actually an Atom feed 43 | let parsedFeed = try! FeedParser.parse(d)! 44 | 45 | for article in parsedFeed.items { 46 | 47 | XCTAssertNotNil(article.url) 48 | 49 | XCTAssertTrue(article.uniqueID.hasPrefix("tag:daringfireball.net,2017:/")) 50 | 51 | XCTAssertEqual(article.authors!.count, 1) // TODO: parse Atom authors 52 | let author = article.authors!.first! 53 | if author.name == "Daring Fireball Department of Commerce" { 54 | XCTAssertNil(author.url) 55 | } 56 | else { 57 | XCTAssertEqual(author.name, "John Gruber") 58 | XCTAssertEqual(author.url, "http://daringfireball.net/") 59 | } 60 | 61 | XCTAssertNotNil(article.datePublished) 62 | XCTAssert(article.attachments == nil) 63 | 64 | XCTAssertEqual(article.language, "en") 65 | } 66 | } 67 | 68 | func test4fsodonlineAttachments() { 69 | 70 | // Thanks to Marco for finding me some Atom podcast feeds. Apparently they’re super-rare. 71 | 72 | let d = parserData("4fsodonline", "atom", "http://4fsodonline.blogspot.com/") 73 | let parsedFeed = try! FeedParser.parse(d)! 74 | 75 | for article in parsedFeed.items { 76 | 77 | XCTAssertTrue(article.attachments!.count > 0) 78 | let attachment = article.attachments!.first! 
79 | 80 | XCTAssertTrue(attachment.url.hasPrefix("http://www.blogger.com/video-play.mp4?")) 81 | XCTAssertNil(attachment.sizeInBytes) 82 | XCTAssertEqual(attachment.mimeType!, "video/mp4") 83 | } 84 | } 85 | 86 | func testExpertOpinionENTAttachments() { 87 | 88 | // Another from Marco. 89 | 90 | let d = parserData("expertopinionent", "atom", "http://expertopinionent.typepad.com/my-blog/") 91 | let parsedFeed = try! FeedParser.parse(d)! 92 | 93 | for article in parsedFeed.items { 94 | 95 | guard let attachments = article.attachments else { 96 | continue 97 | } 98 | 99 | XCTAssertEqual(attachments.count, 1) 100 | let attachment = attachments.first! 101 | 102 | XCTAssertTrue(attachment.url.hasSuffix(".mp3")) 103 | XCTAssertNil(attachment.sizeInBytes) 104 | XCTAssertEqual(attachment.mimeType!, "audio/mpeg") 105 | } 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /Tests/RSParserTests/EntityDecodingTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // EntityDecodingTests.swift 3 | // RSParserTests 4 | // 5 | // Created by Brent Simmons on 12/30/17. 6 | // Copyright © 2017 Ranchero Software, LLC. All rights reserved. 7 | // 8 | 9 | import XCTest 10 | import RSParser 11 | 12 | class EntityDecodingTests: XCTestCase { 13 | 14 | func test39Decoding() { 15 | 16 | // Bug found by Manton Reece — the &#39; entity was not getting decoded by NetNewsWire in JSON Feeds from micro.blog. 17 | 18 | let s = "These are the times that try men&#39;s souls." 
19 | let decoded = s.rsparser_stringByDecodingHTMLEntities() 20 | 21 | XCTAssertEqual(decoded, "These are the times that try men's souls.") 22 | } 23 | 24 | func testEntities() { 25 | var s = "…" 26 | var decoded = s.rsparser_stringByDecodingHTMLEntities() 27 | 28 | XCTAssertEqual(decoded, "…") 29 | 30 | s = "…" 31 | decoded = s.rsparser_stringByDecodingHTMLEntities() 32 | XCTAssertEqual(decoded, "…") 33 | 34 | s = "'" 35 | decoded = s.rsparser_stringByDecodingHTMLEntities() 36 | XCTAssertEqual(decoded, "'") 37 | 38 | s = "§" 39 | decoded = s.rsparser_stringByDecodingHTMLEntities() 40 | XCTAssertEqual(decoded, "§") 41 | 42 | s = "£" 43 | decoded = s.rsparser_stringByDecodingHTMLEntities() 44 | XCTAssertEqual(decoded, "£") 45 | 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /Tests/RSParserTests/FeedParserTypeTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // FeedParserTypeTests.swift 3 | // RSParser 4 | // 5 | // Created by Brent Simmons on 6/25/17. 6 | // Copyright © 2017 Ranchero Software, LLC. All rights reserved. 
7 | // 8 | 9 | import XCTest 10 | import RSParser 11 | import RSParserObjC 12 | 13 | class FeedParserTypeTests: XCTestCase { 14 | 15 | // MARK: HTML 16 | 17 | func testDaringFireballHTMLType() { 18 | 19 | let d = parserData("DaringFireball", "html", "http://daringfireball.net/") 20 | let type = feedType(d) 21 | XCTAssertTrue(type == .notAFeed) 22 | } 23 | 24 | func testFurboHTMLType() { 25 | 26 | let d = parserData("furbo", "html", "http://furbo.org/") 27 | let type = feedType(d) 28 | XCTAssertTrue(type == .notAFeed) 29 | } 30 | 31 | func testInessentialHTMLType() { 32 | 33 | let d = parserData("inessential", "html", "http://inessential.com/") 34 | let type = feedType(d) 35 | XCTAssertTrue(type == .notAFeed) 36 | } 37 | 38 | func testSixColorsHTMLType() { 39 | 40 | let d = parserData("sixcolors", "html", "https://sixcolors.com/") 41 | let type = feedType(d) 42 | XCTAssertTrue(type == .notAFeed) 43 | } 44 | 45 | // MARK: RSS 46 | 47 | func testEMarleyRSSType() { 48 | 49 | let d = parserData("EMarley", "rss", "https://medium.com/@emarley") 50 | let type = feedType(d) 51 | XCTAssertTrue(type == .rss) 52 | } 53 | 54 | func testScriptingNewsRSSType() { 55 | 56 | let d = parserData("scriptingNews", "rss", "http://scripting.com/") 57 | let type = feedType(d) 58 | XCTAssertTrue(type == .rss) 59 | } 60 | 61 | func testKatieFloydRSSType() { 62 | 63 | let d = parserData("KatieFloyd", "rss", "https://katiefloyd.com/") 64 | let type = feedType(d) 65 | XCTAssertTrue(type == .rss) 66 | } 67 | 68 | func testMantonRSSType() { 69 | 70 | let d = parserData("manton", "rss", "http://manton.org/") 71 | let type = feedType(d) 72 | XCTAssertTrue(type == .rss) 73 | } 74 | 75 | func testDCRainmakerRSSType() { 76 | 77 | let d = parserData("dcrainmaker", "xml", "https://www.dcrainmaker.com/") 78 | let type = feedType(d) 79 | XCTAssertTrue(type == .rss) 80 | } 81 | 82 | func testMacworldRSSType() { 83 | 84 | let d = parserData("macworld", "rss", "https://www.macworld.com/") 85 | let type = 
feedType(d) 86 | XCTAssertTrue(type == .rss) 87 | } 88 | 89 | func testNatashaTheRobotRSSType() { 90 | 91 | let d = parserData("natasha", "xml", "https://www.natashatherobot.com/") 92 | let type = feedType(d) 93 | XCTAssertTrue(type == .rss) 94 | } 95 | 96 | func testDontHitSaveRSSWithBOMType() { 97 | 98 | let d = parserData("donthitsave", "xml", "http://donthitsave.com/donthitsavefeed.xml") 99 | let type = feedType(d) 100 | XCTAssertTrue(type == .rss) 101 | } 102 | 103 | func testBioRDF() { 104 | let d = parserData("bio", "rdf", "http://connect.biorxiv.org/") 105 | let type = feedType(d) 106 | XCTAssertTrue(type == .rss) 107 | } 108 | 109 | func testPHPXML() { 110 | let d = parserData("phpxml", "rss", "https://www.fcutrecht.net/") 111 | let type = feedType(d) 112 | XCTAssertTrue(type == .rss) 113 | } 114 | 115 | // MARK: Atom 116 | 117 | func testDaringFireballAtomType() { 118 | 119 | // File extension is .rss, but it’s really an Atom feed. 120 | let d = parserData("DaringFireball", "rss", "http://daringfireball.net/") 121 | let type = feedType(d) 122 | XCTAssertTrue(type == .atom) 123 | } 124 | 125 | func testOneFootTsunamiAtomType() { 126 | 127 | let d = parserData("OneFootTsunami", "atom", "http://onefoottsunami.com/") 128 | let type = feedType(d) 129 | XCTAssertTrue(type == .atom) 130 | } 131 | 132 | func testRussCoxAtomType() { 133 | let d = parserData("russcox", "atom", "https://research.swtch.com/") 134 | let type = feedType(d) 135 | XCTAssertTrue(type == .atom) 136 | } 137 | 138 | // MARK: RSS-in-JSON 139 | 140 | func testScriptingNewsJSONType() { 141 | 142 | let d = parserData("ScriptingNews", "json", "http://scripting.com/") 143 | let type = feedType(d) 144 | XCTAssertTrue(type == .rssInJSON) 145 | } 146 | 147 | // MARK: JSON Feed 148 | 149 | func testInessentialJSONFeedType() { 150 | 151 | let d = parserData("inessential", "json", "http://inessential.com/") 152 | let type = feedType(d) 153 | XCTAssertTrue(type == .jsonFeed) 154 | } 155 | 156 | func 
testAllThisJSONFeedType() { 157 | 158 | let d = parserData("allthis", "json", "http://leancrew.com/allthis/") 159 | let type = feedType(d) 160 | XCTAssertTrue(type == .jsonFeed) 161 | } 162 | 163 | func testCurtJSONFeedType() { 164 | 165 | let d = parserData("curt", "json", "http://curtclifton.net/") 166 | let type = feedType(d) 167 | XCTAssertTrue(type == .jsonFeed) 168 | } 169 | 170 | func testPixelEnvyJSONFeedType() { 171 | 172 | let d = parserData("pxlnv", "json", "http://pxlnv.com/") 173 | let type = feedType(d) 174 | XCTAssertTrue(type == .jsonFeed) 175 | } 176 | 177 | func testRoseJSONFeedType() { 178 | 179 | let d = parserData("rose", "json", "https://www.rosemaryorchard.com/") 180 | let type = feedType(d) 181 | XCTAssertTrue(type == .jsonFeed) 182 | } 183 | 184 | // MARK: Unknown 185 | 186 | func testPartialAllThisUnknownFeedType() { 187 | 188 | // In the case of this feed, the partial data isn’t enough to detect that it’s a JSON Feed. 189 | // The type detector should return .unknown rather than .notAFeed. 190 | 191 | let d = parserData("allthis-partial", "json", "http://leancrew.com/allthis/") 192 | let type = feedType(d, isPartialData: true) 193 | XCTAssertEqual(type, .unknown) 194 | } 195 | 196 | // MARK: Performance 197 | 198 | func testFeedTypePerformance() { 199 | 200 | // 0.000 on my 2012 iMac. 201 | 202 | let d = parserData("EMarley", "rss", "https://medium.com/@emarley") 203 | self.measure { 204 | let _ = feedType(d) 205 | } 206 | } 207 | 208 | func testFeedTypePerformance2() { 209 | 210 | // 0.000 on my 2012 iMac. 211 | 212 | let d = parserData("inessential", "json", "http://inessential.com/") 213 | self.measure { 214 | let _ = feedType(d) 215 | } 216 | } 217 | 218 | func testFeedTypePerformance3() { 219 | 220 | // 0.000 on my 2012 iMac. 
221 | 222 | let d = parserData("DaringFireball", "html", "http://daringfireball.net/") 223 | self.measure { 224 | let _ = feedType(d) 225 | } 226 | } 227 | 228 | func testFeedTypePerformance4() { 229 | 230 | // 0.001 on my 2012 iMac. 231 | 232 | let d = parserData("DaringFireball", "rss", "http://daringfireball.net/") 233 | self.measure { 234 | let _ = feedType(d) 235 | } 236 | } 237 | 238 | } 239 | 240 | func parserData(_ filename: String, _ fileExtension: String, _ url: String) -> ParserData { 241 | let filename = "Resources/\(filename)" 242 | let path = Bundle.module.path(forResource: filename, ofType: fileExtension)! 243 | let data = try! Data(contentsOf: URL(fileURLWithPath: path)) 244 | return ParserData(url: url, data: data) 245 | } 246 | -------------------------------------------------------------------------------- /Tests/RSParserTests/HTMLLinkTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // HTMLLinkTests.swift 3 | // RSParser 4 | // 5 | // Created by Brent Simmons on 6/25/17. 6 | // Copyright © 2017 Ranchero Software, LLC. All rights reserved. 
7 | // 8 | 9 | import XCTest 10 | import RSParser 11 | import RSParserObjC 12 | 13 | class HTMLLinkTests: XCTestCase { 14 | 15 | func testSixColorsPerformance() { 16 | 17 | // 0.003 sec on my 2012 iMac 18 | let d = parserData("sixcolors", "html", "http://sixcolors.com/") 19 | self.measure { 20 | let _ = RSHTMLLinkParser.htmlLinks(with: d) 21 | } 22 | } 23 | 24 | func testSixColorsLink() { 25 | 26 | let d = parserData("sixcolors", "html", "http://sixcolors.com/") 27 | let links = RSHTMLLinkParser.htmlLinks(with: d) 28 | 29 | let linkToFind = "https://www.theincomparable.com/theincomparable/290/index.php" 30 | let textToFind = "this week’s episode of The Incomparable" 31 | 32 | var found = false 33 | for oneLink in links { 34 | if let urlString = oneLink.urlString, let text = oneLink.text, urlString == linkToFind, text == textToFind { 35 | found = true 36 | } 37 | } 38 | 39 | XCTAssertTrue(found) 40 | XCTAssertEqual(links.count, 131) 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /Tests/RSParserTests/HTMLMetadataTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // HTMLMetadataTests.swift 3 | // RSParser 4 | // 5 | // Created by Brent Simmons on 6/25/17. 6 | // Copyright © 2017 Ranchero Software, LLC. All rights reserved. 7 | // 8 | 9 | import XCTest 10 | import RSParser 11 | import RSParserObjC 12 | 13 | class HTMLMetadataTests: XCTestCase { 14 | 15 | func testDaringFireball() { 16 | 17 | let d = parserData("DaringFireball", "html", "http://daringfireball.net/") 18 | let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) 19 | 20 | XCTAssertEqual(metadata.favicons.first?.urlString, "http://daringfireball.net/graphics/favicon.ico?v=005") 21 | 22 | XCTAssertEqual(metadata.feedLinks.count, 1) 23 | 24 | let feedLink = metadata.feedLinks.first! 
25 | XCTAssertNil(feedLink.title) 26 | XCTAssertEqual(feedLink.type, "application/atom+xml") 27 | XCTAssertEqual(feedLink.urlString, "http://daringfireball.net/feeds/main") 28 | } 29 | 30 | func testDaringFireballPerformance() { 31 | 32 | // 0.002 sec on my 2012 iMac 33 | let d = parserData("DaringFireball", "html", "http://daringfireball.net/") 34 | self.measure { 35 | let _ = RSHTMLMetadataParser.htmlMetadata(with: d) 36 | } 37 | } 38 | 39 | func testFurbo() { 40 | 41 | let d = parserData("furbo", "html", "http://furbo.org/") 42 | let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) 43 | 44 | XCTAssertEqual(metadata.favicons.first?.urlString, "http://furbo.org/favicon.ico") 45 | 46 | XCTAssertEqual(metadata.feedLinks.count, 1) 47 | 48 | let feedLink = metadata.feedLinks.first! 49 | XCTAssertEqual(feedLink.title, "Iconfactory News Feed") 50 | XCTAssertEqual(feedLink.type, "application/rss+xml") 51 | } 52 | 53 | func testFurboPerformance() { 54 | 55 | // 0.001 sec on my 2012 iMac 56 | let d = parserData("furbo", "html", "http://furbo.org/") 57 | self.measure { 58 | let _ = RSHTMLMetadataParser.htmlMetadata(with: d) 59 | } 60 | } 61 | 62 | func testInessential() { 63 | 64 | let d = parserData("inessential", "html", "http://inessential.com/") 65 | let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) 66 | 67 | XCTAssertNil(metadata.favicons.first?.urlString) 68 | 69 | XCTAssertEqual(metadata.feedLinks.count, 1) 70 | let feedLink = metadata.feedLinks.first! 
71 | XCTAssertEqual(feedLink.title, "RSS") 72 | XCTAssertEqual(feedLink.type, "application/rss+xml") 73 | XCTAssertEqual(feedLink.urlString, "http://inessential.com/xml/rss.xml") 74 | 75 | XCTAssertEqual(metadata.appleTouchIcons.count, 0); 76 | } 77 | 78 | func testInessentialPerformance() { 79 | 80 | // 0.001 sec on my 2012 iMac 81 | let d = parserData("inessential", "html", "http://inessential.com/") 82 | self.measure { 83 | let _ = RSHTMLMetadataParser.htmlMetadata(with: d) 84 | } 85 | } 86 | 87 | func testCocoPerformance() { 88 | 89 | // 0.004 sec on my 2012 iMac 90 | let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/") 91 | self.measure { 92 | let _ = RSHTMLMetadataParser.htmlMetadata(with: d) 93 | } 94 | } 95 | 96 | func testSixColors() { 97 | 98 | let d = parserData("sixcolors", "html", "http://sixcolors.com/") 99 | let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) 100 | 101 | XCTAssertEqual(metadata.favicons.first?.urlString, "https://sixcolors.com/images/favicon.ico") 102 | 103 | XCTAssertEqual(metadata.feedLinks.count, 1); 104 | let feedLink = metadata.feedLinks.first! 
105 | XCTAssertEqual(feedLink.title, "RSS"); 106 | XCTAssertEqual(feedLink.type, "application/rss+xml"); 107 | XCTAssertEqual(feedLink.urlString, "http://feedpress.me/sixcolors"); 108 | 109 | XCTAssertEqual(metadata.appleTouchIcons.count, 6); 110 | let icon = metadata.appleTouchIcons[3]; 111 | XCTAssertEqual(icon.rel, "apple-touch-icon"); 112 | XCTAssertEqual(icon.sizes, "120x120"); 113 | XCTAssertEqual(icon.urlString, "https://sixcolors.com/apple-touch-icon-120.png"); 114 | } 115 | 116 | func testSixColorsPerformance() { 117 | 118 | // 0.002 sec on my 2012 iMac 119 | let d = parserData("sixcolors", "html", "http://sixcolors.com/") 120 | self.measure { 121 | let _ = RSHTMLMetadataParser.htmlMetadata(with: d) 122 | } 123 | } 124 | 125 | func testCocoOGImage() { 126 | 127 | let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/") 128 | let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) 129 | let openGraphData = metadata.openGraphProperties 130 | let image = openGraphData.images.first! 131 | XCTAssert(image.url == "https://cdn.theatlantic.com/assets/media/img/mt/2017/11/1033101_first_full_length_trailer_arrives_pixars_coco/facebook.jpg?1511382177") 132 | } 133 | 134 | func testCocoTwitterImage() { 135 | 136 | let d = parserData("coco", "html", "https://www.theatlantic.com/entertainment/archive/2017/11/coco-is-among-pixars-best-movies-in-years/546695/") 137 | let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) 138 | let twitterData = metadata.twitterProperties 139 | let imageURL = twitterData.imageURL! 140 | XCTAssert(imageURL == "https://cdn.theatlantic.com/assets/media/img/mt/2017/11/1033101_first_full_length_trailer_arrives_pixars_coco/facebook.jpg?1511382177") 141 | } 142 | 143 | func testYouTube() { 144 | // YouTube is a special case — the feed links appear after the head section, in the body section. 
145 | let d = parserData("YouTubeTheVolvoRocks", "html", "https://www.youtube.com/user/TheVolvorocks") 146 | let metadata = RSHTMLMetadataParser.htmlMetadata(with: d) 147 | 148 | XCTAssertEqual(metadata.feedLinks.count, 1); 149 | let feedLink = metadata.feedLinks.first! 150 | XCTAssertEqual(feedLink.title, "RSS"); 151 | XCTAssertEqual(feedLink.type, "application/rss+xml"); 152 | XCTAssertEqual(feedLink.urlString, "https://www.youtube.com/feeds/videos.xml?channel_id=UCct7QF2jcWRY6dhXWMSq9LQ"); 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /Tests/RSParserTests/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | BNDL 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleVersion 20 | 1 21 | 22 | 23 | -------------------------------------------------------------------------------- /Tests/RSParserTests/JSONFeedParserTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // JSONFeedParserTests.swift 3 | // RSParser 4 | // 5 | // Created by Brent Simmons on 6/26/17. 6 | // Copyright © 2017 Ranchero Software, LLC. All rights reserved. 7 | // 8 | 9 | import XCTest 10 | import RSParser 11 | 12 | class JSONFeedParserTests: XCTestCase { 13 | 14 | func testInessentialPerformance() { 15 | 16 | // 0.001 sec on my 2012 iMac. 17 | let d = parserData("inessential", "json", "http://inessential.com/") 18 | self.measure { 19 | let _ = try! FeedParser.parse(d) 20 | } 21 | } 22 | 23 | func testDaringFireballPerformance() { 24 | 25 | // 0.009 sec on my 2012 iMac. 
26 | let d = parserData("DaringFireball", "json", "http://daringfireball.net/") 27 | self.measure { 28 | let _ = try! FeedParser.parse(d) 29 | } 30 | } 31 | 32 | func testGettingFaviconAndIconURLs() { 33 | 34 | let d = parserData("DaringFireball", "json", "http://daringfireball.net/") 35 | let parsedFeed = try! FeedParser.parse(d)! 36 | 37 | XCTAssert(parsedFeed.faviconURL == "https://daringfireball.net/graphics/favicon-64.png") 38 | XCTAssert(parsedFeed.iconURL == "https://daringfireball.net/graphics/apple-touch-icon.png") 39 | } 40 | 41 | func testAllThis() { 42 | 43 | let d = parserData("allthis", "json", "http://leancrew.com/allthis/") 44 | let parsedFeed = try! FeedParser.parse(d)! 45 | 46 | XCTAssertEqual(parsedFeed.items.count, 12) 47 | } 48 | 49 | func testCurt() { 50 | 51 | let d = parserData("curt", "json", "http://curtclifton.net/") 52 | let parsedFeed = try! FeedParser.parse(d)! 53 | 54 | XCTAssertEqual(parsedFeed.items.count, 26) 55 | 56 | var didFindTwitterQuitterArticle = false 57 | for article in parsedFeed.items { 58 | if article.title == "Twitter Quitter" { 59 | didFindTwitterQuitterArticle = true 60 | XCTAssertTrue(article.contentHTML!.hasPrefix("

I’ve decided to close my Twitter account. William Van Hecke makes a convincing case")) 61 | } 62 | } 63 | 64 | XCTAssertTrue(didFindTwitterQuitterArticle) 65 | } 66 | 67 | func testPixelEnvy() { 68 | 69 | let d = parserData("pxlnv", "json", "http://pxlnv.com/") 70 | let parsedFeed = try! FeedParser.parse(d)! 71 | XCTAssertEqual(parsedFeed.items.count, 20) 72 | 73 | } 74 | 75 | func testRose() { 76 | let d = parserData("rose", "json", "http://www.rosemaryorchard.com/") 77 | let parsedFeed = try! FeedParser.parse(d)! 78 | XCTAssertEqual(parsedFeed.items.count, 84) 79 | } 80 | 81 | func test3960() { 82 | let d = parserData("3960", "json", "http://journal.3960.org/") 83 | let parsedFeed = try! FeedParser.parse(d)! 84 | XCTAssertEqual(parsedFeed.items.count, 20) 85 | XCTAssertEqual(parsedFeed.language, "de-DE") 86 | 87 | for item in parsedFeed.items { 88 | XCTAssertEqual(item.language, "de-DE") 89 | } 90 | } 91 | 92 | func testAuthors() { 93 | let d = parserData("authors", "json", "https://example.com/") 94 | let parsedFeed = try! FeedParser.parse(d)! 95 | XCTAssertEqual(parsedFeed.items.count, 4) 96 | 97 | let rootAuthors = Set([ 98 | ParsedAuthor(name: "Root Author 1", url: nil, avatarURL: nil, emailAddress: nil), 99 | ParsedAuthor(name: "Root Author 2", url: nil, avatarURL: nil, emailAddress: nil) 100 | ]) 101 | let itemAuthors = Set([ 102 | ParsedAuthor(name: "Item Author 1", url: nil, avatarURL: nil, emailAddress: nil), 103 | ParsedAuthor(name: "Item Author 2", url: nil, avatarURL: nil, emailAddress: nil) 104 | ]) 105 | let legacyItemAuthors = Set([ 106 | ParsedAuthor(name: "Legacy Item Author", url: nil, avatarURL: nil, emailAddress: nil) 107 | ]) 108 | 109 | XCTAssertEqual(parsedFeed.authors?.count, 2) 110 | XCTAssertEqual(parsedFeed.authors, rootAuthors) 111 | 112 | let noAuthorsItem = parsedFeed.items.first { $0.uniqueID == "Item without authors" }! 
113 | XCTAssertEqual(noAuthorsItem.authors, nil) 114 | 115 | let legacyAuthorItem = parsedFeed.items.first { $0.uniqueID == "Item with legacy author" }! 116 | XCTAssertEqual(legacyAuthorItem.authors, legacyItemAuthors) 117 | 118 | let modernAuthorsItem = parsedFeed.items.first { $0.uniqueID == "Item with modern authors" }! 119 | XCTAssertEqual(modernAuthorsItem.authors, itemAuthors) 120 | 121 | let bothAuthorsItem = parsedFeed.items.first { $0.uniqueID == "Item with both" }! 122 | XCTAssertEqual(bothAuthorsItem.authors, itemAuthors) 123 | } 124 | 125 | func testExtensions() { 126 | let d = parserData("jsonfeed-extension", "json", "https://example.com/") 127 | let parsedFeed = try! FeedParser.parse(d)! 128 | 129 | XCTAssertEqual(parsedFeed.extensions?.count, 1) 130 | 131 | let nonexistentExtension = parsedFeed.extensions?["_nonexistent"] 132 | XCTAssertNil(nonexistentExtension) 133 | 134 | let feedExtension = parsedFeed.extensions!["_contoso"]! 135 | XCTAssertEqual(feedExtension.name, "_contoso") 136 | 137 | // Test content access 138 | XCTAssertEqual(feedExtension["about"], "Contoso JSON feed extension. There is actually no such extension in real life. This file just tests if the JSONFeed parser can parse a feed with JSON Feed extensions.") 139 | XCTAssertEqual(feedExtension["someNumber"], 42) 140 | XCTAssertEqual(feedExtension["someBool"], true) 141 | XCTAssertEqual(feedExtension["someString"], "Hello Contoso") 142 | 143 | let chronologicalItems = parsedFeed.items.sorted { $0.datePublished! > $1.datePublished! } 144 | 145 | let itemExtension = chronologicalItems[0].extensions!["_contoso"]! 
146 | XCTAssertEqual(itemExtension.name, "_contoso") 147 | XCTAssertEqual(itemExtension["someKey"], "SomeValue2") 148 | XCTAssertEqual(itemExtension["someBoolKey"], true) 149 | XCTAssertEqual(itemExtension["anotherKey"], "AnotherValue2") 150 | 151 | XCTAssertEqual(itemExtension["someKey"], "SomeValue2") 152 | 153 | let secondItemExtension = chronologicalItems[1].extensions!["_contoso"]! 154 | XCTAssertEqual(secondItemExtension["someBoolKey"], false) 155 | XCTAssertEqual(secondItemExtension["someIntKey"], 43) 156 | 157 | let thirdItemExtensions = chronologicalItems[2].extensions 158 | XCTAssertNil(thirdItemExtensions) 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /Tests/RSParserTests/OPMLTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // OPMLTests.swift 3 | // RSParser 4 | // 5 | // Created by Brent Simmons on 6/25/17. 6 | // Copyright © 2017 Ranchero Software, LLC. All rights reserved. 7 | // 8 | 9 | import XCTest 10 | import RSParser 11 | import RSParserObjC 12 | 13 | class OPMLTests: XCTestCase { 14 | 15 | let subsData = parserData("Subs", "opml", "http://example.org/") 16 | 17 | func testOPMLParsingPerformance() { 18 | 19 | // 0.002 sec on my 2012 iMac. 20 | self.measure { 21 | let _ = try! RSOPMLParser.parseOPML(with: self.subsData) 22 | } 23 | } 24 | 25 | func testNotOPML() { 26 | 27 | let d = parserData("DaringFireball", "rss", "http://daringfireball.net/") 28 | XCTAssertThrowsError(try RSOPMLParser.parseOPML(with: d)) 29 | } 30 | 31 | func testSubsStructure() { 32 | let opmlDocument = try! 
RSOPMLParser.parseOPML(with: subsData) 33 | XCTAssertEqual("Subs", opmlDocument.title) 34 | XCTAssertEqual("http://example.org/", opmlDocument.url) 35 | recursivelyCheckOPMLStructure(opmlDocument) 36 | } 37 | 38 | 39 | func testFindingTitles() { 40 | // https://github.com/brentsimmons/NetNewsWire/issues/527 41 | // Fix a bug where titles aren’t found when there’s no title attribute in the OPML, 42 | // which appears to be true with OPML generated by The Old Reader. 43 | 44 | let d = parserData("SubsNoTitleAttributes", "opml", "http://example.org/") 45 | let opmlDocument = try! RSOPMLParser.parseOPML(with: d) 46 | recursivelyCheckOPMLStructure(opmlDocument) 47 | } 48 | 49 | } 50 | 51 | private extension OPMLTests { 52 | 53 | func recursivelyCheckOPMLStructure(_ item: RSOPMLItem) { 54 | let feedSpecifier = item.feedSpecifier 55 | if !(item is RSOPMLDocument) { 56 | XCTAssertNotNil((item.attributes! as NSDictionary).opml_text) 57 | } 58 | 59 | // If it has no children, it should have a feed specifier. The converse is also true. 60 | var isFolder = item.children != nil && item.children!.count > 0 61 | if !isFolder && (item.attributes! as NSDictionary).opml_title == "Skip" { 62 | isFolder = true 63 | } 64 | 65 | if !isFolder { 66 | XCTAssertNotNil(feedSpecifier!.title) 67 | XCTAssertNotNil(feedSpecifier!.feedURL) 68 | } 69 | else { 70 | XCTAssertNil(feedSpecifier) 71 | } 72 | 73 | if item.children != nil && item.children!.count > 0 { 74 | for oneItem in item.children! { 75 | recursivelyCheckOPMLStructure(oneItem) 76 | } 77 | } 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /Tests/RSParserTests/RSDateParserTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // RSDateParserTests.swift 3 | // 4 | // 5 | // Created by Maurice Parker on 4/1/21. 
6 | // 7 | 8 | import Foundation 9 | import XCTest 10 | import RSParser 11 | 12 | class RSDateParserTests: XCTestCase { 13 | /// Builds the Date for the given Gregorian date/time fields at GMT; `milliseconds` is converted to nanoseconds. 14 | static func dateWithValues(_ year: Int, _ month: Int, _ day: Int, _ hour: Int, _ minute: Int, _ second: Int, _ milliseconds: Int = 0) -> Date { 15 | var dateComponents = DateComponents() 16 | dateComponents.calendar = Calendar(identifier: .gregorian) // fixed Gregorian calendar so the expected dates do not depend on the user's calendar setting 17 | dateComponents.timeZone = TimeZone(secondsFromGMT: 0) 18 | 19 | dateComponents.year = year 20 | dateComponents.month = month 21 | dateComponents.day = day 22 | dateComponents.hour = hour 23 | dateComponents.minute = minute 24 | dateComponents.second = second 25 | dateComponents.nanosecond = milliseconds * 1000000 26 | 27 | return dateComponents.date! 28 | } 29 | 30 | func testDateWithString() { 31 | var expectedDateResult = Self.dateWithValues(2010, 5, 28, 21, 3, 38) 32 | 33 | var d = RSDateWithString("Fri, 28 May 2010 21:03:38 +0000") 34 | XCTAssertEqual(d, expectedDateResult) 35 | 36 | d = RSDateWithString("Fri, 28 May 2010 21:03:38 +00:00") 37 | XCTAssertEqual(d, expectedDateResult) 38 | 39 | d = RSDateWithString("Fri, 28 May 2010 21:03:38 -00:00") 40 | XCTAssertEqual(d, expectedDateResult) 41 | 42 | d = RSDateWithString("Fri, 28 May 2010 21:03:38 -0000") 43 | XCTAssertEqual(d, expectedDateResult) 44 | 45 | d = RSDateWithString("Fri, 28 May 2010 21:03:38 GMT") 46 | XCTAssertEqual(d, expectedDateResult) 47 | 48 | d = RSDateWithString("2010-05-28T21:03:38+00:00") 49 | XCTAssertEqual(d, expectedDateResult) 50 | 51 | d = RSDateWithString("2010-05-28T21:03:38+0000") 52 | XCTAssertEqual(d, expectedDateResult) 53 | 54 | d = RSDateWithString("2010-05-28T21:03:38-0000") 55 | XCTAssertEqual(d, expectedDateResult) 56 | 57 | d = RSDateWithString("2010-05-28T21:03:38-00:00") 58 | XCTAssertEqual(d, expectedDateResult) 59 | 60 | d = RSDateWithString("2010-05-28T21:03:38Z") 61 | XCTAssertEqual(d, expectedDateResult) 62 | 63 | expectedDateResult = Self.dateWithValues(2010, 7, 13, 17, 6, 40) 64 | d =
RSDateWithString("2010-07-13T17:06:40+00:00") 65 | XCTAssertEqual(d, expectedDateResult) 66 | 67 | expectedDateResult = Self.dateWithValues(2010, 4, 30, 12, 0, 0) 68 | d = RSDateWithString("30 Apr 2010 5:00 PDT") 69 | XCTAssertEqual(d, expectedDateResult) 70 | 71 | expectedDateResult = Self.dateWithValues(2010, 5, 21, 21, 22, 53) 72 | d = RSDateWithString("21 May 2010 21:22:53 GMT") 73 | XCTAssertEqual(d, expectedDateResult) 74 | 75 | expectedDateResult = Self.dateWithValues(2010, 6, 9, 5, 0, 0) 76 | d = RSDateWithString("Wed, 09 Jun 2010 00:00 EST") 77 | XCTAssertEqual(d, expectedDateResult) 78 | 79 | expectedDateResult = Self.dateWithValues(2010, 6, 23, 3, 43, 50) 80 | d = RSDateWithString("Wed, 23 Jun 2010 03:43:50 Z") 81 | XCTAssertEqual(d, expectedDateResult) 82 | 83 | expectedDateResult = Self.dateWithValues(2010, 6, 22, 3, 57, 49) 84 | d = RSDateWithString("2010-06-22T03:57:49+00:00") 85 | XCTAssertEqual(d, expectedDateResult) 86 | 87 | expectedDateResult = Self.dateWithValues(2010, 11, 17, 13, 40, 07) 88 | d = RSDateWithString("2010-11-17T08:40:07-05:00") 89 | XCTAssertEqual(d, expectedDateResult) 90 | } 91 | 92 | func testAtomDateWithMissingTCharacter() { 93 | let expectedDateResult = Self.dateWithValues(2010, 11, 17, 13, 40, 07) 94 | let d = RSDateWithString("2010-11-17 08:40:07-05:00") 95 | XCTAssertEqual(d, expectedDateResult) 96 | } 97 | 98 | func testFeedbinDate() { 99 | let expectedDateResult = Self.dateWithValues(2019, 9, 27, 21, 01, 48) 100 | let d = RSDateWithString("2019-09-27T21:01:48.000000Z") 101 | XCTAssertEqual(d, expectedDateResult) 102 | } 103 | 104 | func testHighMillisecondDate() { 105 | let expectedDateResult = Self.dateWithValues(2021, 03, 29, 10, 46, 56, 516) 106 | let d = RSDateWithString("2021-03-29T10:46:56.516941+00:00") 107 | XCTAssertEqual(d!.timeIntervalSince1970, expectedDateResult.timeIntervalSince1970, accuracy: 0.000001) 108 | } 109 | } 110 | -------------------------------------------------------------------------------- 
/Tests/RSParserTests/RSSInJSONParserTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // RSSInJSONParserTests.swift 3 | // RSParser 4 | // 5 | // Created by Brent Simmons on 6/26/17. 6 | // Copyright © 2017 Ranchero Software, LLC. All rights reserved. 7 | // 8 | 9 | import XCTest 10 | import RSParser 11 | 12 | class RSSInJSONParserTests: XCTestCase { 13 | 14 | func testScriptingNewsPerformance() { 15 | 16 | // 0.003 sec on my 2012 iMac. 17 | let d = parserData("ScriptingNews", "json", "http://scripting.com/") 18 | self.measure { 19 | let _ = try! FeedParser.parse(d) 20 | } 21 | } 22 | 23 | func testFeedLanguage() { 24 | let d = parserData("ScriptingNews", "json", "http://scripting.com/") 25 | let parsedFeed = try! FeedParser.parse(d)! 26 | XCTAssertEqual(parsedFeed.language, "en-us") 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /Tests/RSParserTests/RSSParserTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // RSSParserTests.swift 3 | // RSParser 4 | // 5 | // Created by Brent Simmons on 6/26/17. 6 | // Copyright © 2017 Ranchero Software, LLC. All rights reserved. 7 | // 8 | 9 | import XCTest 10 | import RSParser 11 | 12 | class RSSParserTests: XCTestCase { 13 | 14 | func testScriptingNewsPerformance() { 15 | 16 | // 0.004 sec on my 2012 iMac. 17 | let d = parserData("scriptingNews", "rss", "http://scripting.com/") 18 | self.measure { 19 | let _ = try! FeedParser.parse(d) 20 | } 21 | } 22 | 23 | func testKatieFloydPerformance() { 24 | 25 | // 0.004 sec on my 2012 iMac. 26 | let d = parserData("KatieFloyd", "rss", "http://katiefloyd.com/") 27 | self.measure { 28 | let _ = try! FeedParser.parse(d) 29 | } 30 | } 31 | 32 | func testEMarleyPerformance() { 33 | 34 | // 0.001 sec on my 2012 iMac. 35 | let d = parserData("EMarley", "rss", "https://medium.com/@emarley") 36 | self.measure { 37 | let _ = try! 
FeedParser.parse(d) 38 | } 39 | } 40 | 41 | func testMantonPerformance() { 42 | 43 | // 0.002 sec on my 2012 iMac. 44 | let d = parserData("manton", "rss", "http://manton.org/") 45 | self.measure { 46 | let _ = try! FeedParser.parse(d) 47 | } 48 | } 49 | 50 | func testNatashaTheRobot() { 51 | 52 | let d = parserData("natasha", "xml", "https://www.natashatherobot.com/") 53 | let parsedFeed = try! FeedParser.parse(d)! 54 | XCTAssertEqual(parsedFeed.items.count, 10) 55 | } 56 | 57 | func testTheOmniShowAttachments() { 58 | 59 | let d = parserData("theomnishow", "rss", "https://theomnishow.omnigroup.com/") 60 | let parsedFeed = try! FeedParser.parse(d)! 61 | 62 | for article in parsedFeed.items { 63 | XCTAssertNotNil(article.attachments) 64 | XCTAssertEqual(article.attachments!.count, 1) 65 | let attachment = Array(article.attachments!).first! 66 | XCTAssertNotNil(attachment.mimeType) 67 | XCTAssertNotNil(attachment.sizeInBytes) 68 | XCTAssert(attachment.url.contains("cloudfront")) 69 | XCTAssertGreaterThanOrEqual(attachment.sizeInBytes!, 22275279) 70 | XCTAssertEqual(attachment.mimeType, "audio/mpeg") 71 | } 72 | } 73 | 74 | func testTheOmniShowUniqueIDs() { 75 | 76 | let d = parserData("theomnishow", "rss", "https://theomnishow.omnigroup.com/") 77 | let parsedFeed = try! FeedParser.parse(d)! 78 | 79 | for article in parsedFeed.items { 80 | XCTAssertNotNil(article.uniqueID) 81 | XCTAssertTrue(article.uniqueID.hasPrefix("https://theomnishow.omnigroup.com/episode/")) 82 | } 83 | } 84 | 85 | func testMacworldUniqueIDs() { 86 | 87 | // Macworld’s feed doesn’t have guids, so they should be calculated unique IDs. 88 | 89 | let d = parserData("macworld", "rss", "https://www.macworld.com/") 90 | let parsedFeed = try! FeedParser.parse(d)! 
91 | 92 | for article in parsedFeed.items { 93 | XCTAssertNotNil(article.uniqueID) 94 | XCTAssertEqual(article.uniqueID.count, 32) // calculated unique IDs are MD5 hashes 95 | } 96 | } 97 | 98 | func testMacworldAuthors() { 99 | 100 | // Macworld uses names instead of email addresses (despite the RSS spec saying they should be email addresses). 101 | 102 | let d = parserData("macworld", "rss", "https://www.macworld.com/") 103 | let parsedFeed = try! FeedParser.parse(d)! 104 | 105 | for article in parsedFeed.items { 106 | 107 | let author = article.authors!.first! 108 | XCTAssertNil(author.emailAddress) 109 | XCTAssertNil(author.url) 110 | XCTAssertNotNil(author.name) 111 | } 112 | } 113 | 114 | func testMonkeyDomGuids() { 115 | 116 | // https://coding.monkeydom.de/posts.rss has a bug in the feed (at this writing): 117 | // It has guids that are supposed to be permalinks, per the spec — 118 | // except that they’re not actually permalinks. The RSS parser should 119 | // detect this situation, and every article in the feed should have a permalink. 120 | // NOTE(review): the assertions below require article.url to be nil (and uniqueID to be set), which appears to contradict "should have a permalink" — confirm which is intended. 121 | let d = parserData("monkeydom", "rss", "https://coding.monkeydom.de/") 122 | let parsedFeed = try! FeedParser.parse(d)! 123 | 124 | for article in parsedFeed.items { 125 | XCTAssertNil(article.url) 126 | XCTAssertNotNil(article.uniqueID) 127 | } 128 | } 129 | 130 | func testEmptyContentEncoded() { 131 | // The ATP feed (at the time of this writing) has some empty content:encoded elements. The parser should ignore those. 132 | // https://github.com/brentsimmons/NetNewsWire/issues/529 133 | 134 | let d = parserData("atp", "rss", "http://atp.fm/") 135 | let parsedFeed = try! FeedParser.parse(d)! 136 | 137 | for article in parsedFeed.items { 138 | XCTAssertNotNil(article.contentHTML) 139 | } 140 | } 141 | 142 | func testFeedKnownToHaveGuidsThatArentPermalinks() { 143 | let d = parserData("livemint", "xml", "https://www.livemint.com/rss/news") 144 | let parsedFeed = try! FeedParser.parse(d)!
145 | for article in parsedFeed.items { 146 | XCTAssertNil(article.url) 147 | } 148 | } 149 | 150 | func testAuthorsWithTitlesInside() { 151 | // This feed uses atom authors, and we don’t want author/title to be used as item/title. 152 | // https://github.com/brentsimmons/NetNewsWire/issues/943 153 | let d = parserData("cloudblog", "rss", "https://cloudblog.withgoogle.com/") 154 | let parsedFeed = try! FeedParser.parse(d)! 155 | for article in parsedFeed.items { 156 | XCTAssertNotEqual(article.title, "Product Manager, Office of the CTO") 157 | XCTAssertNotEqual(article.title, "Developer Programs Engineer") 158 | XCTAssertNotEqual(article.title, "Product Director") 159 | } 160 | } 161 | 162 | func testTitlesWithInvalidFeedWithImageStructures() { 163 | // This invalid feed has image elements inside its item elements. 164 | // 17 Jan 2021 bug report — we’re not parsing titles in this feed. 165 | let d = parserData("aktuality", "rss", "https://www.aktuality.sk/") 166 | let parsedFeed = try! FeedParser.parse(d)! 167 | for article in parsedFeed.items { 168 | XCTAssertNotNil(article.title) 169 | } 170 | } 171 | 172 | func testFeedTitleWithTextInput() { 173 | // This feed has a textInput element in the channel. This is valid, but uncommon. 174 | // Previously, this incorrectly caused the parser to set the feed's title to the textInput's title. 175 | let d = parserData("rubenerd", "rss", "https://rubenerd.com/") 176 | let parsedFeed = try! FeedParser.parse(d)! 177 | XCTAssertEqual(parsedFeed.title, "Rubenerd") 178 | } 179 | 180 | func testFeedLanguage() { 181 | let d = parserData("manton", "rss", "http://manton.org/") 182 | let parsedFeed = try! FeedParser.parse(d)! 183 | XCTAssertEqual(parsedFeed.language, "en-US") 184 | } 185 | 186 | // func testFeedWithGB2312Encoding() { 187 | // // This feed has an encoding we don’t run into very often. 188 | // // https://github.com/Ranchero-Software/NetNewsWire/issues/1477 189 | // let d = parserData("kc0011", "rss", "http://kc0011.net/") 190 | // let parsedFeed = try!
FeedParser.parse(d)! 191 | // XCTAssert(parsedFeed.items.count > 0) 192 | // for article in parsedFeed.items { 193 | // XCTAssertNotNil(article.contentHTML) 194 | // } 195 | // } 196 | } 197 | -------------------------------------------------------------------------------- /Tests/RSParserTests/Resources/EMarley.rss: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?><rss xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0" xmlns:cc="http://cyber.law.harvard.edu/rss/creativeCommonsRssModule.html"> 2 | <channel> 3 | <title><![CDATA[Stories by Liz Marley on Medium]]> 4 | 5 | https://medium.com/@emarley?source=rss-b4981c59ffa5------2 6 | 7 | https://d262ilb51hltx0.cloudfront.net/fit/c/150/150/0*I9s5OlzJw_En0NzC.jpg 8 | Stories by Liz Marley on Medium 9 | https://medium.com/@emarley?source=rss-b4981c59ffa5------2 10 | 11 | Medium 12 | Sun, 28 Aug 2016 17:27:51 GMT 13 | 14 | 15 | 16 | 17 | <![CDATA[UI Automation & screenshots]]> 18 |

Here’s a partial collection of links from my talk today…

]]> 19 | https://medium.com/@emarley/ui-automation-screenshots-c44a41af38d1?source=rss-b4981c59ffa5------2 20 | https://medium.com/p/c44a41af38d1 21 | 22 | Sat, 07 May 2016 23:53:30 GMT 23 | 24 | 25 | <![CDATA[They didn’t.]]> 26 |

“The [software developer tool] team clearly doesn’t use [that tool] themselves.”

]]>
27 | https://medium.com/@emarley/they-didn-t-3a4dab489f45?source=rss-b4981c59ffa5------2 28 | https://medium.com/p/3a4dab489f45 29 | 30 | Sat, 09 Jan 2016 15:29:25 GMT 31 |
32 | 33 | <![CDATA[Side quest: Drawing]]> 34 |

]]>
35 | https://medium.com/@emarley/side-quest-drawing-b959ded1a1a4?source=rss-b4981c59ffa5------2 36 | https://medium.com/p/b959ded1a1a4 37 | 38 | Wed, 09 Dec 2015 03:37:35 GMT 39 |
40 | 41 | <![CDATA[And if I somehow lose the iPad Pro, I can find that with Find My iPhone.]]> 42 | ]]> 43 | https://medium.com/@emarley/and-if-i-somehow-lose-the-ipad-pro-i-can-find-that-with-find-my-iphone-e9aa43486521?source=rss-b4981c59ffa5------2 44 | https://medium.com/p/e9aa43486521 45 | 46 | Mon, 23 Nov 2015 19:38:20 GMT 47 | 48 | 49 | <![CDATA[Though not as much more weight as you might expect.]]> 50 | ]]> 51 | https://medium.com/@emarley/though-not-as-much-more-weight-as-you-might-expect-7b33fe989f6e?source=rss-b4981c59ffa5------2 52 | https://medium.com/p/7b33fe989f6e 53 | 54 | Mon, 23 Nov 2015 19:37:38 GMT 55 | 56 | 57 | <![CDATA[I avoided art classes in high school and college because I was afraid they would hurt my GPA.]]> 58 | ]]> 59 | https://medium.com/@emarley/i-avoided-art-classes-in-high-school-and-college-because-i-was-afraid-they-would-hurt-my-gpa-ab916601f2ad?source=rss-b4981c59ffa5------2 60 | https://medium.com/p/ab916601f2ad 61 | 62 | Mon, 23 Nov 2015 19:37:13 GMT 63 | 64 | 65 | <![CDATA[Finding Value]]> 66 |

I lose things a lot. Sometimes they’re just misplaced, sometimes gone forever. I don’t know if I have ever run out of ink in a pen—there’s…

]]>
67 | https://medium.com/@emarley/finding-value-20a90bf5ebf?source=rss-b4981c59ffa5------2 68 | https://medium.com/p/20a90bf5ebf 69 | 70 | Mon, 23 Nov 2015 19:34:18 GMT 71 |
72 | 73 | <![CDATA[Replaying this post in my head last night, I regret this word.]]> 74 |

Keyboard shortcuts, and other little details may be programmatically simple to set up, but they are still an important part of an app’s…

]]>
75 | https://medium.com/@emarley/replaying-this-post-in-my-head-last-night-i-regret-this-word-d8ed0b43f0f9?source=rss-b4981c59ffa5------2 76 | https://medium.com/p/d8ed0b43f0f9 77 | 78 | Tue, 10 Nov 2015 18:08:19 GMT 79 |
80 | 81 | <![CDATA[Betterment]]> 82 |

I moved from Senior Test Pilot to Software Engineer last month.

]]>
83 | https://medium.com/@emarley/betterment-e0ef45fcd284?source=rss-b4981c59ffa5------2 84 | https://medium.com/p/e0ef45fcd284 85 | 86 | Tue, 10 Nov 2015 02:17:46 GMT 87 |
88 | 89 | <![CDATA[This is a test.]]> 90 |

This is only a test.

]]>
91 | https://medium.com/@emarley/this-is-a-test-6ab141a1c5b5?source=rss-b4981c59ffa5------2 92 | https://medium.com/p/6ab141a1c5b5 93 | 94 | Sun, 20 Sep 2015 07:00:44 GMT 95 |
96 | 97 | -------------------------------------------------------------------------------- /Tests/RSParserTests/Resources/allthis-partial.json: -------------------------------------------------------------------------------- 1 | {"description": "I just said what I said and it was wrong. Or was taken wrong.", "feed_url": "http://leancrew.com/all-this/feed.json", "title": "And now it’s all this", "items": [{"title": "Last thoughts on modifier keys", "url": "http://leancrew.com/all-this/2017/11/last-thoughts-on-modifier-keys/", "author": {"name": "Dr. Drang"}, "summary": "The first shall be last.", "date_published": "2017-11-23T21:08:29+00:00", "id": "http://leancrew.com/all-this/2017/11/last-thoughts-on-modifier-keys/", "content_html": "

When I wrote the post about ordering Mac modifier keys a few days ago, I was thinking primarily about the proper order of the symbols when writing about a keyboard shortcut, like ⌃⌥⌘P.1. I mentioned parenthetically that this order isn’t always observed when people speak about keyboard shortcuts or when they write the names of the keys out fully, as in “Command-Shift-3 takes a screenshot.”

\n

Jason Snell, in both a post at Six Colors and in conversation with John Siracusa on the lastest episode of Upgrade, took a stand against Apple’s ordering:2

\n
\n

Command is the commander! Command is the monarch of all keys! Command always comes first, in my book.

\n
\n

Siracusa agreed, and so do I. The ⌘ key is, and has always been, the key that signals a keyboard shortcut. While other modifier keys are sometimes used without ⌘—in cursor control and text selection, for example—I can’t think of any Apple applications that don’t use ⌘ to signal a keyboard shortcut for a menu item. And that primacy in shortcuts to menu items is, I think, why Apple puts it last rather than first.

\n

Keyboard shortcuts are always presented right-justified along the right edge of the menu. The most common shortcuts are just ⌘ and a letter, like ⌘N to start a new document, for example. It’s typically the variations on the basic command that get additional modifier keys, like ⌥⌘N to start a new project. If that were presented in a menu as ⌘⌥N, the menu would look wrong because the ⌘ symbols wouldn’t line up.

\n

Here’s the File menu in Safari:

\n

\"Safari

\n

There are two different New commands and three different Close commands. This, in Apple’s opinion (and mine), wouldn’t be right:

\n

\"Altered

\n

It’s not just having the ⌘ symbols aligned. The additional modifier symbols go in front because ⌘ is king and must sit next to the N or the W. The importance of the modifier decreases as you move away from the letter.

\n

It should go without saying—but I’ll say it anyway—that the letter (or number or whatever) key is the most important because nothing happens until it’s pressed.

\n

Having said all this, and despite agreeing with Apple’s symbol ordering, my ear for shortcut ordering works just like Jason’s and John’s. The main reason I use keyboard shortcut symbols in my posts instead of words is that I can read ⌥⇧⌘W and not be bothered because I don’t “hear” it as I read the symbols. “Option-Shift-Command-W,” on the other hand, gets sounded out in my head, and it sounds wrong.

\n

I suspect that’s why Apple’s own documentation sometimes gets the order wrong when the modifiers get written out as words. In speaking out the keys, “Command” is natural to put first because it announces that what’s coming is a keyboard shortcut.

\n
\n
\n
    \n
  1. \n

    Which happens to be the shortcut I use for previewing a blog post locally before publishing it. ↩︎

    \n
  2. \n
  3. \n

    In the original post, I said I didn’t know where the order was documented. A few people pointed me to both the Human Interface Guidelines and the Style Guide, where Apple gives the proper order explicitly. ↩︎

    \n
  4. \n
\n

\n

[If the formatting looks odd in your feed reader, visit the original article]

"}, {"title": "My next Mac", "url": "http://leancrew.com/all-this/2017/11/my-next-mac/", "author": {"name": "Dr. Drang"}, "summary": "Apple isn't making it easy to choose a Mac and hasn't for a few years.", "date_published": "2017-11-22T22:04:57+00:00", "id": "http://leancrew.com/all-this/2017/11/my-next-mac/", "content_html": "

Will probably be an iMac. I guess that spoils the suspense, doesn’t it?

\n

My iMac at work is the 27″ Late 2012 model, the one that came out one step before Retina came to the iMac. I don’t regret buying it, as my previous iMac (a 2006 model, I think) was absolutely on its last legs—constantly swapping to hard disk and running hot. I hadn’t meant to wait so long to replace it, but there was a long delay before that 2012 model came out, and I didn’t want to buy something that would be last year’s model almost as soon as I set it up.

\n

My home Mac is the venerable 2010 13″ MacBook Air, the first good Air. In the normal course of things, this would be the Mac I replace next, and I’ve been expecting to do so for a few years now. but…

\n

But Apple never came out with a Retina MacBook Air, choosing instead to go with the MacBook, which I find a little too far on the portable side of the portability/power spectrum. A couple of years ago, I had a crisis when my Air crapped out on me. It seemed wrong to put money into a five-year-old machine, but I wasn’t enthused about any of the MacBooks in the lineup at the time. I didn’t know the just-released 2015 MacBook Pro would turn out to be the best laptop ever made, I didn’t want to spend MacBook Pro money on my home/travel machine.

\n

The $280 logic board upgrade turned out to be a good investment, as I’m now 2½ years into my rejuvenated Air. Yes, it takes a while to wake up when I open the lid. Yes, its 128 GB SSD is tiny. No, it can’t take advantage of many of the iOS integration features that newer Macs can. And no, I don’t think it’s a good idea to install High Sierra on it. But it’s given me 30 months of faithful use, much more than I expected at the time.

\n

The announcement of the Touch Bar last year made me certain I’d be getting a MacBook Pro with it. A software-configurable set of controls seemed perfect for someone who’s always ginning up little scripts. But no one seems to like it, possibly because its configurability isn’t especially open to users. Bummer.

\n

I’ve delayed the decision on my home Mac for such a long time that now my office Mac is long in the tooth, too. Still working fine for most tasks, but just a Core 2 Duo machine that often makes me wait to scroll through long PDFs of scanned engineering and architectural drawings, something I need to do at work quite often. And no Retina.

\n

So it looks like my best bet is to buy a new iMac for work and bring my current office iMac home. This will put the power where I need it the most and will give me extra ooomph here at home. Especially with disk space (3 TB vs. 128 GB) and RAM (24GB vs. 4GB).

\n

It will be weird, though, as I haven’t had a desktop computer here at home in a dozen years. Will I enjoy being tethered to one spot in the house? And what about a travel computer?

\n

Both of these questions are made less pressing by the device I’m typing this on: a 9.7″ iPad Pro. While I agree with Gabe that it is by no means a Mac substitute, it can handle a lot of what I do at home and virtually everything I need to do on the road.1

\n

As for which iMac, I think I’ll settle on a middle-of-the-road 27″ configuration with a 3TB Fusion drive. Sort of the 2017 of what I bought in 2012.

\n
\n
\n
    \n
  1. \n

    I bought the iPad Pro last year as a sort of experiment to find out how comfortable I’d be working on it. I intend to write a full post about the results of that experiment soon, but in the meantime, you really should read Gabe’s post over at Macdrifter. I’ll probably use his post as a jumping-off point. And there may be a quiz. ↩︎

    \n
  2. \n
\n

\n

[If the formatting looks odd in your feed reader, visit the original article]

"}, {"title": "Modifier key order", "url": "http://leancrew.com/all-this/2017/11/modifier-key-order/", "author": {"name": "Dr. Drang"}, "summary": "Writing about Mac keyboard shortcuts? Make sure you put them in canonical order.", "date_published": "2017-11-20T02:22:59+00:00", "id": "http://leancrew.com/all-this/2017/11/modifier-key-order/", "content_html": "

If you write about Mac keyboard shortcuts, as I did yesterday, you should know how to do it right. Just as there’s a proper order for adjectives in English, there’s a proper order for listing the modifier keys in a shortcut.

\n

I haven’t found any documentation for this, but Apple’s preferred order is clear in how they show the modifiers in menus and how they’re displayed in the Keyboard Shortcuts Setting.

\n

\"Canonical

\n

The order is similar to how you see them down at the bottom left of your keyboard.

\n

\"Modifier

\n

Control (⌃), Option (⌥), and Command (⌘) always go in that order. The oddball is the Shift(⇧) key, which sneaks in just in front of Command.

\n

Keyboard Maestro recognizes this standard order and presents its “hot key” shortcut the same way.

\n

\"Keyboard

\n

(When people speak about keyboard shortcuts, it’s not uncommon to put Command first, e.g., “Command-Shift-3 takes a screenshot.” I’ve seen it written out that way, too. Apple is usually pretty careful to use the same order when using words as when using symbols. This page, for example, uses “Shift-Command-3,” to match the ⇧⌘3 you’d see in the Keyboard Shortcut Setti 2 | -------------------------------------------------------------------------------- /Tests/RSParserTests/Resources/authors.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "https://jsonfeed.org/version/1.1", 3 | "title": "Author test feed", 4 | "authors": [{ "name": "Root Author 1"}, { "name": "Root Author 2" }], 5 | "author": { "name": "Legacy Root Author" }, 6 | "items": [ 7 | { 8 | "id": "Item without authors", 9 | "content_html": "" 10 | }, 11 | { 12 | "id": "Item with legacy author", 13 | "author": { "name": "Legacy Item Author" }, 14 | "content_html": "" 15 | }, 16 | { 17 | "id": "Item with modern authors", 18 | "authors": [{ "name": "Item Author 1" }, { "name": "Item Author 2" }], 19 | "content_html": "" 20 | }, 21 | { 22 | "id": "Item with both", 23 | "authors": [{ "name": "Item Author 1" }, { "name": "Item Author 2" }], 24 | "author": { "name": "Legacy Item Author" }, 25 | "content_html": "" 26 | } 27 | ] 28 | } 29 | -------------------------------------------------------------------------------- /Tests/RSParserTests/Resources/donthitsave.xml: -------------------------------------------------------------------------------- 1 |  Don't Hit Save https://donthitsave.com The webcomic that dares to take on the gritty world of software, technology, and indie game development (by Jeff Lofvers). en-us Skipping Around Fri, 24 May 2019 00:00:00 -0700 https://donthitsave.com/comic/2019/05/24/skipping-around

I was nearly murdered by a coconut today.

2 | 3 |

For real.

4 | 5 |

This happened as I took the dogs on their morning poop stroll (I can no longer speak or spell the word “walk” in my home). I wandered up and down the sidewalk, on some grass, and headed back home.

6 | 7 |

Near the end of the walk, I stopped to fiddle with my phone. I was listening to a podcast, and needed to skip the commercials. I stood in place for 30, maybe 40 seconds. When I was done, I took two steps... [read more]

]]> Budgets Fri, 17 May 2019 00:00:00 -0700 https://donthitsave.com/comic/2019/05/17/budgets

It's time I woke up before dawn again.

8 | 9 | 10 |

Voluntarily.

11 | 12 | 13 |

It not easy to beat the sun at its own game. Most of my social and computer life has been dominated by a late nights. I work during the day because I have to, then stay up as late as I want because I'm a grown up. Every once in a while, however, I will mix things up:

14 | 15 | 16 |
    17 |
  • A decade ago, I woke up every morning at 4:30 in order to go running. I did this for over two years. This lasted until I... [read more]

    ]]> Trade-In Fri, 10 May 2019 00:00:00 -0700 https://donthitsave.com/comic/2019/05/10/trade-in

    I bought a hammer this week. My first Hawaiian hammer.

    18 | 19 |

    I will use it not to hang something, not to bang something, but to remove a nail.

    20 | 21 |

    I hate that nail.

    22 | 23 |

    Today is a good day.

    24 | ]]>
    The Screwup Fri, 03 May 2019 06:00:00 -0700 https://donthitsave.com/comic/2019/05/03/the-screwup

    How's your night vision?

    25 | 26 | 27 |

    When I was little, mine was excellent. I could spot animals while walking at night, read in low light conditions, and effectively navigate the house based on the light of digital clocks. It only got better with time.

    28 | 29 | 30 |

    This wasn't a super power. I was adjusting to my surroundings.

    31 | 32 | 33 |

    I spent my younger years in upstate NY. Winter and cloudy weather seemed to last 9 months, which meant that I got used to gloomy conditions. Our house... [read more]

    ]]>
    Focus On The Task At Hand With A Little Help From Google Fri, 26 Apr 2019 00:00:00 -0700 https://donthitsave.com/comic/2019/04/26/ads-for-google

    Game of Thrones is back, Avengers are back, and the Tick Season 2 is out. It is a good month for amusing ourselves.

    34 | 35 |

    So why are you spending your time here?

    36 | 37 |

    Oh I know, you need some sort of control sample. What’s the point of having compelling and enjoyable entertainment if you don’t have something dull to compare it to? I see. It all makes sense now.

    38 | 39 |

    Wow. I can’t believe you think this comic dull. Nice, reader, reeeeeaaaal nice. I’ll remember... [read more]

    ]]>
    Clever Variable Name Fri, 19 Apr 2019 00:00:00 -0700 https://donthitsave.com/comic/2019/04/19/clever-variable-name

    Apologies for the extraordinarily late comic.

    40 | 41 |

    This is the first one ever delayed at length by my news. Here's why:

    42 | 43 |

    I wrote the comic on Thursday night. When I went to write the news in the morning, I decided to focus on the adorable habits of my two dogs. One of the monsters was acting weird. Comically weird.

    44 | 45 |

    I decided to write the news about that. I found it quite funny. I wrote a page and a half about how my dog was suddenly scared of everything. She ran from... [read more]

    ]]>
    Wrapping Up Client Work Fri, 12 Apr 2019 00:00:00 -0700 https://donthitsave.com/comic/2019/04/12/finishing-up-client-work

    I made Tofacos for dinner-- tofu tacos that are absolutely abhorrent. I love the food here in Hawaii, but when breakfast, lunch, and dinner consists of fish and noodles, things tend to get weird when I deviate.

    46 | 47 |

    I left about as big a mess as you can imagine while stirring the “food”. I’m not a great cook, I’m an adequate one. My system involves hopping from one nearly burning pile of food to another, narrowly staying ahead of a charred, blackened meal.

    48 | 49 |

    I’m... [read more]

    ]]>
    Personal Journey Fri, 05 Apr 2019 00:00:00 -0700 https://donthitsave.com/comic/2019/04/05/personal-journey

    To prove that I can.

    ]]>
    Reward Fri, 29 Mar 2019 08:00:00 -0700 https://donthitsave.com/comic/2019/03/29/reward

    Today’s comic is dedicated to my very sick team lead. Too bad you’re too sick to check and see if today’s comic is true.

    50 | 51 |

    For everyone else out there, welcome to the last 3 days you have to enter my sweepstakes thingy. If you want me to draw you, the deadline is Sunday, March 31st. Get to it!

    52 | 53 |

    If you’re reading after that date, or if you don’t care about my sweepstakes, I have only one... [read more]

    ]]>
    Costs Fri, 22 Mar 2019 00:00:00 -0700 https://donthitsave.com/comic/2019/03/22/costs

    It's 2am here. What time is it there? I'll post some news in some time.

    54 | 55 |

    56 | 57 |

    Okay, I’m back… at 2am. A different 2am. Let’s make this quick:

    58 | 59 |

    For those checking later, or too busy to google, here is an article about Facebook’s latest mishap, as mentioned in today’s comic.

    60 | 61 |

    In personal news, I've been thoroughly enjoying the new... [read more]

    ]]>
    -------------------------------------------------------------------------------- /Tests/RSParserTests/Resources/jsonfeed-extension.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "https://jsonfeed.org/version/1.1", 3 | "title": "JSON Extension test feed", 4 | "_contoso": { 5 | "about": "Contoso JSON feed extension. There is actually no such extension in real life. This file just tests if the JSONFeed parser can parse a feed with JSON Feed extensions.", 6 | "someNumber": 42, 7 | "someBool": true, 8 | "someString": "Hello Contoso" 9 | }, 10 | "items": [ 11 | { 12 | "id": "2", 13 | "content_html": "

    Second item.

    ", 14 | "date_published": "2021-03-05T13:04:04+02:00", 15 | "_contoso": { 16 | "someKey": "SomeValue2", 17 | "someBoolKey": true, 18 | "anotherKey": "AnotherValue2" 19 | }, 20 | "attachments": [ 21 | { 22 | "url": "https://example.com/SomeFile2.app.zip", 23 | "mime_type": "application/zip" 24 | } 25 | ] 26 | }, 27 | { 28 | "id": "1", 29 | "content_html": "

    First item.

    ", 30 | "date_published": "2021-02-26T15:04:04+02:00", 31 | "_contoso": { 32 | "someKey": "SomeValue1", 33 | "someBoolKey": false, 34 | "someIntKey": 43, 35 | "anotherKey": "AnotherValue1" 36 | }, 37 | "attachments": [ 38 | { 39 | "url": "https://example.com/SomeFile1.app.zip", 40 | "mime_type": "application/zip" 41 | } 42 | ] 43 | }, 44 | { 45 | "id": "0", 46 | "content_html": "

    Third item. Without an extension.

    ", 47 | "date_published": "2021-02-25T15:04:04+02:00" 48 | } 49 | ] 50 | } 51 | -------------------------------------------------------------------------------- /Tests/RSParserTests/Resources/kc0011.rss: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ranchero-Software/RSParser/1e73a4694fa5972b92e3d0012302e77770c32915/Tests/RSParserTests/Resources/kc0011.rss --------------------------------------------------------------------------------