├── .swift-version ├── .travis.yml ├── .gitignore ├── HNScraper.xcodeproj ├── project.xcworkspace │ ├── contents.xcworkspacedata │ └── xcshareddata │ │ └── IDEWorkspaceChecks.plist └── project.pbxproj ├── HNScraper ├── Helpers │ ├── Scanner+ScanBetweenString.swift │ └── RessourceFetcher.swift ├── Info.plist ├── HNParseConfig.swift ├── HNUser.swift ├── HNPost.swift ├── HNComment.swift ├── HNLogin.swift └── HNScraper.swift ├── HNScraper.podspec ├── HNScraperTests ├── Info.plist ├── HNPostTest.swift ├── HNLoginTest.swift ├── RessourceFetcherTest.swift └── HNScraperTest.swift ├── LICENSE ├── hn.json └── README.md /.swift-version: -------------------------------------------------------------------------------- 1 | 4.0 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: objective-c 2 | osx_image: xcode9.4 3 | script: 4 | - xcodebuild -project 'HNScraper.xcodeproj' -scheme 'HNScraper' -sdk iphonesimulator build -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## mac os 2 | .DS_Store 3 | 4 | ## User settings 5 | xcuserdata/ 6 | 7 | ## compatibility with Xcode 8 and earlier (ignoring not required starting Xcode 9) 8 | *.xcscmblueprint 9 | *.xccheckout -------------------------------------------------------------------------------- /HNScraper.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /HNScraper.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | IDEDidComputeMac32BitWarning 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /HNScraper/Helpers/Scanner+ScanBetweenString.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Scanner+ScanBetweenString.swift 3 | // HNScraper 4 | // 5 | // Created by Stéphane Sercu on 29/09/17. 6 | // Copyright © 2017 Stéphane Sercu. All rights reserved. 7 | // 8 | 9 | import Foundation 10 | extension Scanner { 11 | func scanBetweenString(stringA: String, stringB: String, into: AutoreleasingUnsafeMutablePointer?) { 12 | var trash: NSString? = "" 13 | self.scanUpTo(stringA, into: &trash) 14 | self.scanString(stringA, into: &trash) 15 | self.scanUpTo(stringB, into: into) 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /HNScraper.podspec: -------------------------------------------------------------------------------- 1 | Pod::Spec.new do |s| 2 | s.name = 'HNScraper' 3 | s.version = '0.2.2' 4 | s.summary = 'Scraper for hackernews written in swift' 5 | 6 | s.description = <<-DESC 7 | Scraper for hackernews written in swift. Supports grabbing posts, comments & user data as well as logging in, voting and favouriting items. 8 | DESC 9 | 10 | s.homepage = 'https://github.com/tsucres/HNScraper' 11 | s.license = { :type => 'MIT', :file => 'LICENSE' } 12 | s.author = { 'Stéphane Sercu' => 'stefsercu@gmail.com' } 13 | s.source = { :git => 'https://github.com/tsucres/HNScraper.git', :tag => s.version.to_s } 14 | 15 | s.ios.deployment_target = '9.0' 16 | s.source_files = 'HNScraper/**/*.swift' 17 | 18 | end -------------------------------------------------------------------------------- /HNScraperTests/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | BNDL 17 | CFBundleShortVersionString 18 | 1.0 19 | CFBundleVersion 20 | 1 21 | 22 | 23 | -------------------------------------------------------------------------------- /HNScraper/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | $(DEVELOPMENT_LANGUAGE) 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIdentifier 10 | $(PRODUCT_BUNDLE_IDENTIFIER) 11 | CFBundleInfoDictionaryVersion 12 | 6.0 13 | CFBundleName 14 | $(PRODUCT_NAME) 15 | CFBundlePackageType 16 | FMWK 17 | CFBundleShortVersionString 18 | 0.2.2 19 | CFBundleVersion 20 | $(CURRENT_PROJECT_VERSION) 21 | NSPrincipalClass 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2017-2018 by Stéphane Sercu 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. -------------------------------------------------------------------------------- /HNScraperTests/HNPostTest.swift: -------------------------------------------------------------------------------- 1 | // 2 | // HNPostTest.swift 3 | // HNScraperTests 4 | // 5 | // Created by Stéphane Sercu on 29/09/17. 6 | // Copyright © 2017 Stéphane Sercu. All rights reserved. 7 | // 8 | 9 | import XCTest 10 | @testable import HNScraper 11 | 12 | class HNPostTest: XCTestCase { 13 | 14 | 15 | func testDefaultPostParsing() { 16 | // TODO: This tests nothing about upvoteURL, points, 17 | let exp = expectation(description: "All important field of HNPost are correctly parsed") 18 | HNScraper.shared.getPost(ById: "15364896", buildHierarchy: false) { (post, comments, error) in 19 | XCTAssertNil(error) 20 | XCTAssertNotNil(post) 21 | XCTAssertEqual(post?.title, "Cloudflare Workers: Run JavaScript Service Workers at the Edge") 22 | XCTAssertEqual(post?.username, "thomseddon") 23 | XCTAssertEqual(post?.urlDomain, "blog.cloudflare.com") 24 | XCTAssertGreaterThan(post!.points, 0) 25 | XCTAssertGreaterThan(post!.commentCount, 0) 26 | XCTAssertEqual(post?.commentCount, comments.count) 27 | XCTAssertEqual(post?.type, .defaultType) 28 | XCTAssertEqual(post?.url, URL(string:"https://blog.cloudflare.com/introducing-cloudflare-workers/")) 29 | exp.fulfill() 30 | } 31 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 32 | } 33 | 34 | func testAskPostParsing() { 35 | let exp = expectation(description: "All important field of HNPost are correctly parsed") 36 | HNScraper.shared.getPost(ById: "15361048", buildHierarchy: false) { (post, comments, error) in 37 | XCTAssertNil(error) 38 | XCTAssertNotNil(post) 39 | XCTAssertEqual(post?.title, "Ask HN: Library recommendations for a Client / Server project (all Java)") 40 | XCTAssertEqual(post?.username, "HockeyPlayer") 41 | XCTAssertEqual(post?.urlDomain, "news.ycombinator.com") 42 | XCTAssertGreaterThan(post!.points, 0) 43 | XCTAssertGreaterThan(post!.commentCount, 0) 44 | XCTAssertEqual(post?.commentCount, comments.count - 1) // The first comment is the Ask 45 | XCTAssertEqual(post?.type, .askHN) 46 | XCTAssertEqual(post?.url, URL(string:"https://news.ycombinator.com/item?id=15361048")) 47 | exp.fulfill() 48 | } 49 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 50 | } 51 | 52 | // TODO: test noob users... but noob users change every time 53 | 54 | func testJobPostParsing() { 55 | 56 | } 57 | 58 | func testShowPostParsing() { 59 | 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /HNScraperTests/HNLoginTest.swift: -------------------------------------------------------------------------------- 1 | // 2 | // HNLoginTest.swift 3 | // HNScraper 4 | // 5 | // Created by Stéphane Sercu on 25/09/17. 6 | // Copyright © 2017 Stéphane Sercu. All rights reserved. 7 | // 8 | 9 | import XCTest 10 | @testable import HNScraper 11 | 12 | // 13 | // 14 | class HNLoginTest: XCTestCase { 15 | 16 | override func setUp() { 17 | super.setUp() 18 | // Put setup code here. This method is called before the invocation of each test method in the class. 19 | cleanCookies() 20 | } 21 | 22 | override func tearDown() { 23 | // Put teardown code here. This method is called after the invocation of each test method in the class. 24 | super.tearDown() 25 | cleanCookies() 26 | 27 | } 28 | func cleanCookies() { 29 | let cookieStore = HTTPCookieStorage.shared 30 | for cookie in cookieStore.cookies ?? [] { 31 | cookieStore.deleteCookie(cookie) 32 | } 33 | HNLogin.shared.logout() 34 | } 35 | 36 | func testGoodLogin() { 37 | // This is an example of a functional test case. 38 | // Use XCTAssert and related functions to verify your tests produce the correct results. 39 | let exp = expectation(description: "Correct login with abdurhtl") 40 | HNLogin.shared.login(username: "abdurhtl", psw: "!Bullshit?Psw$", completion: {(user, cookie, error) -> Void in 41 | XCTAssertNotNil(cookie) 42 | XCTAssertEqual(user?.username, "abdurhtl") 43 | exp.fulfill() 44 | }) 45 | let exp2 = expectation(description: "Correct login with testHNScrapper") 46 | HNLogin.shared.login(username: "testHNScrapper", psw: "&$!?èé%`ç\"'-some_thing", completion: {(user, cookie, error) -> Void in 47 | XCTAssertNotNil(cookie) 48 | XCTAssertEqual(user?.username, "testHNScrapper") 49 | exp2.fulfill() 50 | }) 51 | 52 | wait(for: [exp, exp2], timeout: HNScraperTest.defaultTimeOut) 53 | } 54 | 55 | func testBadPasswordLogin() { 56 | let exp = expectation(description: "not logged in") 57 | HNLogin.shared.login(username: "who?", psw: "random", completion: {(user, cookie, error) -> Void in 58 | XCTAssertNil(user) 59 | XCTAssertNil(cookie) 60 | XCTAssertEqual(error, HNLogin.HNLoginError.badCredentials) 61 | exp.fulfill() 62 | }) 63 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 64 | } 65 | 66 | 67 | func testIsLoggedIn() { 68 | let exp = expectation(description: "isLoggedIn() returns true") 69 | XCTAssertFalse(HNLogin.shared.isLoggedIn()) 70 | HNLogin.shared.login(username: "abdurhtl", psw: "!Bullshit?Psw$", completion: {(user, cookie, error) -> Void in 71 | XCTAssertTrue(HNLogin.shared.isLoggedIn()) 72 | exp.fulfill() 73 | 74 | }) 75 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 76 | 77 | } 78 | 79 | } 80 | -------------------------------------------------------------------------------- /HNScraper/HNParseConfig.swift: -------------------------------------------------------------------------------- 1 | // 2 | // HNParseConfig.swift 3 | // HackerNews2 4 | // 5 | // Created by Stéphane Sercu on 8/09/17. 6 | // Copyright © 2017 Stéphane Sercu. All rights reserved. 7 | // 8 | 9 | import Foundation 10 | 11 | 12 | /** 13 | Manage to download, store and cache the json file 14 | used to parse the pages of the website. 15 | */ 16 | public class HNParseConfig { 17 | private let savingKey = "HNParseConfig" 18 | private let url = "https://raw.githubusercontent.com/tsucres/HNScraper/v0.2.2/hn.json" 19 | private var _config: [String: Any]? = nil 20 | private init() {} 21 | public static let shared = HNParseConfig() 22 | 23 | /// Returns the data if in cache (if not it returns 24 | /// nil, you need to call getDictionnary to fetch the file) 25 | public var data:[String: Any]? { 26 | get { 27 | if (_config == nil) { 28 | _config = cacheData 29 | } 30 | return _config 31 | } 32 | } 33 | /// Looks for the data in local storage 34 | private var cacheData: [String: Any]? { 35 | get { 36 | let defaults = UserDefaults.standard 37 | if defaults.object(forKey: self.savingKey) != nil { 38 | return (defaults.object(forKey: self.savingKey) as! [String: Any]) 39 | } else { 40 | return nil 41 | } 42 | } 43 | } 44 | /** 45 | The completion handler is called with the configration data as 46 | parameter when the json file has been fetched. It firstly 47 | checks in the clocal storage if it has already been fetched. 48 | */ 49 | internal func getDictionnary(completion: @escaping (([String: Any]?, RessourceFetcher.RessourceFetchingError?) -> Void)) { 50 | if self.data != nil { 51 | completion(self.data, nil) 52 | } else { 53 | self.downloadConfigFile(completion: completion) 54 | } 55 | } 56 | 57 | 58 | /// Downloads the configFile and store it locally. If a configFile is already saved, it's replaced. 59 | internal func downloadConfigFile(completion: @escaping (([String: Any]?, RessourceFetcher.RessourceFetchingError?) -> Void)) { 60 | RessourceFetcher.shared.getJson(url: self.url, completion: { (json, error) -> Void in 61 | if (json != nil) { 62 | let defaults = UserDefaults.standard 63 | defaults.set(json, forKey: self.savingKey) 64 | self._config = json 65 | completion(json, error) 66 | } else { 67 | completion(nil, error ?? .noData) 68 | } 69 | }) 70 | } 71 | 72 | /// Downloads the configFile and store it locally. If a configFile is already saved, it's replaced. 73 | public func forceRedownload(completion: @escaping ((HNScraper.HNScraperError?) -> Void)) { 74 | self.downloadConfigFile { (_, ressourceFetcherError) in 75 | completion(HNScraper.HNScraperError.init(ressourceFetcherError)) 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /HNScraper/HNUser.swift: -------------------------------------------------------------------------------- 1 | // 2 | // HNUser.swift 3 | // HackerNews2 4 | // 5 | // Created by Stéphane Sercu on 8/09/17. 6 | // Copyright © 2017 Stéphane Sercu. All rights reserved. 7 | // 8 | 9 | import Foundation 10 | 11 | open class HNUser { 12 | public var username: String! 13 | public var karma: Int! 14 | public var age: Date! 15 | public var aboutInfo: String? 16 | public var isNoob: Bool! = false // TODO 17 | 18 | 19 | public init(username: String, karma: Int, age: Date, aboutInfo: String? = nil, isNoob: Bool = false) { 20 | self.username = username 21 | self.age = age 22 | self.karma = karma 23 | self.aboutInfo = aboutInfo 24 | self.isNoob = isNoob 25 | } 26 | /** 27 | * - parameters: 28 | * - age: the number of days in the past relatively to current date. 29 | * 30 | */ 31 | public convenience init(username: String, karma: String, age: String, aboutInfo: String?, isNoob: Bool = false) { 32 | self.init(username: username, karma: Int(karma.replacingOccurrences(of: " ", with: "")) ?? 0, age: HNUser.dateFromFormat(date: age), aboutInfo: aboutInfo) 33 | } 34 | 35 | public convenience init?(fromHtml html: String, withParsingConfig parseConfig: [String : Any]) { 36 | var userDict: [String : Any]? = parseConfig["User"] != nil ? parseConfig["User"] as? [String: Any] : nil 37 | if (userDict == nil || userDict!["Parts"] == nil) { 38 | return nil 39 | } 40 | 41 | let scanner = Scanner(string: html) 42 | var uDict: [String: Any] = [:] 43 | var isNoob = false 44 | let parts = userDict!["Parts"] as! [[String : Any]] 45 | for dict in parts { 46 | var new: NSString? = "" 47 | let isTrash = (dict["I"] as! String) == "TRASH" 48 | let start = dict["S"] as! String 49 | let end = dict["E"] as! String 50 | if scanner.string.contains(start) && scanner.string.contains(end) { 51 | scanner.scanBetweenString(stringA: start, stringB: end, into: &new) 52 | if (!isTrash && (new?.length)! > 0) { 53 | if dict["I"] as! String == "user" { 54 | var newStr: String = String(describing: new!) 55 | isNoob = HNUser.cleanNoobUsername(username: &(newStr)) 56 | new = newStr as NSString 57 | /*if new!.contains("") { 58 | new = new!.replacingOccurrences(of: "", with: "") as NSString 59 | new = new!.replacingOccurrences(of: "", with: "") as NSString 60 | isNoob = true 61 | }*/ 62 | } 63 | uDict[dict["I"] as! String] = new 64 | } 65 | } 66 | } 67 | 68 | if uDict["user"] == nil { 69 | return nil 70 | } 71 | self.init(username: uDict["user"] as! String, karma: uDict["karma"] as? String ?? "", age: uDict["created"] as? String ?? "", aboutInfo: uDict["about"] as? String, isNoob: isNoob) 72 | 73 | } 74 | 75 | public static func cleanNoobUsername(username: inout String) -> Bool { 76 | if username.contains("") { 77 | username = username.replacingOccurrences(of: "", with: "") 78 | username = username.replacingOccurrences(of: "", with: "") 79 | return true 80 | } 81 | return false 82 | } 83 | 84 | /// Converts the number of days from current date to a Date instance. 85 | private static func dateFromNumberOfDays(_ numberOfDays: Int) -> Date { 86 | return Calendar.current.date(byAdding: .day, value: -numberOfDays, to: Date())! 87 | } 88 | 89 | private static func dateFromFormat(date: String, dateFormat: String = "yyyy-MM-dd") -> Date { 90 | let dateFormatter = DateFormatter() 91 | dateFormatter.dateFormat = dateFormat 92 | 93 | let date = dateFormatter.date(from: date) 94 | return date ?? Date() 95 | } 96 | 97 | } 98 | -------------------------------------------------------------------------------- /HNScraperTests/RessourceFetcherTest.swift: -------------------------------------------------------------------------------- 1 | // 2 | // RessourceFetcherTest.swift 3 | // HNScraperTests 4 | // 5 | // Created by Stéphane Sercu on 29/09/17. 6 | // Copyright © 2017 Stéphane Sercu. All rights reserved. 7 | // 8 | 9 | import XCTest 10 | @testable import HNScraper 11 | class RessourceFetcherTest: XCTestCase { 12 | 13 | override func setUp() { 14 | super.setUp() 15 | // Put setup code here. This method is called before the invocation of each test method in the class. 16 | } 17 | 18 | override func tearDown() { 19 | // Put teardown code here. This method is called after the invocation of each test method in the class. 20 | super.tearDown() 21 | } 22 | 23 | func testGetJson() { 24 | let exp = expectation(description: "No error & valid parsed data") 25 | RessourceFetcher.shared.getJson(url: "https://httpbin.org/headers") { (json, error) in 26 | XCTAssertNil(error) 27 | XCTAssertNotNil(json) 28 | XCTAssertNotNil(json!["headers"] as? [String: String]) 29 | exp.fulfill() 30 | } 31 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 32 | } 33 | func testPostRequest() { 34 | let exp = expectation(description: "Get the post data back as response in json format") 35 | let bodyData = "attr1=val1&attr2=val2".data(using: .utf8) 36 | let cookie = HTTPCookie(properties: [.value:"value", .name:"name", .domain:"httpbin.org", .path:"."]) 37 | 38 | RessourceFetcher.shared.post(urlString: "https://httpbin.org/post", data: bodyData!, cookies: [cookie!]) { (data, response, error) in 39 | XCTAssertNil(error) 40 | XCTAssertNotNil(response) 41 | XCTAssertNotNil(data) 42 | let json:[String: Any]? = try? JSONSerialization.jsonObject(with: data!, options: .mutableContainers) as! [String: Any] 43 | XCTAssertNotNil(json ?? nil) 44 | XCTAssertEqual((json!["form"] as! [String: String])["attr1"], "val1") 45 | XCTAssertEqual((json!["form"] as! [String: String])["attr2"], "val2") 46 | XCTAssertEqual(((json!["headers"] as! [String:String])["Cookie"]), "name=value") 47 | exp.fulfill() 48 | } 49 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 50 | 51 | } 52 | 53 | func testGetRequest() { 54 | let expWithoutCookie = expectation(description: "Get the post data back as response in json format") 55 | let expWithCookie = expectation(description: "Get the post data back as response in json format (containing a cookies field)") 56 | let cookie = HTTPCookie(properties: [.value:"value", .name:"name", .domain:"httpbin.org", .path:"."]) 57 | RessourceFetcher.shared.get(urlString: "https://httpbin.org/cookies", cookies: [cookie!]) { (data, response, error) in 58 | XCTAssertNil(error) 59 | XCTAssertNotNil(response) 60 | XCTAssertNotNil(data) 61 | let json:[String: Any]? = try? JSONSerialization.jsonObject(with: data!, options: .mutableContainers) as! [String: Any] 62 | XCTAssertNotNil(json ?? nil) 63 | XCTAssertEqual(json!["cookies"] as! [String:String], ["name":"value"]) 64 | expWithCookie.fulfill() 65 | } 66 | RessourceFetcher.shared.get(urlString: "https://httpbin.org/cookies") { (data, response, error) in 67 | XCTAssertNil(error) 68 | XCTAssertNotNil(response) 69 | XCTAssertNotNil(data) 70 | let json:[String: Any]? = try? JSONSerialization.jsonObject(with: data!, options: .mutableContainers) as! [String: Any] 71 | XCTAssertNotNil(json ?? nil) 72 | expWithoutCookie.fulfill() 73 | } 74 | wait(for: [expWithoutCookie, expWithCookie], timeout: HNScraperTest.defaultTimeOut) 75 | 76 | } 77 | 78 | func testBadGetRequest() { 79 | let exp = expectation(description: "Get a invalidURL error") 80 | RessourceFetcher.shared.get(urlString: "where?") { (data, response, error) in 81 | XCTAssertEqual(error, .invalidURL) 82 | exp.fulfill() 83 | } 84 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 85 | 86 | } 87 | 88 | 89 | 90 | } 91 | -------------------------------------------------------------------------------- /hn.json: -------------------------------------------------------------------------------- 1 | { 2 | "Post": { 3 | "CS": "", 4 | "Vote": { 5 | "R": "votearrow", 6 | "S": "href='", 7 | "E": "'" 8 | }, 9 | "Parts": [{ 10 | "S": "", 15 | "E": "<", 16 | "I": "Title" 17 | }, { 18 | "S": "score_", 19 | "E": "\"", 20 | "I": "TRASH" 21 | }, { 22 | "S": ">", 23 | "E": "", 24 | "I": "Points" 25 | }, { 26 | "S": "", 35 | "E": "", 39 | "E": "item?id", 40 | "I": "TRASH" 41 | }, { 42 | "S": "=", 43 | "E": "\"", 44 | "I": "TRASH" 45 | }, { 46 | "S": ">", 47 | "E": "<", 48 | "I": "Comments" 49 | }], 50 | "LinkForMore": { 51 | "S": "" 84 | }, 85 | "Upvote": { 86 | "R": "vote(event, this, \"up\")", 87 | "S": "href='", 88 | "E": "'" 89 | }, 90 | "Downvote": { 91 | "R": "dir=down", 92 | "S": "href=\"", 93 | "E": "\">" 94 | }, 95 | "Level": { 96 | "S": "height=\"1\" width=\"", 97 | "E": "\">", 98 | "I": "Level" 99 | }, 100 | "ParentPostId": { 101 | "S": "on: ", 103 | "I": "ParentPostId" 104 | }, 105 | "ASK": [{ 106 | "S": "class=\"storylink\">", 107 | "E": "", 108 | "I": "Title" 109 | }, { 110 | "S": "class=\"hnuser\">", 111 | "E": "", 112 | "I": "Username" 113 | }, { 114 | "S": "", 115 | "E": "", 119 | "E": "", 120 | "I": "Time" 121 | }, { 122 | "S": "href=\"item?id=", 123 | "E": "\"", 124 | "I": "CommentId" 125 | }, { 126 | "S": "\n ", 127 | "E": "\n \n ", 132 | "E": "", 133 | "I": "Text" 134 | }], 135 | "REG": [{ 136 | "S": "class=\"hnuser\">", 137 | "E": "", 138 | "I": "Username" 139 | }, { 140 | "S": "item?id=", 141 | "E": "\"", 142 | "I": "CommentId" 143 | }, { 144 | "S": ">", 145 | "E": "<", 146 | "I": "Time" 147 | }, { 148 | "S": "class=\"comment\">", 149 | "E": "class=\"c", 150 | "I": "TRASH" 151 | }, { 152 | "S": "\">", 153 | "E": "", 154 | "I": "Text" 155 | }, { 156 | "S": "href=\"", 157 | "E": "\"", 158 | "I": "ReplyUrl" 159 | }], 160 | "LinkForMore": { 161 | "S": "", 170 | "I": "parent" 171 | }, { 172 | "S": "name=\"hmac\" value=\"", 173 | "E": "\">", 174 | "I": "hmac" 175 | }] 176 | }, 177 | "Submit": { 178 | "Action": "r", 179 | "Parts": [{ 180 | "S": "name=\"fnid\" value=\"", 181 | "E": "\">", 182 | "I": "fnid" 183 | }], 184 | "Url": "url", 185 | "Title": "title", 186 | "Text": "text" 187 | }, 188 | "User": { 189 | "Parts": [{ 190 | "S": "class=\"hnuser\">", 191 | "E": "", 192 | "I": "user" 193 | }, { 194 | "S": "href=\"front?day=", 195 | "E": "&birth", 196 | "I": "created" 197 | }, { 198 | "S": "karma:", 199 | "E": "", 200 | "I": "karma" 201 | }, { 202 | "S": "name=\"about\">", 203 | "E": "", 204 | "I": "about" 205 | }, { 206 | "S": "about:\n", 207 | "E": "", 208 | "I": "about" 209 | }, { 210 | "S": "created:", 211 | "E": " ", 212 | "I": "age" 213 | }] 214 | }, 215 | "ParsingKeys": { 216 | "UserCreationDateFormat": "yyyy-MM-dd" 217 | } 218 | } -------------------------------------------------------------------------------- /HNScraper/HNPost.swift: -------------------------------------------------------------------------------- 1 | // 2 | // HNPost.swift 3 | // HackerNews2 4 | // 5 | // Created by Stéphane Sercu on 8/09/17. 6 | // Copyright © 2017 Stéphane Sercu. All rights reserved. 7 | // 8 | 9 | import Foundation 10 | 11 | 12 | /// Model used by the HN Scraper to store avery data about a post. 13 | open class HNPost { 14 | public enum PostType { 15 | case defaultType 16 | case askHN 17 | case jobs 18 | 19 | public init?(index: Int) { 20 | switch index { 21 | case 0: self = .defaultType 22 | case 1: self = .askHN 23 | case 2: self = .jobs 24 | default: return nil 25 | } 26 | } 27 | } 28 | 29 | public var type: PostType = .defaultType 30 | public var username: String = "" 31 | public var isOPNoob: Bool = false 32 | public var url: URL?// = URL(string: "")! 33 | public var urlDomain: String { 34 | get { 35 | if url == nil { 36 | return "" 37 | } 38 | var dom: String? = self.url!.host 39 | if dom != nil && dom!.hasPrefix("www.") { 40 | dom = String(dom!.dropFirst(4)) 41 | } 42 | return dom ?? "" 43 | } 44 | } 45 | public var title: String = "" 46 | public var points: Int = 0 47 | public var commentCount: Int = 0 48 | public var id: String = "" 49 | public var time: String = "" 50 | 51 | public var upvoted: Bool = false 52 | public var upvoteAdditionURL: String? 53 | 54 | public var favorited: Bool = false // TODO: there's no way to know from a "list page", but it could be filled from the discussion thread. 55 | 56 | public var replyAction: String? 57 | public var replyParent: String? 58 | public var replyGoto: String? 59 | public var replyHmac: String? 60 | public var replyText: String? 61 | 62 | 63 | public init() {} 64 | 65 | /** 66 | * Build the model by parsing the html of a post item on the HN website. 67 | * - parameters: 68 | * - html: the html code to parse 69 | * - parseConfig: the parameters from the json file containing all the needed parsing informations. 70 | */ 71 | public convenience init?(fromHtml html: String, withParsingConfig parseConfig: [String : Any]) { 72 | self.init() 73 | 74 | var postsConfig: [String : Any]? = (parseConfig["Post"] != nil) ? parseConfig["Post"] as? [String : Any] : nil 75 | if postsConfig == nil { 76 | return nil 77 | } 78 | 79 | if html.contains(" [dead] 0) { 104 | postDict[part["I"] as! String] = new 105 | } 106 | } 107 | 108 | 109 | // Set Values 110 | self.url = postDict["UrlString"] != nil ? URL(string: postDict["UrlString"] as! String) : nil 111 | self.title = postDict["Title"] as? String ?? "" 112 | self.points = Int(((postDict["Points"] as? String ?? "").components(separatedBy: " ")[0])) ?? 0 113 | self.username = postDict["Username"] as? String ?? "" 114 | self.isOPNoob = HNUser.cleanNoobUsername(username: &self.username) 115 | self.id = postDict["PostId"] as? String ?? "" 116 | self.time = postDict["Time"] as? String ?? "" 117 | if self.id != "" && html.contains(" 0) { 115 | cDict[dict["I"] as! String] = new 116 | } 117 | } 118 | 119 | self.id = cDict["CommentId"] as? String ?? "" 120 | self.text = cDict["Text"] as? String ?? "" 121 | self.username = cDict["Username"] as? String ?? "" 122 | self.isOPNoob = HNUser.cleanNoobUsername(username: &(self.username!)) 123 | self.created = cDict["Time"] as? String ?? "" 124 | self.replyUrl = cDict["ReplyUrl"] as? String ?? "" 125 | 126 | if self.id != "" && html.contains(" HNComment? { 134 | var cDict: [String : Any] = [:] 135 | var commentDict: [String : Any]? = parseConfig["Comment"] != nil ? parseConfig["Comment"] as? [String: Any] : nil 136 | if commentDict == nil { 137 | return nil 138 | } 139 | 140 | let scanner = Scanner(string: html) 141 | var upvoteUrl: NSString? = "" 142 | 143 | 144 | if html.contains((commentDict!["Upvote"] as! [String: String])["R"]!) { 145 | scanner.scanBetweenString(stringA: (commentDict!["Upvote"] as! [String: String])["S"]!, stringB: (commentDict!["Upvote"] as! [String: String])["E"]!, into: &upvoteUrl) 146 | if (upvoteUrl != nil) { 147 | upvoteUrl = upvoteUrl!.replacingOccurrences(of: "&", with: "&") as NSString 148 | } 149 | } 150 | let asks = commentDict!["ASK"] as! [[String : Any]] 151 | for dict in asks { 152 | var new: NSString? = "" 153 | let isTrash = dict["I"] as! String == "TRASH" 154 | scanner.scanBetweenString(stringA: dict["S"] as! String, stringB: dict["E"] as! String, into: &new) 155 | if (!isTrash && (new?.length)! > 0) { 156 | cDict[dict["I"] as! String] = new 157 | } 158 | } 159 | 160 | let newComment = HNComment() 161 | newComment.level = 0 162 | newComment.username = cDict["Username"] as? String ?? "" 163 | newComment.isOPNoob = HNUser.cleanNoobUsername(username: &(newComment.username!)) 164 | newComment.created = cDict["Time"] as? String ?? "" 165 | newComment.text = cDict["Text"] as? String ?? "" 166 | //newComment.links = ... 167 | newComment.type = .askHN 168 | if upvoteUrl != nil { 169 | newComment.upvoteUrl = String(describing: upvoteUrl!) as String //(upvoteUrl?.length)! > 0 ? upvoteUrl : ""; 170 | } 171 | newComment.id = cDict["CommentId"] as? String ?? "" 172 | return newComment 173 | } 174 | public static func parseJobComment(html: String, withParsingConfig parseConfig: [String : Any]) -> HNComment? { 175 | var commentDict: [String : Any]? = parseConfig["Comment"] != nil ? parseConfig["Comment"] as? [String: Any] : nil 176 | if commentDict == nil { 177 | return nil 178 | } 179 | 180 | let scanner = Scanner(string: html) 181 | var cDict: [String : Any] = [:] 182 | 183 | let jobs = commentDict!["JOBS"] as! [[String : Any]] 184 | for dict in jobs { 185 | var new: NSString? = "" 186 | let isTrash = dict["I"] as! String == "TRASH" 187 | scanner.scanBetweenString(stringA: dict["S"] as! String, stringB: dict["E"] as! String, into: &new) 188 | if (!isTrash && (new?.length)! > 0) { 189 | cDict[dict["I"] as! String] = new 190 | } 191 | } 192 | 193 | let newComment = HNComment() 194 | newComment.level = 0 195 | newComment.text = cDict["Text"] as? String ?? "" 196 | //newComment.links = ... 197 | newComment.type = .jobs 198 | 199 | return newComment 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /HNScraper/HNLogin.swift: -------------------------------------------------------------------------------- 1 | // 2 | // HNLogin.swift 3 | // HackerNews2 4 | // 5 | // Created by Stéphane Sercu on 8/09/17. 6 | // Copyright © 2017 Stéphane Sercu. All rights reserved. 7 | // 8 | 9 | import Foundation 10 | 11 | 12 | // abdurhtl 13 | // !Bullshit?Psw$ 14 | 15 | public protocol HNLoginDelegate { 16 | func didLogin(user: HNUser, cookie: HTTPCookie) 17 | } 18 | 19 | public class HNLogin { 20 | private var observers: [HNLoginDelegate] = [] 21 | 22 | public func addObserver(_ observer: HNLoginDelegate) { 23 | self.observers.append(observer) 24 | } 25 | private init() { 26 | if let cookie = self.retrieveSessionCookie() { 27 | self._sessionCookie = cookie 28 | self.getUsernameFromCookie(cookie, completion: {(user, cookie, error) -> Void in 29 | if cookie != nil { 30 | self._user = user 31 | self._sessionCookie = cookie 32 | if self.isLoggedIn() { 33 | for observer in self.observers { 34 | observer.didLogin(user: user!, cookie: cookie!) 35 | } 36 | } 37 | } 38 | }) 39 | } 40 | } 41 | 42 | public enum HNLoginError { 43 | case badCredentials 44 | case serverUnreachable 45 | case noInternet 46 | case unknown 47 | 48 | init?(_ error: RessourceFetcher.RessourceFetchingError?) { 49 | self.init(HNScraper.HNScraperError(error)) 50 | } 51 | init?(_ error: HNScraper.HNScraperError?) { 52 | if error == nil { 53 | return nil 54 | } 55 | if error == .noInternet { 56 | self = .noInternet 57 | } else if error == .serverUnreachable || error == .noData { 58 | self = .serverUnreachable 59 | } else { 60 | self = .unknown 61 | } 62 | } 63 | } 64 | 65 | public static let shared = HNLogin() 66 | 67 | private var _sessionCookie: HTTPCookie? 68 | private var _user: HNUser? 69 | 70 | public var sessionCookie: HTTPCookie? { 71 | get { 72 | return _sessionCookie 73 | } 74 | } 75 | public var user: HNUser? { 76 | get { 77 | return _user 78 | } 79 | } 80 | 81 | 82 | /** 83 | * Log a user in useing the specified credentials. In case of success, 84 | * a HNUser instance is built with the information of the conected 85 | * user and passed as paramater to the completion handler along with a 86 | * cookie containing the session data. 87 | */ 88 | public func login(username: String, psw: String, completion: @escaping ((HNUser?, HTTPCookie?, HNLoginError?) -> Void)) { 89 | let url = HNScraper.baseUrl + "login" 90 | let encodedPass = psw.addingPercentEncoding(withAllowedCharacters: CharacterSet(charactersIn: "!*'();:@&=+$,/?%#[]").inverted) 91 | let bodyString = "acct=\(username)&pw=\(encodedPass!)&whence=news" 92 | guard let bodyData = bodyString.data(using: .utf8) else { 93 | completion(nil, nil, .badCredentials) 94 | return 95 | } 96 | 97 | RessourceFetcher.shared.post(urlString: url, data: bodyData, completion: {data, reponse, error -> Void in 98 | if data == nil { 99 | completion(nil, nil, HNLoginError(error) ?? .unknown) 100 | return 101 | } 102 | if let html = String(data: data!, encoding: .utf8) { 103 | if (!html.contains("Bad login.") && !html.contains("Unknown or expired link.")) { 104 | let scanner = Scanner(string: html) 105 | var trash: NSString? = "" 106 | var karma: NSString? = "" 107 | 108 | scanner.scanUpTo("/a> (", into: &trash) // TODO: use config file 109 | scanner.scanString("/a> (", into: &trash) 110 | scanner.scanUpTo(")", into: &karma) 111 | self._user = HNUser(username: username, karma: karma as String!, age: "", aboutInfo: "") 112 | 113 | self.getLoggedInUser(user: self._user!, completion: {(user, cookie, error) -> Void in 114 | 115 | if self.isLoggedIn() { 116 | for observer in self.observers { 117 | observer.didLogin(user: user!, cookie: cookie!) 118 | } 119 | } 120 | completion(user, cookie, error) 121 | }) 122 | 123 | 124 | } else { 125 | print("Probably wrong password") // TODO: logging 126 | completion(nil, nil, .badCredentials) 127 | } 128 | 129 | 130 | } else { 131 | print("Post request failed") 132 | completion(nil, nil, HNLoginError(error) ?? .unknown) 133 | } 134 | 135 | }) 136 | 137 | } 138 | 139 | public func logout() { 140 | if self._sessionCookie != nil { 141 | HTTPCookieStorage.shared.deleteCookie(self._sessionCookie!) 142 | self._sessionCookie = nil 143 | 144 | } 145 | self._user = nil 146 | } 147 | 148 | // TODO: clean this up, use the HNScraper's getUser method 149 | /** 150 | * Fetch the informations about a user. 151 | * - parameters: 152 | * - user: a HNUser object with at least the username of the user you want the info about. 153 | All the other properties are just copied in the result object or replaced with the newly fetched informations. 154 | * - completion: 155 | */ 156 | private func getLoggedInUser(user: HNUser, completion: @escaping ((HNUser?, HTTPCookie?, HNLoginError?) -> Void)) { 157 | let url = "https://news.ycombinator.com/user?id=\(user.username!)" 158 | 159 | RessourceFetcher.shared.fetchData(urlString: url, completion: {(data, error) -> Void in 160 | 161 | if let html = String(data: data!, encoding: .utf8) { 162 | var newUser: HNUser? 163 | // Getting user info 164 | if !(html.contains("We've limited requests for this url.")) { 165 | HNParseConfig.shared.getDictionnary(completion: {(parsingConfig, configFileError) -> Void in 166 | if parsingConfig != nil { 167 | newUser = HNUser(fromHtml: html, withParsingConfig: parsingConfig!) 168 | if newUser == nil { 169 | newUser = user 170 | } 171 | } else { 172 | print("couldn't fetch the configFile") 173 | } 174 | // Getting cookie 175 | self._sessionCookie = self.retrieveSessionCookie() 176 | self._user = newUser 177 | completion(newUser, self._sessionCookie, HNLoginError(error)) 178 | 179 | }) 180 | } else { 181 | print("Couldn't fetch user informations") 182 | completion(nil, nil, HNLoginError(error) ?? .serverUnreachable) 183 | } 184 | } else { 185 | completion(nil, nil, HNLoginError(error) ?? .unknown) 186 | } 187 | 188 | }) 189 | } 190 | 191 | // TODO: better error gesture & logging 192 | private func getUsernameFromCookie(_ cookie: HTTPCookie, completion: @escaping ((HNUser?, HTTPCookie?, HNLoginError?) -> Void)) { 193 | let url = "https://news.ycombinator.com/user?id=pg" // any valid url would do 194 | 195 | RessourceFetcher.shared.fetchData(urlString: url, completion: {(data, error) -> Void in 196 | if data != nil { 197 | if let html = String(data: data!, encoding: .utf8) { 198 | if (!html.contains("", into: &userString) 207 | scanner.scanUpTo(" (", into: &trash) 208 | scanner.scanString(" (", into: &trash) 209 | scanner.scanUpTo(")", into: &karma) 210 | 211 | let user = HNUser(username: userString as String!, karma: karma as String!, age: "", aboutInfo: "") 212 | 213 | self.getLoggedInUser(user: user, completion: completion) 214 | 215 | 216 | } else { 217 | print("getUsernameFromCookie: bad cookie?") // TODO: Logging 218 | completion(nil, nil, HNLoginError(error)) 219 | } 220 | 221 | 222 | } else { 223 | print("getUsernameFromCookie: Get request failed: not html?") 224 | completion(nil, nil, HNLoginError(error)) 225 | } 226 | } else { 227 | print("getUsernameFromCookie: Get request failed: no data") 228 | completion(nil, nil, HNLoginError(error)) 229 | } 230 | 231 | 232 | }) 233 | 234 | 235 | 236 | } 237 | 238 | private func retrieveSessionCookie() -> HTTPCookie? { 239 | if let cookieArray = HTTPCookieStorage.shared.cookies(for: URL(string: HNScraper.baseUrl)!) { 240 | if cookieArray.count > 0 { 241 | for cookie in cookieArray { 242 | if cookie.name == "user" { 243 | return cookie 244 | } 245 | } 246 | 247 | } 248 | } 249 | 250 | return nil 251 | } 252 | 253 | public func isLoggedIn() -> Bool { 254 | return self.sessionCookie != nil && self._user != nil 255 | } 256 | 257 | 258 | } 259 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HNScraper 2 | 3 |

4 | 5 | 6 | 7 | 8 | 9 | License 10 | 11 |

12 | 13 | 14 | HNScraper is a scraping library for hackernews, written in swift. It allows you to fetch all the stories, comments and user informations directly from the website. It also supports logging in and vote on/favorite posts and comments. 15 | 16 | This library is similar to the [LibHN](https://github.com/bennyguitar/libHN) library. Actually, some parts of the project, such as the post, comment and user models and the parsing rules are basically a swift translation of the LibHN library. 17 | 18 | 19 | Before going further, note that there is an [official API](https://github.com/HackerNews/API) that implements the fundamentals features such as grabbing the new, top and best stories, user informations and comments. However, it doesn't support login and any related functionalities and suffers from a lot of restrictions. If it fits your needs though, I would recommend you use the official API over this scraper. 20 | 21 | 22 | ## Table of content 23 | 24 | * [Installation](#installation) 25 | * [Documentation](#documentation) 26 | * [Completion handlers & error gesture](#completion-handlers-&-error-gesture) 27 | * [Fetch list of posts](#fetch-list-of-posts) 28 | * [Fetch comments about a post](#fetch-comments-about-a-post) 29 | * [Fetch informations about a user](#fetch-informations-about-a-user) 30 | * [Fetch the submissions, comments and favorites of a user](#fetch-the-submissions,-comments-and-favorites-of-a-user) 31 | * [Login / Logout](#login-/-logout) 32 | * [Vote on posts/comments](#vote-on-posts/comments) 33 | * [Favorite a post](#favorite-a-post) 34 | * [Structure of the project](#structure-of-the-project) 35 | * [Contribution](#contribution) 36 | * [TODO](#todo) 37 | * [License](#license) 38 | 39 | 40 | ## Installation 41 | 42 | #### Pod 43 | 44 | Ensure you have at least the following code in your `Podfile`: 45 | 46 | ``` 47 | use_frameworks! 48 | 49 | target 'YourAppName' do 50 | pod 'HNScraper', '~> 0.2.1' 51 | end 52 | ``` 53 | 54 | Run `pod install` in your project's folder. 55 | 56 | Then just add `import HNScraper` wherever you need the scraper. 57 | 58 | 59 | #### Manually 60 | 61 | Just add all the `.swift` files from the `HNScraper` folder in your project. 62 | 63 | ## Documentation 64 | 65 | ### Completion handlers & error gesture 66 | All the following actions are performed in the same way: you call a method on the right instance with the required parameters and a completion handler which will give you back the results of your request and, eventually, the produced error. 67 | 68 | ```swift 69 | func getSomething(dependingOn: , completion: ((results?, error?) -> Void)) 70 | ``` 71 | 72 | The possible errors are defined by `HNScrapperError ` and `HNLoginError `. Those structures simplifies the error handling by classifying the most recurrent errors into distinct, self-explanatory error types. This abstraction allows you to handle common problems, such as no Internet connection, bad credentials, non-existing post-id, etc, without worrying about "low level" errors such as URLErrors and JSON errors. 73 | 74 | 75 | ### Fetch list of posts 76 | A list of posts is any of the HN pages defined in `HNScraper.PostListPageName` 77 | 78 | [`news`](https://news.ycombinator.com/news) 79 | [`front`](https://news.ycombinator.com/front) 80 | [`new`](https://news.ycombinator.com/newest) 81 | [`jobs`](https://news.ycombinator.com/jobs) 82 | [`asks`](https://news.ycombinator.com/ask) 83 | [`shows`](https://news.ycombinator.com/show) 84 | [`newshows`](https://news.ycombinator.com/shownew) 85 | [`active`](https://news.ycombinator.com/active) 86 | [`best`](https://news.ycombinator.com/best) 87 | [`noob`](https://news.ycombinator.com/noobstories) 88 | 89 | To scrap the 30 first posts of one of them, you have to use `getPostsList` (from `HNScraper`) which will give you an array of `HNPost` objects and the link to the next page that you can use to fetch the 30 following items. 90 | 91 | ```swift 92 | typealias PostListDownloadCompletionHandler = (([HNPost], String?, HNScrapperError?) -> Void) 93 | 94 | func getPostsList(page: PostListPageName, completion: PostListDownloadCompletionHandler) 95 | ``` 96 | 97 | For example: 98 | 99 | ```swift 100 | HNScraper.shared.getPostsList(page: .news) { (posts, linkForMore, error) in 101 | // Don't forget to handle the eventual error 102 | for post in posts { 103 | print(post.title) 104 | } 105 | // You also may want to save the linkForMore somewhere. 106 | } 107 | ``` 108 | 109 | For the 30+ items, you have to use `getMoreItems` at which you pass the "link for more" you got with the 30 first items. This will also give you a list of `HNPost` instances and a link for the next page: 110 | 111 | ```swift 112 | func getMoreItems(linkForMore: String, completionHandler: PostListDownloadCompletionHandler) 113 | ``` 114 | 115 | For example: 116 | 117 | ```swift 118 | HNScraper.shared.getMoreItems(linkForMore: "s") { (posts, linkForMore, error) in 119 | // do whatever you want with the stories 120 | } 121 | 122 | ``` 123 | 124 | 125 | 126 | 127 | 128 | ### Fetch comments about a post 129 | The comments are parsed from a discussion thread (at `news.ycombinator.com/item?id=`*``*). You can fetch those in 2 ways: either with 130 | 131 | ```swift 132 | func getComments(ByPostId postId: String, buildHierarchy: Bool = true, completion: @escaping ((HNPost?, [HNComment], HNScraperError?) -> Void)) 133 | ``` 134 | 135 | or with 136 | 137 | ```swift 138 | func getComments(ForPost post: HNPost, buildHierarchy: Bool = true, completion: @escaping ((HNPost, [HNComment], HNScraperError?) -> Void)) 139 | ``` 140 | 141 | 142 | The parameter `buildHierarchy` indicates if the comments have to be returned in nested (meaning that only the root comments are in the resulting array and they are pointing to their replies) or in linear (flat) format. 143 | 144 | With the `ByPostId` method, the data about the post itself will be parsed to build a `HNPost` object that is passed to the completion closure. 145 | 146 | With the `ForPost`method, the `HNPost` instance given to the completion closure is the same (unmodified) post you passed to the `getComments`method. 147 | 148 | 149 | For a `askHN` type of posts, the first comment is the OP's ask itself. 150 | 151 | As for `job` type of posts, there should be no comments. 152 | 153 | ### Fetch informations about a user 154 | You can get the karma, description and age of any user by giving its username to the `getUser`method: 155 | 156 | ```swift 157 | func getUserFrom(Username username: String, completion: ((HNUser?, HNScraperError?) -> Void)?) 158 | ``` 159 | 160 | ### Fetch the submissions, comments and favorites of a user 161 | 162 | Use the following methods (from `HNScraper`) according to which list you want to grab: 163 | 164 | ```swift 165 | func getFavorites(ForUserWithUsername username: String, completion: @escaping PostListDownloadCompletionHandler) 166 | 167 | func getSubmissions(ForUserWithUsername username: String, completion: PostListDownloadCompletionHandler) 168 | ``` 169 | 170 | 171 | In the same way as for the list of post described earlier, the completion closure will give you a "link for more" that you can use to fetch more items (in the case there are more than 30 items to fetch of course). You can use the `getMoreItems(linkForMore: completionHandler:)` method as earlier. 172 | 173 | 174 | 175 | ### Login / Logout 176 | 177 | Those actions are handled by the singleton class `HNLogin`. 178 | 179 | 180 | You can login with 181 | 182 | ```swift 183 | func login(username: String, psw: String, completion: @escaping ((HNUser?, HTTPCookie?, HNLoginError?) -> Void)) 184 | ``` 185 | 186 | 187 | Once a user has logged in, the `HNLogin`class takes care to store the session cookie and make it available to the other classes. In addition, with that cookie saved, the html retrieved from the website by any of the requests made by the scrapper will be as if the user was logged in. Which means that it will contain all the upvote (and eventual downvote, if the user has more than 500 points) links, favorite links, comment links, etc. 188 | 189 | 190 | The `HNLogin`class comes with its own error enum, `HNLoginError` , which contains the `badCredentials` case. It's returned as an error in the case of, ... well, wrong credentials. 191 | 192 | Example: 193 | 194 | ```swift 195 | HNLogin.shared.login(username: "username", psw: "pass") { (user, cookie, error) in 196 | if let connected_user = user { 197 | print("logged in user: " + connected_user.username) 198 | } else { 199 | // Handle error 200 | if error == .badCredentials { 201 | print("wrong creds") 202 | } else { 203 | // Check other types of error 204 | } 205 | } 206 | } 207 | ``` 208 | 209 | You can logout a user by calling 210 | 211 | ```swift 212 | func logout() 213 | ``` 214 | 215 | All this method does is delete the stored session cookie, which make the retrieved html from the HN website looking as it would to an unsigned visitor. 216 | 217 | ### Up/Down/Un vote a post/comment 218 | 219 | Simply use one of the following methods: 220 | 221 | ```swift 222 | func upvote(Comment comment: HNComment, completion: ((HNScraperError?) -> Void)) 223 | func upvote(Post post: HNPost, completion: ((HNScraperError?) -> Void)) 224 | func unvote(Post post: HNPost, completion: ((HNScraperError?) -> Void)) 225 | func unvote(Comment comment: HNComment, completion: ((HNScraperError?) -> Void)) 226 | ``` 227 | 228 | The user obviously needs to be logged in to do that. Otherwise, an error of type `.notLoggedIn` is passed to the completion closure. 229 | 230 | If the `error` parameter of the completion closure is `nil`, then the action was succesfull. Otherwise there was a problem. 231 | 232 | **Note**: an action is considered *succesfull* when the final state of the item is the one itended by the request. So if you try to unvote a post that hasn't been upvoted, there will be no error. Same if you try to upvote an upvoted item. 233 | 234 | 235 | 236 | ### (Un)Favorite a post 237 | 238 | Use: 239 | 240 | ```swift 241 | func favorite(Post post: HNPost, completion: ((HNScraperError?) -> Void)) 242 | func unfavorite(Post post: HNPost, completion: ((HNScraperError?) -> Void)) 243 | 244 | ``` 245 | 246 | 247 | Those methods works in the same ways as the ones for voting on items. 248 | 249 | 250 | Again, the user needs to be logged in, otherwise an error of type `.notLoggedIn` is passed to the completion closure. 251 | 252 | ## Structure of the project 253 | 254 | ### Models 255 | 256 | The scraper uses 3 models: 257 | 258 | * `HNPost` 259 | * `HNComment` 260 | * `HNUser` 261 | 262 | ### Endpoints 263 | 264 | There are basically 2 singleton classes that you'll use to make requests: 265 | 266 | - **`HNScraper`** 267 | - **`HNLogin`** 268 | 269 | ### Parsing configuration file 270 | 271 | `hn.json` contains most of the informations needed to parse every HN pages. It was introduced in LibHN to 272 | 273 | Although some things changed/have been added in it, its structure is the same as in LibHN. The following section is an update of the original version of the LibHN documentation. 274 | 275 | 276 | This file is downloaded and stored by the singleton class `HNParseConfig`. 277 | 278 | 279 | ### Tests 280 | 281 | There are tests for most of the methods in the `HNScraperTests` folder. 282 | 283 | 284 | # Contribution 285 | 286 | Contribution of any kind is welcome. 287 | 288 | If you spot an error, you think of an amelioration, you have a suggestion or you g just open an issue or post directly a PR. 289 | 290 | Also, I'm not a native English speaker, so don't hesitate to correct some of my sentences :) 291 | 292 | # TODO 293 | 294 | - Complete the hn.json config file with the rest of hardcoded strings needed for parsing 295 | - Submit story 296 | - Post comments 297 | - Downvote 298 | - Edit account (about, options, mail, ...) 299 | - Search 300 | - Test for mac os apps 301 | 302 | 303 | # License 304 | 305 | HNScraper is licensed under the standard MIT License. 306 | 307 | **Copyright (C) 2017-2018 by Stéphane Sercu** 308 | 309 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 310 | 311 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 312 | 313 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 314 | -------------------------------------------------------------------------------- /HNScraperTests/HNScraperTest.swift: -------------------------------------------------------------------------------- 1 | // 2 | // HNScraperTest.swift 3 | // HNScraperTests 4 | // 5 | // Created by Stéphane Sercu on 25/09/17. 6 | // Copyright © 2017 Stéphane Sercu. All rights reserved. 7 | // 8 | 9 | import XCTest 10 | @testable import HNScraper 11 | 12 | class HNScraperLoginNeededTest: XCTestCase { 13 | override func setUp() { 14 | super.setUp() 15 | let exp = expectation(description: "Successfull login") 16 | login(completion: {(success) -> Void in 17 | XCTAssertTrue(success, "HNLogin is probably broken") 18 | exp.fulfill() 19 | }) 20 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 21 | } 22 | 23 | 24 | func login(completion: @escaping ((Bool) -> Void)) { 25 | if !HNLogin.shared.isLoggedIn() { 26 | let username = HNScraperTest.validCredential["username"]! 27 | let password = HNScraperTest.validCredential["password"]! 28 | HNLogin.shared.login(username: username, psw: password, completion: {(user, cookie, error) -> Void in 29 | completion(error == nil) 30 | }) 31 | } else { 32 | completion(true) 33 | } 34 | } 35 | 36 | func getFirstPost(completion: @escaping ((HNPost?) -> Void)) { 37 | HNScraper.shared.getPostsList(page: .news, completion: {(posts, linkForMore, error) -> Void in 38 | XCTAssertGreaterThan(posts.count, 0, "The getPostLists method is probably broken. Or hackernews is down...") 39 | completion(posts[0]) 40 | }) 41 | } 42 | func getPost(id: String, completion: @escaping ((HNPost?) -> Void)) { 43 | HNScraper.shared.getPost(ById: id) { (post, comments, error) in 44 | XCTAssertNotNil(post, "The getPostbyId method is probably broken. Or hackernews is down...") 45 | completion(post) 46 | } 47 | } 48 | 49 | // Try to upvote the first post of the home page 50 | func testUpvotePost() { 51 | let exp = expectation(description: "get no error") 52 | getFirstPost() { (post) in 53 | HNScraper.shared.upvote(Post: post!, completion: {(error) -> Void in 54 | XCTAssertNil(error) 55 | exp.fulfill() 56 | }) 57 | } 58 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 59 | } 60 | // Try to upvote the first post of the home page after it upvotes it. 61 | func testUpvoteUpvotedPost() { 62 | let exp = expectation(description: "get no error") 63 | getFirstPost(completion: {(post) -> Void in 64 | HNScraper.shared.upvote(Post: post!, completion: {(error) -> Void in 65 | XCTAssertNil(error, "If this fails, it probably means that the upvotePost method is broken.") 66 | HNScraper.shared.upvote(Post: post!, completion: {(error) -> Void in 67 | XCTAssertNil(error) 68 | exp.fulfill() 69 | }) 70 | 71 | }) 72 | }) 73 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 74 | } 75 | func testUpvoteBadPostId() { 76 | let post = HNPost() 77 | post.id = "where?" 78 | post.upvoteAdditionURL = "somewhereFarFarAway" 79 | let exp = expectation(description: "get a invalidUrl error") 80 | HNScraper.shared.upvote(Post: post, completion: {(error) -> Void in 81 | XCTAssertEqual(error, .invalidURL) 82 | exp.fulfill() 83 | }) 84 | 85 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 86 | } 87 | func testFavoritePost() { 88 | let exp = expectation(description: "get no error") 89 | getFirstPost(completion: {(post) -> Void in 90 | HNScraper.shared.favorite(Post: post!, completion: {(error) -> Void in 91 | XCTAssertNil(error) 92 | exp.fulfill() 93 | }) 94 | }) 95 | wait(for: [exp], timeout: 2*HNScraperTest.defaultTimeOut) 96 | } 97 | func testUnFavoriteFavoritedPost() { 98 | let exp = expectation(description: "get no error") 99 | getFirstPost(completion: {(post) -> Void in 100 | HNScraper.shared.favorite(Post: post!, completion: {(error) -> Void in 101 | XCTAssertNil(error, "If this fails, it probably means that the favoritePost method is broken.") 102 | HNScraper.shared.unfavorite(Post: post!, completion: {(error) -> Void in 103 | XCTAssertNil(error) 104 | exp.fulfill() 105 | }) 106 | 107 | }) 108 | }) 109 | wait(for: [exp], timeout: 3*HNScraperTest.defaultTimeOut) 110 | } 111 | func testUnFavoriteNonFavoritedPost() { 112 | let exp = expectation(description: "get no error") 113 | getPost(id: "15364646") { (post) in 114 | HNScraper.shared.unfavorite(Post: post!, completion: {(error) -> Void in 115 | XCTAssertNil(error) 116 | exp.fulfill() 117 | }) 118 | } 119 | wait(for: [exp], timeout: 2*HNScraperTest.defaultTimeOut) 120 | } 121 | func testUnVoteVotedPost() { 122 | let exp = expectation(description: "get no error") 123 | getPost(id: "15350139") { (post) in 124 | HNScraper.shared.upvote(Post: post!, completion: {(error) -> Void in 125 | XCTAssertNil(error, "If this fails, it probably means that the upvotePost method is broken.") 126 | HNScraper.shared.unvote(Post: post!, completion: {(error) -> Void in 127 | XCTAssertNil(error) 128 | exp.fulfill() 129 | }) 130 | 131 | }) 132 | } 133 | wait(for: [exp], timeout: 200*HNScraperTest.defaultTimeOut) 134 | } 135 | func testUnvoteNonVotedPost() { 136 | let exp = expectation(description: "get no error") 137 | getPost(id: "15350139") { (post) in 138 | HNScraper.shared.unvote(Post: post!, completion: {(error) -> Void in 139 | XCTAssertNil(error) 140 | exp.fulfill() 141 | }) 142 | } 143 | wait(for: [exp], timeout: 2*HNScraperTest.defaultTimeOut) 144 | } 145 | 146 | func testUpvoteComment() { 147 | let exp = expectation(description: "Get no error") 148 | getFirstPost() { (post) in // Will fail if the top post has no comments... 149 | HNScraper.shared.getComments(ForPost: post!) { (post, comments, error) in 150 | XCTAssertNil(error, "getComments methdod probably broken") 151 | XCTAssertGreaterThan(comments.count, 0) 152 | HNScraper.shared.upvote(Comment: comments[0], completion: { (error) in 153 | XCTAssertNil(error) 154 | HNScraper.shared.getComments(ForPost: post) { (post, comments, error) in 155 | XCTAssertNil(error) 156 | XCTAssertGreaterThan(comments.count, 0) 157 | XCTAssertTrue(comments[0].upvoted) 158 | exp.fulfill() 159 | } 160 | 161 | 162 | }) 163 | } 164 | } 165 | 166 | wait(for: [exp], timeout: 2*HNScraperTest.defaultTimeOut) 167 | } 168 | 169 | // TODO 170 | /// tests that the favorited attribute is correctly filled when parsing a post from the home page. 171 | /*func testFavoritedAttribute() { 172 | let exp = expectation(description: "the retrieved post has favorited=true") 173 | getFirstPost(completion: {(post) -> Void in 174 | HNScraper.shared.favorite(Post: post!, completion: {(error) -> Void in 175 | XCTAssertNil(error) 176 | self.getFirstPost(completion: { (post) in 177 | XCTAssertNotNil(post?.favorited) 178 | XCTAssertTrue((post?.favorited)!) 179 | exp.fulfill() 180 | }) 181 | 182 | }) 183 | }) 184 | wait(for: [exp], timeout: 2*HNScraperTest.defaultTimeOut) 185 | }*/ 186 | 187 | func testHasLoggedInUserVotedOnPost() { 188 | 189 | } 190 | 191 | // Same test as in HNScraperTest, but at some point, there was a parsing error when the user was logged in, so I added this test here. 192 | func testGetUser() { 193 | let exp = expectation(description: "get a entirely filled HNUser instance") 194 | HNScraper.shared.getUserFrom(Username: HNScraperTest.validFilledUsername, completion: { (user, error) in 195 | XCTAssertEqual(user?.username, HNScraperTest.validFilledUsername) 196 | XCTAssertEqual(String(describing: user!.age!).prefix(7), "2010-08") 197 | XCTAssertNotEqual(user?.karma, 0) 198 | XCTAssertNotNil(user?.aboutInfo) 199 | XCTAssertNotEqual(user!.aboutInfo!, "") 200 | exp.fulfill() 201 | }) 202 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 203 | } 204 | } 205 | class HNScraperTest: XCTestCase { 206 | static let defaultTimeOut: TimeInterval = 10 207 | static let validFilledUsername = "kposehn" // Chose him randomly 208 | static let invalidUsername = "ToBeOrNotToBeSureThatNoOneHasThatUsername" // *Resisting to the urge to create a new account with that username just to mess with these tests.* 209 | static let validCredential = ["username": "abdurhtl", "password": "!Bullshit?Psw$"] 210 | 211 | static let validPostId = "15331016" 212 | override func setUp() { 213 | super.setUp() 214 | // Put setup code here. This method is called before the invocation of each test method in the class. 215 | } 216 | 217 | override func tearDown() { 218 | // Put teardown code here. This method is called after the invocation of each test method in the class. 219 | super.tearDown() 220 | } 221 | 222 | 223 | 224 | func testGetUserByWrongUsername() { 225 | let exp = expectation(description: "Returns noSuchUser error") 226 | HNScraper.shared.getUserFrom(Username: HNScraperTest.invalidUsername, completion: {(user, error) -> Void in 227 | 228 | XCTAssertNil(user) 229 | XCTAssertEqual(error, HNScraper.HNScraperError.noSuchUser) 230 | exp.fulfill() 231 | 232 | }) 233 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 234 | } 235 | 236 | func testGetHomePage() { 237 | let exp = expectation(description: "get 30 items") 238 | HNScraper.shared.getPostsList(page: .news, completion: {(posts, linkForMore, error) -> Void in 239 | XCTAssertEqual(posts.count, 30) 240 | XCTAssertNotNil(linkForMore) 241 | XCTAssertNil(error) 242 | exp.fulfill() 243 | }) 244 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 245 | } 246 | 247 | func testGetAskHN() { // TODO: test askHN comment parsing 248 | let exp = expectation(description: "get 30 items") 249 | HNScraper.shared.getComments(ByPostId: "15465252") { (post, comments, error) in 250 | XCTAssertNil(error) 251 | XCTAssertEqual(comments.count, 1) 252 | XCTAssertGreaterThan(comments[0].text.count, 0) 253 | XCTAssertGreaterThan(comments[0].username.count, 0) 254 | XCTAssertGreaterThan((comments[0].replies[0] as! HNComment).text.count, 0) 255 | XCTAssertGreaterThan((comments[0].replies[0] as! HNComment).username.count, 0) 256 | exp.fulfill() 257 | } 258 | 259 | wait(for: [exp], timeout: 100*HNScraperTest.defaultTimeOut) 260 | } 261 | 262 | func testGet90ItemsFromHomePage() { 263 | let exp = expectation(description: "get 90 items") 264 | HNScraper.shared.getPostsList(page: .news) { (posts, linkForMore, error) in 265 | XCTAssertEqual(posts.count, 30, "the getPostsList method is probably broken") 266 | XCTAssertNotNil(linkForMore, "the getPostsList method is probably broken") 267 | XCTAssertNil(error, "the getPostsList method is probably broken") 268 | HNScraper.shared.getMoreItems(linkForMore: linkForMore!, completionHandler: { (posts, linkForMore, error) in 269 | XCTAssertEqual(posts.count, 30) 270 | XCTAssertNotNil(linkForMore) 271 | XCTAssertNil(error) 272 | HNScraper.shared.getMoreItems(linkForMore: linkForMore!, completionHandler: { (posts, linkForMore, error) in 273 | XCTAssertEqual(posts.count, 30) 274 | XCTAssertNotNil(linkForMore) 275 | XCTAssertNil(error) 276 | exp.fulfill() 277 | }) 278 | }) 279 | } 280 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 281 | } 282 | 283 | func testGetSubmissionOfNonExistingUser() { 284 | let exp = expectation(description: "get noSuchUser error") 285 | HNScraper.shared.getSubmissions(ForUserWithUsername: HNScraperTest.invalidUsername, completion: {(posts, linkForMore, error) -> Void in 286 | XCTAssertEqual(posts.count, 0) 287 | XCTAssertNil(linkForMore) 288 | XCTAssertEqual(error, HNScraper.HNScraperError.noSuchUser) 289 | exp.fulfill() 290 | }) 291 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 292 | } 293 | 294 | func testGetSubmissions() { 295 | let exp = expectation(description: "get some items") 296 | HNScraper.shared.getSubmissions(ForUserWithUsername: HNScraperTest.validFilledUsername, completion: {(posts, linkForMore, error) -> Void in 297 | XCTAssertEqual(posts.count, 30) 298 | XCTAssertNotNil(linkForMore) 299 | XCTAssertNil(error) 300 | exp.fulfill() 301 | }) 302 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 303 | } 304 | func testGetEmptySubmissionList() { 305 | 306 | let exp = expectation(description: "get 0 items") 307 | HNScraper.shared.getSubmissions(ForUserWithUsername: HNScraperTest.validCredential["username"]!, completion: {(posts, linkForMore, error) -> Void in 308 | XCTAssertEqual(posts.count, 0) 309 | XCTAssertNil(linkForMore) 310 | XCTAssertNil(error) 311 | exp.fulfill() 312 | }) 313 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 314 | } 315 | 316 | func testGetUser() { 317 | let exp = expectation(description: "get a entirely filled HNUser instance") 318 | HNLogin.shared.logout() 319 | HNScraper.shared.getUserFrom(Username: HNScraperTest.validFilledUsername, completion: { (user, error) in 320 | XCTAssertEqual(user?.username, HNScraperTest.validFilledUsername) 321 | XCTAssertEqual(String(String(describing: user!.age!).prefix(7)), "2010-08") 322 | XCTAssertNotEqual(user?.karma, 0) 323 | XCTAssertNotNil(user?.aboutInfo) 324 | XCTAssertNotEqual(user!.aboutInfo!, "") 325 | exp.fulfill() 326 | }) 327 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 328 | } 329 | 330 | func testUpvoteWithoutLogin() { 331 | let exp = expectation(description: "get notLoggedIn error") 332 | HNScraper.shared.getPostsList(page: .news, completion: {(posts, linkForMore, error) -> Void in 333 | if posts.count == 0 { 334 | XCTFail("The getPostLists method is probably broken. Or the hackernews is down...") 335 | exp.fulfill() 336 | } 337 | let postToUpvote = posts[0] // first post of the home page 338 | HNScraper.shared.upvote(Post: postToUpvote, completion: {(error) -> Void in 339 | XCTAssertEqual(error, .notLoggedIn) 340 | exp.fulfill() 341 | }) 342 | }) 343 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 344 | } 345 | 346 | 347 | func testGetCommentForValidPostId() { 348 | let exp = expectation(description: "Get some comments") 349 | HNScraper.shared.getComments(ByPostId: HNScraperTest.validPostId) { (post, comments, error) in 350 | XCTAssertNil(error) 351 | XCTAssertGreaterThan(comments.count, 0) 352 | XCTAssertGreaterThan(comments[0].text.count, 0) 353 | XCTAssertGreaterThan(comments[0].username.count, 0) 354 | exp.fulfill() 355 | } 356 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 357 | } 358 | 359 | func testGetCommentsForBadPostId() { 360 | let exp = expectation(description: "Get noSuchPost error") 361 | HNScraper.shared.getComments(ByPostId: "whatpostId") { (post, comments, error) in 362 | XCTAssertEqual(error, .noSuchPost) 363 | exp.fulfill() 364 | } 365 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 366 | } 367 | 368 | func testGetCommentsForUser() { 369 | let exp = expectation(description: "Get some comments with parentId filled") 370 | HNScraper.shared.getComments(ForUserWithUsername: "yoda_sl") { (comments, linkForMore, error) in 371 | XCTAssertNil(error) 372 | XCTAssertGreaterThan(comments.count, 0) 373 | XCTAssertNotNil(linkForMore) 374 | XCTAssertNotEqual(comments[0].parentId, "") 375 | XCTAssertGreaterThan(comments[0].text.count, 0) 376 | XCTAssertGreaterThan(comments[0].username.count, 0) 377 | exp.fulfill() 378 | } 379 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 380 | } 381 | 382 | func testGetMoreComments() { 383 | let exp = expectation(description: "Get some comments with parentId filled") 384 | HNScraper.shared.getComments(ForUserWithUsername: "yoda_sl") { (comments, linkForMore, error) in 385 | XCTAssertNil(error) 386 | XCTAssertGreaterThan(comments.count, 0) 387 | XCTAssertNotNil(linkForMore) 388 | HNScraper.shared.getMoreComments(linkForMore: linkForMore!, completionHandler: { (comments, linkForMore, error) in 389 | XCTAssertNil(error) 390 | XCTAssertGreaterThan(comments.count, 0) 391 | XCTAssertNotNil(linkForMore) 392 | exp.fulfill() 393 | }) 394 | 395 | } 396 | wait(for: [exp], timeout: HNScraperTest.defaultTimeOut) 397 | } 398 | } 399 | -------------------------------------------------------------------------------- /HNScraper.xcodeproj/project.pbxproj: -------------------------------------------------------------------------------- 1 | // !$*UTF8*$! 2 | { 3 | archiveVersion = 1; 4 | classes = { 5 | }; 6 | objectVersion = 48; 7 | objects = { 8 | 9 | /* Begin PBXBuildFile section */ 10 | 97DA10621F81466400ADF5D8 /* HNScraper.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 97DA10591F81466300ADF5D8 /* HNScraper.framework */; }; 11 | 97DA10701F81468C00ADF5D8 /* RessourceFetcher.swift in Sources */ = {isa = PBXBuildFile; fileRef = 97D0D8671F78EFCE007DE08A /* RessourceFetcher.swift */; }; 12 | 97DA10711F81468C00ADF5D8 /* Scanner+ScanBetweenString.swift in Sources */ = {isa = PBXBuildFile; fileRef = 97D0D8821F7E440F007DE08A /* Scanner+ScanBetweenString.swift */; }; 13 | 97DA10731F81468C00ADF5D8 /* HNComment.swift in Sources */ = {isa = PBXBuildFile; fileRef = 97D0D85C1F78EF83007DE08A /* HNComment.swift */; }; 14 | 97DA10741F81468C00ADF5D8 /* HNLogin.swift in Sources */ = {isa = PBXBuildFile; fileRef = 97D0D85E1F78EF83007DE08A /* HNLogin.swift */; }; 15 | 97DA10751F81468C00ADF5D8 /* HNParseConfig.swift in Sources */ = {isa = PBXBuildFile; fileRef = 97D0D85D1F78EF83007DE08A /* HNParseConfig.swift */; }; 16 | 97DA10761F81468C00ADF5D8 /* HNPost.swift in Sources */ = {isa = PBXBuildFile; fileRef = 97D0D85F1F78EF83007DE08A /* HNPost.swift */; }; 17 | 97DA10771F81468C00ADF5D8 /* HNScraper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 97D0D8601F78EF83007DE08A /* HNScraper.swift */; }; 18 | 97DA10781F81468C00ADF5D8 /* HNUser.swift in Sources */ = {isa = PBXBuildFile; fileRef = 97D0D85B1F78EF83007DE08A /* HNUser.swift */; }; 19 | 97DA10791F8147C100ADF5D8 /* HNLoginTest.swift in Sources */ = {isa = PBXBuildFile; fileRef = 97D0D8691F793FD6007DE08A /* HNLoginTest.swift */; }; 20 | 97DA107A1F8147C100ADF5D8 /* HNScraperTest.swift in Sources */ = {isa = PBXBuildFile; fileRef = 97D0D87A1F795291007DE08A /* HNScraperTest.swift */; }; 21 | 97DA107B1F8147C100ADF5D8 /* HNPostTest.swift in Sources */ = {isa = PBXBuildFile; fileRef = 97D0D88B1F7E613D007DE08A /* HNPostTest.swift */; }; 22 | 97DA107C1F8147C100ADF5D8 /* RessourceFetcherTest.swift in Sources */ = {isa = PBXBuildFile; fileRef = 97D0D8881F7E464D007DE08A /* RessourceFetcherTest.swift */; }; 23 | /* End PBXBuildFile section */ 24 | 25 | /* Begin PBXContainerItemProxy section */ 26 | 97DA10631F81466400ADF5D8 /* PBXContainerItemProxy */ = { 27 | isa = PBXContainerItemProxy; 28 | containerPortal = 97D0D83E1F78EF42007DE08A /* Project object */; 29 | proxyType = 1; 30 | remoteGlobalIDString = 97DA10581F81466300ADF5D8; 31 | remoteInfo = HNScraper; 32 | }; 33 | /* End PBXContainerItemProxy section */ 34 | 35 | /* Begin PBXFileReference section */ 36 | 97D0D8551F78EF42007DE08A /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 37 | 97D0D85B1F78EF83007DE08A /* HNUser.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HNUser.swift; sourceTree = ""; }; 38 | 97D0D85C1F78EF83007DE08A /* HNComment.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HNComment.swift; sourceTree = ""; }; 39 | 97D0D85D1F78EF83007DE08A /* HNParseConfig.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HNParseConfig.swift; sourceTree = ""; }; 40 | 97D0D85E1F78EF83007DE08A /* HNLogin.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HNLogin.swift; sourceTree = ""; }; 41 | 97D0D85F1F78EF83007DE08A /* HNPost.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HNPost.swift; sourceTree = ""; }; 42 | 97D0D8601F78EF83007DE08A /* HNScraper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HNScraper.swift; sourceTree = ""; }; 43 | 97D0D8671F78EFCE007DE08A /* RessourceFetcher.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RessourceFetcher.swift; sourceTree = ""; }; 44 | 97D0D8691F793FD6007DE08A /* HNLoginTest.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HNLoginTest.swift; sourceTree = ""; }; 45 | 97D0D8731F79400C007DE08A /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 46 | 97D0D87A1F795291007DE08A /* HNScraperTest.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HNScraperTest.swift; sourceTree = ""; }; 47 | 97D0D8821F7E440F007DE08A /* Scanner+ScanBetweenString.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Scanner+ScanBetweenString.swift"; sourceTree = ""; }; 48 | 97D0D8881F7E464D007DE08A /* RessourceFetcherTest.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = RessourceFetcherTest.swift; sourceTree = ""; }; 49 | 97D0D88B1F7E613D007DE08A /* HNPostTest.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = HNPostTest.swift; sourceTree = ""; }; 50 | 97DA10591F81466300ADF5D8 /* HNScraper.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = HNScraper.framework; sourceTree = BUILT_PRODUCTS_DIR; }; 51 | 97DA10611F81466300ADF5D8 /* HNScraperTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = HNScraperTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; 52 | /* End PBXFileReference section */ 53 | 54 | /* Begin PBXFrameworksBuildPhase section */ 55 | 97DA10551F81466300ADF5D8 /* Frameworks */ = { 56 | isa = PBXFrameworksBuildPhase; 57 | buildActionMask = 2147483647; 58 | files = ( 59 | ); 60 | runOnlyForDeploymentPostprocessing = 0; 61 | }; 62 | 97DA105E1F81466300ADF5D8 /* Frameworks */ = { 63 | isa = PBXFrameworksBuildPhase; 64 | buildActionMask = 2147483647; 65 | files = ( 66 | 97DA10621F81466400ADF5D8 /* HNScraper.framework in Frameworks */, 67 | ); 68 | runOnlyForDeploymentPostprocessing = 0; 69 | }; 70 | /* End PBXFrameworksBuildPhase section */ 71 | 72 | /* Begin PBXGroup section */ 73 | 97D0D83D1F78EF42007DE08A = { 74 | isa = PBXGroup; 75 | children = ( 76 | 97D0D8481F78EF42007DE08A /* HNScraper */, 77 | 97D0D8701F79400C007DE08A /* HNScraperTests */, 78 | 97D0D8471F78EF42007DE08A /* Products */, 79 | ); 80 | sourceTree = ""; 81 | }; 82 | 97D0D8471F78EF42007DE08A /* Products */ = { 83 | isa = PBXGroup; 84 | children = ( 85 | 97DA10591F81466300ADF5D8 /* HNScraper.framework */, 86 | 97DA10611F81466300ADF5D8 /* HNScraperTests.xctest */, 87 | ); 88 | name = Products; 89 | sourceTree = ""; 90 | }; 91 | 97D0D8481F78EF42007DE08A /* HNScraper */ = { 92 | isa = PBXGroup; 93 | children = ( 94 | 97D0D88A1F7E5F4D007DE08A /* Helpers */, 95 | 97D0D85C1F78EF83007DE08A /* HNComment.swift */, 96 | 97D0D85E1F78EF83007DE08A /* HNLogin.swift */, 97 | 97D0D85D1F78EF83007DE08A /* HNParseConfig.swift */, 98 | 97D0D85F1F78EF83007DE08A /* HNPost.swift */, 99 | 97D0D8601F78EF83007DE08A /* HNScraper.swift */, 100 | 97D0D85B1F78EF83007DE08A /* HNUser.swift */, 101 | 97D0D8551F78EF42007DE08A /* Info.plist */, 102 | ); 103 | path = HNScraper; 104 | sourceTree = ""; 105 | }; 106 | 97D0D8701F79400C007DE08A /* HNScraperTests */ = { 107 | isa = PBXGroup; 108 | children = ( 109 | 97D0D8691F793FD6007DE08A /* HNLoginTest.swift */, 110 | 97D0D87A1F795291007DE08A /* HNScraperTest.swift */, 111 | 97D0D88B1F7E613D007DE08A /* HNPostTest.swift */, 112 | 97D0D8881F7E464D007DE08A /* RessourceFetcherTest.swift */, 113 | 97D0D8731F79400C007DE08A /* Info.plist */, 114 | ); 115 | path = HNScraperTests; 116 | sourceTree = ""; 117 | }; 118 | 97D0D88A1F7E5F4D007DE08A /* Helpers */ = { 119 | isa = PBXGroup; 120 | children = ( 121 | 97D0D8671F78EFCE007DE08A /* RessourceFetcher.swift */, 122 | 97D0D8821F7E440F007DE08A /* Scanner+ScanBetweenString.swift */, 123 | ); 124 | path = Helpers; 125 | sourceTree = ""; 126 | }; 127 | /* End PBXGroup section */ 128 | 129 | /* Begin PBXHeadersBuildPhase section */ 130 | 97DA10561F81466300ADF5D8 /* Headers */ = { 131 | isa = PBXHeadersBuildPhase; 132 | buildActionMask = 2147483647; 133 | files = ( 134 | ); 135 | runOnlyForDeploymentPostprocessing = 0; 136 | }; 137 | /* End PBXHeadersBuildPhase section */ 138 | 139 | /* Begin PBXNativeTarget section */ 140 | 97DA10581F81466300ADF5D8 /* HNScraper */ = { 141 | isa = PBXNativeTarget; 142 | buildConfigurationList = 97DA106A1F81466400ADF5D8 /* Build configuration list for PBXNativeTarget "HNScraper" */; 143 | buildPhases = ( 144 | 97DA10541F81466300ADF5D8 /* Sources */, 145 | 97DA10551F81466300ADF5D8 /* Frameworks */, 146 | 97DA10561F81466300ADF5D8 /* Headers */, 147 | 97DA10571F81466300ADF5D8 /* Resources */, 148 | ); 149 | buildRules = ( 150 | ); 151 | dependencies = ( 152 | ); 153 | name = HNScraper; 154 | productName = HNScraper; 155 | productReference = 97DA10591F81466300ADF5D8 /* HNScraper.framework */; 156 | productType = "com.apple.product-type.framework"; 157 | }; 158 | 97DA10601F81466300ADF5D8 /* HNScraperTests */ = { 159 | isa = PBXNativeTarget; 160 | buildConfigurationList = 97DA106D1F81466400ADF5D8 /* Build configuration list for PBXNativeTarget "HNScraperTests" */; 161 | buildPhases = ( 162 | 97DA105D1F81466300ADF5D8 /* Sources */, 163 | 97DA105E1F81466300ADF5D8 /* Frameworks */, 164 | 97DA105F1F81466300ADF5D8 /* Resources */, 165 | ); 166 | buildRules = ( 167 | ); 168 | dependencies = ( 169 | 97DA10641F81466400ADF5D8 /* PBXTargetDependency */, 170 | ); 171 | name = HNScraperTests; 172 | productName = HNScraperTests; 173 | productReference = 97DA10611F81466300ADF5D8 /* HNScraperTests.xctest */; 174 | productType = "com.apple.product-type.bundle.unit-test"; 175 | }; 176 | /* End PBXNativeTarget section */ 177 | 178 | /* Begin PBXProject section */ 179 | 97D0D83E1F78EF42007DE08A /* Project object */ = { 180 | isa = PBXProject; 181 | attributes = { 182 | LastSwiftUpdateCheck = 0900; 183 | LastUpgradeCheck = 0900; 184 | ORGANIZATIONNAME = "Stéphane Sercu"; 185 | TargetAttributes = { 186 | 97DA10581F81466300ADF5D8 = { 187 | CreatedOnToolsVersion = 9.0; 188 | ProvisioningStyle = Automatic; 189 | }; 190 | 97DA10601F81466300ADF5D8 = { 191 | CreatedOnToolsVersion = 9.0; 192 | ProvisioningStyle = Automatic; 193 | }; 194 | }; 195 | }; 196 | buildConfigurationList = 97D0D8411F78EF42007DE08A /* Build configuration list for PBXProject "HNScraper" */; 197 | compatibilityVersion = "Xcode 8.0"; 198 | developmentRegion = en; 199 | hasScannedForEncodings = 0; 200 | knownRegions = ( 201 | en, 202 | Base, 203 | ); 204 | mainGroup = 97D0D83D1F78EF42007DE08A; 205 | productRefGroup = 97D0D8471F78EF42007DE08A /* Products */; 206 | projectDirPath = ""; 207 | projectRoot = ""; 208 | targets = ( 209 | 97DA10581F81466300ADF5D8 /* HNScraper */, 210 | 97DA10601F81466300ADF5D8 /* HNScraperTests */, 211 | ); 212 | }; 213 | /* End PBXProject section */ 214 | 215 | /* Begin PBXResourcesBuildPhase section */ 216 | 97DA10571F81466300ADF5D8 /* Resources */ = { 217 | isa = PBXResourcesBuildPhase; 218 | buildActionMask = 2147483647; 219 | files = ( 220 | ); 221 | runOnlyForDeploymentPostprocessing = 0; 222 | }; 223 | 97DA105F1F81466300ADF5D8 /* Resources */ = { 224 | isa = PBXResourcesBuildPhase; 225 | buildActionMask = 2147483647; 226 | files = ( 227 | ); 228 | runOnlyForDeploymentPostprocessing = 0; 229 | }; 230 | /* End PBXResourcesBuildPhase section */ 231 | 232 | /* Begin PBXSourcesBuildPhase section */ 233 | 97DA10541F81466300ADF5D8 /* Sources */ = { 234 | isa = PBXSourcesBuildPhase; 235 | buildActionMask = 2147483647; 236 | files = ( 237 | 97DA10771F81468C00ADF5D8 /* HNScraper.swift in Sources */, 238 | 97DA10741F81468C00ADF5D8 /* HNLogin.swift in Sources */, 239 | 97DA10701F81468C00ADF5D8 /* RessourceFetcher.swift in Sources */, 240 | 97DA10751F81468C00ADF5D8 /* HNParseConfig.swift in Sources */, 241 | 97DA10781F81468C00ADF5D8 /* HNUser.swift in Sources */, 242 | 97DA10761F81468C00ADF5D8 /* HNPost.swift in Sources */, 243 | 97DA10711F81468C00ADF5D8 /* Scanner+ScanBetweenString.swift in Sources */, 244 | 97DA10731F81468C00ADF5D8 /* HNComment.swift in Sources */, 245 | ); 246 | runOnlyForDeploymentPostprocessing = 0; 247 | }; 248 | 97DA105D1F81466300ADF5D8 /* Sources */ = { 249 | isa = PBXSourcesBuildPhase; 250 | buildActionMask = 2147483647; 251 | files = ( 252 | 97DA107B1F8147C100ADF5D8 /* HNPostTest.swift in Sources */, 253 | 97DA107C1F8147C100ADF5D8 /* RessourceFetcherTest.swift in Sources */, 254 | 97DA107A1F8147C100ADF5D8 /* HNScraperTest.swift in Sources */, 255 | 97DA10791F8147C100ADF5D8 /* HNLoginTest.swift in Sources */, 256 | ); 257 | runOnlyForDeploymentPostprocessing = 0; 258 | }; 259 | /* End PBXSourcesBuildPhase section */ 260 | 261 | /* Begin PBXTargetDependency section */ 262 | 97DA10641F81466400ADF5D8 /* PBXTargetDependency */ = { 263 | isa = PBXTargetDependency; 264 | target = 97DA10581F81466300ADF5D8 /* HNScraper */; 265 | targetProxy = 97DA10631F81466400ADF5D8 /* PBXContainerItemProxy */; 266 | }; 267 | /* End PBXTargetDependency section */ 268 | 269 | /* Begin XCBuildConfiguration section */ 270 | 97D0D8561F78EF42007DE08A /* Debug */ = { 271 | isa = XCBuildConfiguration; 272 | buildSettings = { 273 | ALWAYS_SEARCH_USER_PATHS = NO; 274 | CLANG_ANALYZER_NONNULL = YES; 275 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 276 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; 277 | CLANG_CXX_LIBRARY = "libc++"; 278 | CLANG_ENABLE_MODULES = YES; 279 | CLANG_ENABLE_OBJC_ARC = YES; 280 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 281 | CLANG_WARN_BOOL_CONVERSION = YES; 282 | CLANG_WARN_COMMA = YES; 283 | CLANG_WARN_CONSTANT_CONVERSION = YES; 284 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 285 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 286 | CLANG_WARN_EMPTY_BODY = YES; 287 | CLANG_WARN_ENUM_CONVERSION = YES; 288 | CLANG_WARN_INFINITE_RECURSION = YES; 289 | CLANG_WARN_INT_CONVERSION = YES; 290 | CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; 291 | CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; 292 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 293 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 294 | CLANG_WARN_STRICT_PROTOTYPES = YES; 295 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 296 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 297 | CLANG_WARN_UNREACHABLE_CODE = YES; 298 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 299 | CODE_SIGN_IDENTITY = "iPhone Developer"; 300 | COPY_PHASE_STRIP = NO; 301 | DEBUG_INFORMATION_FORMAT = dwarf; 302 | ENABLE_STRICT_OBJC_MSGSEND = YES; 303 | ENABLE_TESTABILITY = YES; 304 | GCC_C_LANGUAGE_STANDARD = gnu11; 305 | GCC_DYNAMIC_NO_PIC = NO; 306 | GCC_NO_COMMON_BLOCKS = YES; 307 | GCC_OPTIMIZATION_LEVEL = 0; 308 | GCC_PREPROCESSOR_DEFINITIONS = ( 309 | "DEBUG=1", 310 | "$(inherited)", 311 | ); 312 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 313 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 314 | GCC_WARN_UNDECLARED_SELECTOR = YES; 315 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 316 | GCC_WARN_UNUSED_FUNCTION = YES; 317 | GCC_WARN_UNUSED_VARIABLE = YES; 318 | IPHONEOS_DEPLOYMENT_TARGET = 9.1; 319 | MTL_ENABLE_DEBUG_INFO = YES; 320 | ONLY_ACTIVE_ARCH = YES; 321 | SDKROOT = iphoneos; 322 | SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; 323 | SWIFT_OPTIMIZATION_LEVEL = "-Onone"; 324 | }; 325 | name = Debug; 326 | }; 327 | 97D0D8571F78EF42007DE08A /* Release */ = { 328 | isa = XCBuildConfiguration; 329 | buildSettings = { 330 | ALWAYS_SEARCH_USER_PATHS = NO; 331 | CLANG_ANALYZER_NONNULL = YES; 332 | CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; 333 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; 334 | CLANG_CXX_LIBRARY = "libc++"; 335 | CLANG_ENABLE_MODULES = YES; 336 | CLANG_ENABLE_OBJC_ARC = YES; 337 | CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; 338 | CLANG_WARN_BOOL_CONVERSION = YES; 339 | CLANG_WARN_COMMA = YES; 340 | CLANG_WARN_CONSTANT_CONVERSION = YES; 341 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 342 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 343 | CLANG_WARN_EMPTY_BODY = YES; 344 | CLANG_WARN_ENUM_CONVERSION = YES; 345 | CLANG_WARN_INFINITE_RECURSION = YES; 346 | CLANG_WARN_INT_CONVERSION = YES; 347 | CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; 348 | CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; 349 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 350 | CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; 351 | CLANG_WARN_STRICT_PROTOTYPES = YES; 352 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 353 | CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; 354 | CLANG_WARN_UNREACHABLE_CODE = YES; 355 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 356 | CODE_SIGN_IDENTITY = "iPhone Developer"; 357 | COPY_PHASE_STRIP = NO; 358 | DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; 359 | ENABLE_NS_ASSERTIONS = NO; 360 | ENABLE_STRICT_OBJC_MSGSEND = YES; 361 | GCC_C_LANGUAGE_STANDARD = gnu11; 362 | GCC_NO_COMMON_BLOCKS = YES; 363 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 364 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 365 | GCC_WARN_UNDECLARED_SELECTOR = YES; 366 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 367 | GCC_WARN_UNUSED_FUNCTION = YES; 368 | GCC_WARN_UNUSED_VARIABLE = YES; 369 | IPHONEOS_DEPLOYMENT_TARGET = 9.1; 370 | MTL_ENABLE_DEBUG_INFO = NO; 371 | SDKROOT = iphoneos; 372 | SWIFT_OPTIMIZATION_LEVEL = "-Owholemodule"; 373 | VALIDATE_PRODUCT = YES; 374 | }; 375 | name = Release; 376 | }; 377 | 97DA106B1F81466400ADF5D8 /* Debug */ = { 378 | isa = XCBuildConfiguration; 379 | buildSettings = { 380 | CODE_SIGN_IDENTITY = ""; 381 | CODE_SIGN_STYLE = Automatic; 382 | CURRENT_PROJECT_VERSION = 1; 383 | DEFINES_MODULE = YES; 384 | DEVELOPMENT_TEAM = 5P2WT92MAV; 385 | DYLIB_COMPATIBILITY_VERSION = 1; 386 | DYLIB_CURRENT_VERSION = 1; 387 | DYLIB_INSTALL_NAME_BASE = "@rpath"; 388 | INFOPLIST_FILE = HNScraper/Info.plist; 389 | INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; 390 | IPHONEOS_DEPLOYMENT_TARGET = 9.1; 391 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks"; 392 | PRODUCT_BUNDLE_IDENTIFIER = StephSercu.HNScraper; 393 | PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; 394 | SKIP_INSTALL = NO; 395 | SWIFT_VERSION = 4.0; 396 | TARGETED_DEVICE_FAMILY = "1,2"; 397 | VERSIONING_SYSTEM = "apple-generic"; 398 | VERSION_INFO_PREFIX = ""; 399 | }; 400 | name = Debug; 401 | }; 402 | 97DA106C1F81466400ADF5D8 /* Release */ = { 403 | isa = XCBuildConfiguration; 404 | buildSettings = { 405 | CODE_SIGN_IDENTITY = ""; 406 | CODE_SIGN_STYLE = Automatic; 407 | CURRENT_PROJECT_VERSION = 1; 408 | DEFINES_MODULE = YES; 409 | DEVELOPMENT_TEAM = 5P2WT92MAV; 410 | DYLIB_COMPATIBILITY_VERSION = 1; 411 | DYLIB_CURRENT_VERSION = 1; 412 | DYLIB_INSTALL_NAME_BASE = "@rpath"; 413 | INFOPLIST_FILE = HNScraper/Info.plist; 414 | INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; 415 | IPHONEOS_DEPLOYMENT_TARGET = 9.1; 416 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks"; 417 | PRODUCT_BUNDLE_IDENTIFIER = StephSercu.HNScraper; 418 | PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)"; 419 | SKIP_INSTALL = NO; 420 | SWIFT_VERSION = 4.0; 421 | TARGETED_DEVICE_FAMILY = "1,2"; 422 | VERSIONING_SYSTEM = "apple-generic"; 423 | VERSION_INFO_PREFIX = ""; 424 | }; 425 | name = Release; 426 | }; 427 | 97DA106E1F81466400ADF5D8 /* Debug */ = { 428 | isa = XCBuildConfiguration; 429 | buildSettings = { 430 | ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; 431 | CODE_SIGN_STYLE = Automatic; 432 | DEVELOPMENT_TEAM = 5P2WT92MAV; 433 | INFOPLIST_FILE = HNScraperTests/Info.plist; 434 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks"; 435 | PRODUCT_BUNDLE_IDENTIFIER = StephSercu.HNScraperTests; 436 | PRODUCT_NAME = "$(TARGET_NAME)"; 437 | SWIFT_VERSION = 4.0; 438 | TARGETED_DEVICE_FAMILY = "1,2"; 439 | }; 440 | name = Debug; 441 | }; 442 | 97DA106F1F81466400ADF5D8 /* Release */ = { 443 | isa = XCBuildConfiguration; 444 | buildSettings = { 445 | ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; 446 | CODE_SIGN_STYLE = Automatic; 447 | DEVELOPMENT_TEAM = 5P2WT92MAV; 448 | INFOPLIST_FILE = HNScraperTests/Info.plist; 449 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks @loader_path/Frameworks"; 450 | PRODUCT_BUNDLE_IDENTIFIER = StephSercu.HNScraperTests; 451 | PRODUCT_NAME = "$(TARGET_NAME)"; 452 | SWIFT_VERSION = 4.0; 453 | TARGETED_DEVICE_FAMILY = "1,2"; 454 | }; 455 | name = Release; 456 | }; 457 | /* End XCBuildConfiguration section */ 458 | 459 | /* Begin XCConfigurationList section */ 460 | 97D0D8411F78EF42007DE08A /* Build configuration list for PBXProject "HNScraper" */ = { 461 | isa = XCConfigurationList; 462 | buildConfigurations = ( 463 | 97D0D8561F78EF42007DE08A /* Debug */, 464 | 97D0D8571F78EF42007DE08A /* Release */, 465 | ); 466 | defaultConfigurationIsVisible = 0; 467 | defaultConfigurationName = Release; 468 | }; 469 | 97DA106A1F81466400ADF5D8 /* Build configuration list for PBXNativeTarget "HNScraper" */ = { 470 | isa = XCConfigurationList; 471 | buildConfigurations = ( 472 | 97DA106B1F81466400ADF5D8 /* Debug */, 473 | 97DA106C1F81466400ADF5D8 /* Release */, 474 | ); 475 | defaultConfigurationIsVisible = 0; 476 | defaultConfigurationName = Release; 477 | }; 478 | 97DA106D1F81466400ADF5D8 /* Build configuration list for PBXNativeTarget "HNScraperTests" */ = { 479 | isa = XCConfigurationList; 480 | buildConfigurations = ( 481 | 97DA106E1F81466400ADF5D8 /* Debug */, 482 | 97DA106F1F81466400ADF5D8 /* Release */, 483 | ); 484 | defaultConfigurationIsVisible = 0; 485 | defaultConfigurationName = Release; 486 | }; 487 | /* End XCConfigurationList section */ 488 | }; 489 | rootObject = 97D0D83E1F78EF42007DE08A /* Project object */; 490 | } 491 | -------------------------------------------------------------------------------- /HNScraper/HNScraper.swift: -------------------------------------------------------------------------------- 1 | // 2 | // HNScraper.swift 3 | // HackerNews2 4 | // 5 | // Created by Stéphane Sercu on 8/09/17. 6 | // Copyright © 2017 Stéphane Sercu. All rights reserved. 7 | // 8 | 9 | import Foundation 10 | public class HNScraper { 11 | // ================================================== 12 | // MARK: Private members 13 | // ================================================== 14 | private init() {} 15 | 16 | 17 | // TODO: put those canstant in the parsingConfig json file 18 | public static let baseUrl = "https://news.ycombinator.com/" 19 | 20 | // Following variables are used by the parsing function. 21 | // They have to be filled before those function are called. 22 | private var postsHtmlToBeParsed: String? 23 | private var commentsHtmlToBeParsed: String? 24 | 25 | 26 | // ================================================== 27 | // MARK: - types definition 28 | // ================================================== 29 | 30 | /** 31 | * The first parameter is the list of downloaded posts. It can be empty 32 | * in case of error or if no posts were found on the particular page. 33 | * The second parameter is the (relative) link to the next page of the list. 34 | * The third parameter contains the eventual error produced by either the 35 | * request processing or the parsing of the response. 36 | */ 37 | public typealias PostListDownloadCompletionHandler = (([HNPost], String?, HNScraperError?) -> Void) // (list, linkForMore, error) 38 | 39 | /// Supported post list pages 40 | public enum PostListPageName { 41 | /// Home page 42 | case news 43 | // Today's front page 44 | case front 45 | /// Latest submissions 46 | case new 47 | /// Jobs only (new first) 48 | case jobs 49 | /// Asks only (new first) 50 | case asks 51 | /// Shows only (top) 52 | case shows 53 | /// Shows only (latest) 54 | case newshows 55 | /// All news with most active discussion thread first 56 | case active 57 | /// Highest (recent) score 58 | case best 59 | /// More recent, only by new users 60 | case noob 61 | } 62 | 63 | /// Errors thrown by the scraper 64 | public enum HNScraperError: Error { 65 | /// The configuration file is needed but couldn't be downloaded/find locally 66 | case missingOrCorruptedConfigFile 67 | /// When a method fails to parse structured data 68 | case parsingError 69 | /// A specified url is either malformed or point to a non-existing ressource 70 | case invalidURL 71 | /// No internet connection 72 | case noInternet 73 | /// The user isn't logged in while the action he asked needs him to be. 74 | case notLoggedIn 75 | /// No data could be retrieved from the specified location 76 | case noData 77 | /// Problem on server side 78 | case serverUnreachable 79 | /// When the username used to make a request doesn't exist (doesn't apply to login attempts) 80 | case noSuchUser 81 | /// When the post id used to make a request doesn't exist 82 | case noSuchPost 83 | case unknown 84 | 85 | init?(_ error: RessourceFetcher.RessourceFetchingError?) { 86 | if error == nil { 87 | return nil 88 | } 89 | if error == .noIternet { 90 | self = .noInternet 91 | } else if error == .noData { 92 | self = .noData 93 | } else if error == .invalidURL || error == .badHTTPRequest400Range { 94 | self = .invalidURL 95 | } else if error == .serverError500Range || error == .serverUnreachable || error == .securityIssue { 96 | self = .serverUnreachable 97 | } else if error == .parsingError { 98 | self = .parsingError 99 | } else { 100 | self = .unknown 101 | } 102 | } 103 | } 104 | 105 | /// Dictionnary that associates a name of a post list page with its url 106 | let postListPages: [PostListPageName: String] = [.news: baseUrl + "news", 107 | .front: baseUrl + "front", 108 | .new: baseUrl + "newest", 109 | .jobs: baseUrl + "jobs", 110 | .asks: baseUrl + "ask", 111 | .shows: baseUrl + "show", 112 | .newshows: baseUrl + "shownew", 113 | .active: baseUrl + "active", 114 | .best: baseUrl + "best", 115 | .noob: baseUrl + "noobstories"] 116 | 117 | 118 | 119 | public static let shared = HNScraper() 120 | 121 | 122 | 123 | // ================================================== 124 | // MARK: - Download list of posts 125 | // ================================================== 126 | 127 | /** 128 | Fetch the parseConfig file and the html of a page containing a 129 | list of hn posts. Parse this page and build a list a HNPost objects. 130 | - paramaters: 131 | - page: the list to download 132 | - completion: handler called with the list of HNPosts as parameter when completed 133 | */ 134 | public func getPostsList(page: PostListPageName, completion: @escaping PostListDownloadCompletionHandler) { 135 | let url: String! = postListPages[page]! 136 | self.getPostsList(url: url, completion: completion) 137 | 138 | } 139 | /** 140 | Fetch the parseConfig file and the html of a page containing a 141 | list of hn posts. Parse this page and build a list a HNPost objects. 142 | - parameters: 143 | - url: the url of the page to download and parse 144 | - completion: handler called with the list of HNPosts as parameter when completed 145 | */ 146 | private func getPostsList(url: String, completion: @escaping PostListDownloadCompletionHandler) { 147 | self.getHtmlAndParsingConfig(url: url) { (html, error) in 148 | if html == nil { 149 | completion([], nil, error ?? .noData) 150 | return 151 | } 152 | self.postsHtmlToBeParsed = html 153 | self.parseDownloadedPosts(completion: completion) 154 | } 155 | } 156 | 157 | /** 158 | Fetches the discussion page of a post and the parseConfig file. Parses the webpage and builds an HNPost object from it. 159 | - parameters: 160 | - id: the id of the post to retrieve 161 | - completion: handler called with the HNPost and the associated comments as parameter when completed 162 | */ 163 | public func getPost(ById id: String, buildHierarchy: Bool = true, completion: @escaping ((HNPost?, [HNComment], HNScraperError?) -> Void)) { 164 | self.getHtmlAndParsingConfig(url: HNScraper.baseUrl + "item?id=\(id)") { (html, error) in 165 | if html == nil { 166 | completion(nil, [], error ?? .noData) 167 | return 168 | } 169 | if html! == "No such item." { 170 | completion(nil, [], .noSuchPost) 171 | return 172 | } 173 | guard let parseConfig = HNParseConfig.shared.data else { 174 | completion(nil, [], error ?? .missingOrCorruptedConfigFile) 175 | return 176 | } 177 | self.commentsHtmlToBeParsed = html 178 | if let post = HNPost(fromHtml: html!, withParsingConfig: parseConfig) { 179 | self.parseDownloadedComments(ForPost: post, buildHierarchy: buildHierarchy, completion: { post, comments, linkFormore, error -> Void in completion(post, comments, error) }) 180 | } else { 181 | completion(nil, [], .parsingError) 182 | } 183 | 184 | } 185 | } 186 | 187 | /** 188 | Download the a page's html and pass it to the completion handler 189 | - parameters: 190 | - url: the url of the page 191 | - completion: the closure called with the html content as 192 | parameter when the download is completed 193 | */ 194 | private func downloadHtmlPage(urlString: String, cookie: HTTPCookie? = nil, completion: @escaping ((String?, HNScraperError?) -> Void)) { 195 | RessourceFetcher.shared.fetchData(urlString: urlString, completion: {(data, error) -> Void in 196 | if data == nil { 197 | completion(nil, HNScraperError(error) ?? .noData) 198 | } else { 199 | if let decodedHtml = String(data: data!, encoding: .utf8) { 200 | completion(decodedHtml, HNScraperError(error)) 201 | } else { 202 | completion(nil, HNScraperError(error) ?? .parsingError) 203 | } 204 | } 205 | 206 | }) 207 | } 208 | 209 | public func testPostFromDiscussionThread(urlString: String, completion: @escaping ((String?, HNScraperError?) -> Void)) { 210 | self.getHtmlAndParsingConfig(url: urlString, completion: completion) 211 | } 212 | 213 | /** 214 | * Parse the html of a list of posts, contained in the 215 | * var postsHtmlToBeParsed, turn it into a list 216 | * of HNPosts and passes it to the completion handler 217 | * - Note: This method needs the configFile to be accessible 218 | * and the postsHtmlToBeParsed variable to be correctly 219 | * filled. For better error management, you may want 220 | * check that before calling it. 221 | */ 222 | private func parseDownloadedPosts(completion: PostListDownloadCompletionHandler) { 223 | let parseConfig = HNParseConfig.shared.data 224 | let html = self.postsHtmlToBeParsed 225 | if html == nil { 226 | completion([], nil, .noData) 227 | return 228 | } 229 | if parseConfig == nil { 230 | completion([], nil, .missingOrCorruptedConfigFile) 231 | return 232 | } 233 | 234 | var postAr: [HNPost] = [] // stores the results 235 | var linkForMore: String? = nil // link to next page ofthe list 236 | var postsConfig: [String : Any]? = (parseConfig != nil && parseConfig!["Post"] != nil) ? parseConfig!["Post"] as? [String : Any] : nil 237 | var htmlComponents: Array = [] 238 | if postsConfig != nil && postsConfig!["CS"] != nil { 239 | htmlComponents = html!.components(separatedBy: postsConfig!["CS"] as! String) 240 | } else { 241 | completion([], nil, .missingOrCorruptedConfigFile) 242 | return 243 | } 244 | 245 | if htmlComponents.count == 0 { 246 | completion([], nil, nil) 247 | return 248 | } 249 | 250 | var htmlComponentCounter = 0 251 | htmlComponents.remove(at: 0) 252 | for htmlComponent in htmlComponents { 253 | 254 | if let newPost = HNPost(fromHtml: htmlComponent, withParsingConfig: parseConfig!) { 255 | postAr.append(newPost) 256 | } else { 257 | // TODO: better logging 258 | print("There was an error while parsing a downloaded post.") // returns a parsingError only if all the components fail to be parsed. 259 | } 260 | 261 | // If last item of the page, try to grab the link for next page. 262 | if (htmlComponentCounter == htmlComponents.count - 1) { 263 | linkForMore = parseLinkForMoreFromPostsList(html: htmlComponent, withParsingConfig: parseConfig!) 264 | } 265 | 266 | htmlComponentCounter += 1 267 | } 268 | if postAr.count == 0 && htmlComponents.count > 0{ 269 | completion([], linkForMore, .parsingError) 270 | } else { 271 | completion(postAr, linkForMore, nil) 272 | } 273 | } 274 | 275 | /// Fetch the next page of a post list page using the "link for more" provided by the getPostsList method. 276 | public func getMoreItems(linkForMore: String, completionHandler: @escaping PostListDownloadCompletionHandler) { 277 | self.getPostsList(url: HNScraper.baseUrl + linkForMore, completion: completionHandler) 278 | } 279 | 280 | public func getMoreComments(linkForMore: String, completionHandler: @escaping (([HNComment], String?, HNScraperError?) -> Void)) { 281 | self.getComments(FromURl: HNScraper.baseUrl + linkForMore, completion: completionHandler) 282 | } 283 | 284 | /** 285 | * Parse the last part of the html page of the posts/coments list to find the link to the next page. 286 | * - parameters: 287 | * - html: the part of the html of the list page containing the link to the next page. 288 | * - parseConfigS: the identifying string just before the link (found in the parseConfig file) 289 | * - parseConfigE: the identifying string just after the link (found in the parseConfig file) 290 | * - note: In case of an error, it will just return nil, no error is reported. 291 | */ 292 | private func parseLinkForMore(html: String, parseConfigS: String, parseConfigE: String) -> String? { 293 | let scanner: Scanner = Scanner(string: html) 294 | var trash: NSString? = "" 295 | 296 | scanner.scanUpTo(parseConfigS, into: &trash) 297 | var linkForMore: NSString? = "" 298 | scanner.scanString(parseConfigS, into: &trash) 299 | scanner.scanUpTo(parseConfigE, into: &linkForMore) 300 | var finalLinkForMore = (linkForMore?.replacingOccurrences(of: "/", with: ""))! 301 | finalLinkForMore = finalLinkForMore.replacingOccurrences(of: "&", with: "&") 302 | return finalLinkForMore 303 | } 304 | /** 305 | * Parse the last part of the html page of the posts list to find the link to the next page. 306 | * - parameters: 307 | * - html: the part of the html of the list page containing the link to the next page. 308 | * - note: In case of an error, it will just return nil, no error is reported. 309 | */ 310 | private func parseLinkForMoreFromPostsList(html: String, withParsingConfig parseConfig: [String: Any]) -> String? { 311 | let postsConfig: [String : Any]? = parseConfig["Post"] as? [String: Any] 312 | if postsConfig == nil { 313 | return nil 314 | } 315 | var linkConfig: [String : String]? = postsConfig!["LinkForMore"] as? [String: String] 316 | if linkConfig == nil { 317 | return nil 318 | } 319 | guard let parseConfigS = linkConfig!["S"], let parseConfigE = linkConfig!["E"] else { 320 | return nil 321 | } 322 | return parseLinkForMore(html: html, parseConfigS: parseConfigS, parseConfigE: parseConfigE) 323 | } 324 | 325 | /** 326 | * Parse the last part of a list of comment (typically the list of comments submitted by a user) to find the "More" link 327 | * - Parameters: 328 | * - html: the part of the html of the list page containing the link to the next page. 329 | * - parseConfigS 330 | */ 331 | private func parseLinkForMoreFromCommentsList(html: String, withParsingConfig parseConfig: [String: Any]) -> String? { 332 | let postsConfig: [String : Any]? = parseConfig["Comment"] as? [String: Any] 333 | if postsConfig == nil { 334 | return nil 335 | } 336 | var linkConfig: [String : String]? = postsConfig!["LinkForMore"] as? [String: String] 337 | if linkConfig == nil { 338 | return nil 339 | } 340 | guard let parseConfigS = linkConfig!["S"], let parseConfigE = linkConfig!["E"] else { 341 | return nil 342 | } 343 | return parseLinkForMore(html: html, parseConfigS: parseConfigS, parseConfigE: parseConfigE) 344 | } 345 | 346 | /** 347 | Download the html page pointed by the specified url and 348 | retrieve the parsing configuration json file symultaniously, 349 | then call the completion handler when the two actions are 350 | completed. 351 | This method is usefull to any other method that needs to 352 | download a webpage and parse it useing the configuration file. 353 | */ 354 | private func getHtmlAndParsingConfig(url: String, completion: @escaping ((String?, HNScraperError?) -> Void)) { 355 | let group = DispatchGroup() 356 | var _html: String? 357 | var parsingError: HNScraperError? 358 | // Fetch the page 359 | group.enter() 360 | downloadHtmlPage(urlString: url, completion: {(html, error) -> Void in 361 | parsingError = error 362 | _html = html 363 | group.leave() 364 | }) 365 | // Check for the parsing configuration data. If not locally found, download it 366 | if HNParseConfig.shared.data == nil { 367 | group.enter() 368 | HNParseConfig.shared.getDictionnary(completion: {(config, error) -> Void in 369 | group.leave() // TODO: what if an error occurs here? 370 | }) 371 | } 372 | 373 | // Call the completion handler when the two files are downloaded 374 | group.notify(queue: .main) { 375 | completion(_html, parsingError) 376 | } 377 | 378 | } 379 | 380 | 381 | // ================================================== 382 | // MARK: - Download discussion threads 383 | // ================================================== 384 | 385 | /// - Note: this is an alias for the method `getPost(ById:buildHierarchy:completion)` 386 | public func getComments(ByPostId postId: String, buildHierarchy: Bool = true, completion: @escaping ((HNPost?, [HNComment], HNScraperError?) -> Void)) { 387 | self.getPost(ById: postId, buildHierarchy: buildHierarchy, completion: completion) 388 | } 389 | 390 | private func getComments(FromURl url: String, buildHierarchy: Bool = true, completion: @escaping (([HNComment], String?, HNScraperError?) -> Void)) { 391 | getHtmlAndParsingConfig(url: url, completion: { html, error -> Void in 392 | if html == nil { 393 | completion([], nil, error ?? .noData) 394 | return 395 | } 396 | self.commentsHtmlToBeParsed = html 397 | self.parseDownloadedComments(ForPost: HNPost(), buildHierarchy: buildHierarchy, completion: {(post, comments, linkForMore, error) in 398 | completion(comments, linkForMore, error) 399 | }) 400 | }) 401 | } 402 | 403 | /** 404 | Fetches the comments assiciated to the specified post. 405 | - parameters: 406 | - post: the post to retrieve the comments for 407 | - buildHierarchy: indicates if the comments must be nested or must all be placed at the root of the array 408 | - Note: the type of the post has to be specified in order to handle a askHN or a job correctly 409 | */ 410 | public func getComments(ForPost post: HNPost, buildHierarchy: Bool = true, completion: @escaping ((HNPost, [HNComment], HNScraperError?) -> Void)) { 411 | let url = HNScraper.baseUrl + "item?id=\(post.id)" 412 | 413 | getHtmlAndParsingConfig(url: url, completion: { html, error -> Void in 414 | if html == nil { 415 | completion(post, [], error ?? .noData) 416 | return 417 | } 418 | if html! == "No such item." { 419 | completion(post, [], .noSuchPost) 420 | return 421 | } 422 | self.commentsHtmlToBeParsed = html 423 | self.parseDownloadedComments(ForPost: post, buildHierarchy: buildHierarchy, completion: { post, comments, linkFormore, error -> Void in completion(post, comments, error) }) 424 | }) 425 | } 426 | 427 | // TODO: That method is ugly, That method is ugly, That method's ugly, 'at method's ugly, thod's ugly, thod's gly, thod's y, Hodor 428 | private func parseDownloadedComments(ForPost post:HNPost, buildHierarchy: Bool = true, completion: ((HNPost, [HNComment], String?, HNScraperError?) -> Void)) { 429 | let parseConfig = HNParseConfig.shared.data 430 | let html = self.commentsHtmlToBeParsed 431 | if html == nil { 432 | completion(post, [], nil, .noData) 433 | return 434 | } 435 | if parseConfig == nil { 436 | completion(post, [], nil, .missingOrCorruptedConfigFile) 437 | return 438 | } 439 | 440 | var rootComments: [HNComment] = [] // parsed comments in hierarchical form 441 | var allComments: [HNComment] = [] // parsed comments in linear form 442 | var lastCommentByLevel: [Int: HNComment] = [:] // Last parsed comment for each level, used to find the parent of a comment 443 | 444 | 445 | // Set Up 446 | var commentDict: [String : Any]? = (parseConfig != nil && parseConfig!["Comment"] != nil) ? parseConfig!["Comment"] as? [String: Any] : nil 447 | if (commentDict == nil) { 448 | completion(post, [], nil, .missingOrCorruptedConfigFile) 449 | return 450 | } 451 | 452 | var htmlComponents = commentDict!["CS"] != nil ? html!.components(separatedBy: commentDict!["CS"] as! String) : nil 453 | if (htmlComponents == nil) { 454 | completion(post, [], nil, .missingOrCorruptedConfigFile) 455 | return 456 | } 457 | 458 | 459 | if commentDict!["Reply"] != nil && (commentDict!["Reply"] as! [String: Any])["R"] != nil && html!.contains((commentDict!["Reply"] as! [String: Any])["R"]! as! String) { 460 | var cDict: [String: Any] = [:] 461 | let scanner = Scanner(string: html!) 462 | 463 | let parts = (commentDict!["Reply"] as! [String: Any])["Parts"] as! [[String : Any]] 464 | for part in parts { 465 | var new: NSString? = "" 466 | let isTrash = part["I"] as! String == "TRASH" 467 | scanner.scanBetweenString(stringA: part["S"] as! String, stringB: part["E"] as! String, into: &new) 468 | if (!isTrash && (new?.length)! > 0) { 469 | cDict[part["I"] as! String] = new 470 | } 471 | 472 | } 473 | post.replyAction = cDict["action"] as? String ?? "" 474 | post.replyParent = cDict["parent"] as? String ?? "" 475 | post.replyHmac = cDict["hmac"] as? String ?? "" 476 | post.replyText = cDict["replyText"] as? String ?? "" 477 | post.replyGoto = cDict["goto"] as? String ?? "" 478 | } 479 | 480 | // For a post of type Job or Ask, the first and only rootComment will be the question/job itself 481 | if post.type == .askHN { 482 | if let newComment = HNComment.parseAskHNComment(html: htmlComponents![0], withParsingConfig: parseConfig!) { 483 | allComments.append(newComment) 484 | rootComments.append(newComment) 485 | lastCommentByLevel[0] = newComment 486 | } else { 487 | print("error parsing AskHN comment") 488 | completion(post, [], nil, .parsingError) 489 | return 490 | } 491 | 492 | } 493 | 494 | if post.type == .jobs { 495 | if let newComment = HNComment.parseJobComment(html: htmlComponents![0], withParsingConfig: parseConfig!) { 496 | allComments.append(newComment) 497 | rootComments.append(newComment) 498 | lastCommentByLevel[0] = newComment 499 | } else { 500 | print("error parsing Job comment") 501 | completion(post, [], nil, .parsingError) 502 | return 503 | } 504 | 505 | } 506 | 507 | var linkForMore : String? 508 | // 1st object is garbage. 509 | htmlComponents?.remove(at: 0) 510 | for (index, htmlComponent) in htmlComponents!.enumerated() { 511 | 512 | if let newComment = HNComment(fromHtml: htmlComponent, withParsingConfig: parseConfig!, levelOffset: (post.type == .jobs || post.type == .askHN) ? 1 : 0) { 513 | if newComment.level == 0 { // If root comment 514 | rootComments.append(newComment) 515 | } else { // looking for parent 516 | if let parent = lastCommentByLevel[newComment.level-1] { 517 | newComment.replyTo = parent 518 | parent.addReply(newComment) 519 | } 520 | } 521 | allComments.append(newComment) 522 | lastCommentByLevel[newComment.level] = newComment 523 | 524 | // If last item of the page, try to grab the link for next page. 525 | if index == htmlComponents!.count - 1 { 526 | linkForMore = parseLinkForMoreFromCommentsList(html: htmlComponent, withParsingConfig: parseConfig!) 527 | } 528 | } else { 529 | print("error parsing comment") 530 | } 531 | 532 | 533 | } 534 | // TODO: return error if every comment fail to be parsed (i.e. htmlComponents.count> 0 && comments.count == 0) 535 | if buildHierarchy { 536 | completion(post, rootComments, linkForMore, nil) 537 | } else { 538 | completion(post, allComments, linkForMore, nil) 539 | } 540 | 541 | 542 | } 543 | 544 | 545 | // ================================================== 546 | // MARK: - Download User specifi data 547 | // ================================================== 548 | 549 | /* 550 | Fetch the list of posts submited by the user with 551 | the specified username and pass it to the completion 552 | handler when done. 553 | */ 554 | public func getSubmissions(ForUserWithUsername username: String, completion: @escaping PostListDownloadCompletionHandler) { 555 | let url = HNScraper.baseUrl + "submitted?id=\(username)" 556 | getUserSpecificPostList(urlString: url, completion: completion) 557 | } 558 | 559 | /* 560 | Fetch the list of posts favorited by the user with 561 | the specified username and pass it to the completion 562 | handler when done. 563 | */ 564 | public func getFavorites(ForUserWithUsername username: String, completion: @escaping PostListDownloadCompletionHandler) { 565 | let url = HNScraper.baseUrl + "favorites?id=\(username)" 566 | getUserSpecificPostList(urlString: url, completion: completion) 567 | } 568 | 569 | /* 570 | Check that the user exists and fetch the list of submission/favorite (according to specified url) 571 | */ 572 | private func getUserSpecificPostList(urlString: String, completion: @escaping PostListDownloadCompletionHandler) { 573 | getHtmlAndParsingConfig(url: urlString, completion: { html, error -> Void in 574 | if html == nil { 575 | completion([], nil, error ?? .noData) 576 | return 577 | } 578 | if html! == "No such user." { 579 | completion([], nil, .noSuchUser) 580 | return 581 | } 582 | self.postsHtmlToBeParsed = html 583 | self.parseDownloadedPosts(completion: completion) 584 | }) 585 | } 586 | 587 | 588 | /* 589 | Fetch the list of comments written by the user with 590 | the specified username and pass it to the completion 591 | handler when done. 592 | */ 593 | public func getComments(ForUserWithUsername username: String, completion: @escaping (([HNComment], String?, HNScraperError?) -> Void)) { 594 | let url = HNScraper.baseUrl + "threads?id=\(username)" 595 | getComments(FromURl: url, completion: completion)/* 596 | getHtmlAndParsingConfig(url: url, completion: { html, error -> Void in 597 | if html == nil { 598 | completion([], nil, error ?? .noData) 599 | return 600 | } 601 | self.commentsHtmlToBeParsed = html 602 | self.parseDownloadedComments(ForPost: HNPost(), completion: { (post, comments, linkForMore, error) in 603 | completion(comments, linkForMore, error) 604 | }) 605 | })*/ 606 | } // TODO 607 | 608 | // ================================================== 609 | // MARK: - Actions on posts/comments 610 | // ================================================== 611 | 612 | 613 | private func voteOnHNObject(AtUrl urlString: String, objectId: String, up: Bool, completion: @escaping ((HNScraperError?) -> Void)) { 614 | if !HNLogin.shared.isLoggedIn() { 615 | completion(.notLoggedIn) 616 | return 617 | } 618 | // The upvote url is change so that the goto parameter points to the post page. 619 | // That allows the verification to work even if the link is old and that the post isn't anymore on the page pointed by goto. 620 | var urlComponent = URLComponents(string: urlString) 621 | for (index, param) in (urlComponent?.queryItems ?? []).enumerated() { 622 | if param.name == "goto" { 623 | urlComponent?.queryItems![index].value = "item?id=" + objectId 624 | } 625 | } 626 | if let newUrlString = urlComponent?.string { 627 | downloadHtmlPage(urlString: newUrlString, cookie: HNLogin.shared.sessionCookie, completion: { html, error -> Void in 628 | if html == nil { 629 | completion(error ?? .noData) 630 | } else { 631 | if html!.contains(" Void)) { 713 | if !HNLogin.shared.isLoggedIn() { 714 | completion(.notLoggedIn) 715 | return 716 | } 717 | if post.upvoteAdditionURL != nil { 718 | let url = HNScraper.baseUrl + post.upvoteAdditionURL!.replacingOccurrences(of: "&", with: "&").replacingOccurrences(of: "how=up", with: "un=t").replacingOccurrences(of: "vote?id=", with: "fave?id=") 719 | 720 | downloadHtmlPage(urlString: url, cookie: HNLogin.shared.sessionCookie, completion: { html, error -> Void in 721 | // The id of the unfavorited post must be absent from the html 722 | if html == nil { 723 | completion(error ?? .noData) 724 | } else { 725 | if !html!.contains("id='" + post.id + "'") { 726 | completion(nil) 727 | } else { 728 | completion(error ?? .unknown) 729 | } 730 | } 731 | }) 732 | } else { 733 | completion(.invalidURL) 734 | } 735 | 736 | } 737 | 738 | public func getUserFrom(Username username: String, completion: ((HNUser?, HNScraperError?) -> Void)?) { 739 | getHtmlAndParsingConfig(url: HNScraper.baseUrl + "user?id=" + username, completion: { html, error -> Void in 740 | if html == nil { 741 | completion?(nil, error ?? .noData) 742 | return 743 | } 744 | if HNParseConfig.shared.data == nil { 745 | completion?(nil, .missingOrCorruptedConfigFile) 746 | return 747 | } 748 | if html!.contains("No such user.") { 749 | completion?(nil, .noSuchUser) 750 | return 751 | } 752 | completion?(HNUser(fromHtml: html!, withParsingConfig: HNParseConfig.shared.data!), error) 753 | }) 754 | } 755 | 756 | 757 | } 758 | --------------------------------------------------------------------------------