├── .gitignore ├── Sources ├── LinkPreview │ ├── URL+Extensions.swift │ ├── Processors │ │ ├── MetadataProcessor.swift │ │ ├── OpenGraphProcessor.swift │ │ ├── WikipediaAPIProcessor.swift │ │ └── GenericHTMLProcessor.swift │ ├── LinkPreviewURLRequest.swift │ ├── LinkPreviewMetadata.swift │ ├── LinkPreviewProvider.swift │ └── LinkPreview.swift └── LinkPreviewCLI │ └── Main.swift ├── .github └── workflows │ └── swift.yml ├── LICENSE ├── Package.swift ├── README.md ├── Tests └── LinkPreviewTests │ └── LinkPreviewTests.swift └── Package.resolved /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | /.build 3 | /Packages 4 | xcuserdata/ 5 | DerivedData/ 6 | .swiftpm 7 | .netrc 8 | -------------------------------------------------------------------------------- /Sources/LinkPreview/URL+Extensions.swift: -------------------------------------------------------------------------------- 1 | // 2 | // URL+Extensions.swift 3 | // LinkPreview 4 | // 5 | // Created by Harlan Haskins on 2/9/25. 6 | // 7 | import Foundation 8 | 9 | extension URL { 10 | var baseHostName: String? { 11 | host?.split(separator: ".").suffix(2).joined(separator: ".") 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /Sources/LinkPreview/Processors/MetadataProcessor.swift: -------------------------------------------------------------------------------- 1 | // 2 | // MetadataProcessor.swift 3 | // LinkPreview 4 | // 5 | // Created by Harlan Haskins on 2/5/25. 
6 | // 7 | 8 | public import Foundation 9 | public import SwiftSoup 10 | 11 | public struct MetadataProcessingOptions: Sendable { 12 | public var allowAdditionalRequests: Bool = true 13 | public init() {} 14 | } 15 | 16 | public protocol MetadataProcessor { 17 | static func updateLinkPreview( 18 | _ preview: inout LinkPreview, 19 | for url: URL, 20 | document: Document?, 21 | options: MetadataProcessingOptions 22 | ) async 23 | } 24 | -------------------------------------------------------------------------------- /.github/workflows/swift.yml: -------------------------------------------------------------------------------- 1 | 2 | # This workflow will build a Swift project 3 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-swift 4 | 5 | name: Swift 6 | 7 | on: 8 | push: 9 | branches: [ "main" ] 10 | pull_request: 11 | branches: [ "main" ] 12 | 13 | jobs: 14 | build: 15 | name: LinkPreview on ${{ matrix.os }} 16 | strategy: 17 | matrix: 18 | os: [ubuntu-latest, macos-latest] 19 | swift: ["6.0"] 20 | runs-on: ${{ matrix.os }} 21 | steps: 22 | - uses: swift-actions/setup-swift@v2.2.0 23 | with: 24 | swift-version: ${{ matrix.swift }} 25 | - uses: actions/checkout@v4 26 | - name: Build 27 | run: swift build -v 28 | - name: Run tests 29 | run: swift test -v -------------------------------------------------------------------------------- /Sources/LinkPreviewCLI/Main.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Main.swift 3 | // LinkPreview 4 | // 5 | // Created by Harlan Haskins on 2/5/25. 
6 | // 7 | 8 | import ArgumentParser 9 | import Foundation 10 | import LinkPreview 11 | 12 | @main 13 | struct LinkPreviewCLI: AsyncParsableCommand { 14 | enum Error: Swift.Error { 15 | case invalidURL 16 | } 17 | @Argument(help: "The URL to print metadata for") 18 | var url: String 19 | /// Parses the argument into a URL (assuming https when no scheme is given), loads its preview, and prints it. 20 | mutating func run() async throws { 21 | var urlString = url 22 | if urlString.range(of: "^[A-Za-z][A-Za-z0-9+.-]*://", options: .regularExpression) == nil { // hasPrefix("http") would also match bare hosts such as "httpbin.org" 23 | urlString = "https://\(urlString)" 24 | } 25 | guard let url = URL(string: urlString) else { 26 | throw Error.invalidURL 27 | } 28 | 29 | let provider = LinkPreviewProvider() 30 | let preview = try await provider.load(from: url) 31 | print(preview.debugDescription) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Harlan Haskins 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version: 6.0 2 | 3 | import PackageDescription 4 | 5 | let package = Package( 6 | name: "LinkPreview", 7 | platforms: [.iOS(.v13), .macOS(.v11)], 8 | products: [ 9 | .library(name: "LinkPreview", targets: ["LinkPreview"]), 10 | .executable(name: "linkpreviewcli", targets: ["LinkPreviewCLI"]) 11 | ], 12 | dependencies: [ 13 | .package(url: "https://github.com/scinfu/SwiftSoup.git", from: "2.6.0"), 14 | .package(url: "https://github.com/apple/swift-argument-parser", from: "1.3.0"), 15 | .package(url: "https://github.com/swift-server/async-http-client", from: "1.24.0") 16 | ], 17 | targets: [ 18 | .target( 19 | name: "LinkPreview", 20 | dependencies: [ 21 | "SwiftSoup", 22 | .product(name: "AsyncHTTPClient", package: "async-http-client") 23 | ], 24 | swiftSettings: [ 25 | .enableUpcomingFeature("InternalImportsByDefault") 26 | ] 27 | ), 28 | .executableTarget(name: "LinkPreviewCLI", dependencies: [ 29 | "LinkPreview", 30 | .product(name: "ArgumentParser", package: "swift-argument-parser") 31 | ]), 32 | .testTarget(name: "LinkPreviewTests", dependencies: ["LinkPreview"]) 33 | ] 34 | ) 35 | -------------------------------------------------------------------------------- /Sources/LinkPreview/LinkPreviewURLRequest.swift: -------------------------------------------------------------------------------- 1 | // 2 | // File.swift 3 | // LinkPreview 4 | // 5 | // Created by Harlan Haskins on 2/9/25. 
6 | // 7 | 8 | import Foundation 9 | import AsyncHTTPClient 10 | 11 | struct LinkPreviewURLRequest { 12 | enum Output { 13 | case html(Data) 14 | case fileURL(URL, String) 15 | } 16 | var request: HTTPClientRequest 17 | let url: URL 18 | 19 | init(url: URL) { 20 | self.url = url 21 | self.request = .init(url: url.absoluteString) 22 | } 23 | 24 | mutating func setValue(_ value: String, forHTTPHeaderField field: String) { 25 | if !request.headers.contains(name: field) { 26 | request.headers.add(name: field, value: value) 27 | } 28 | } 29 | 30 | func load() async throws -> Output { 31 | let response = try await HTTPClient.shared.execute(request, timeout: .seconds(5)) 32 | guard response.status == .ok else { 33 | throw LinkPreviewError.unsuccessfulHTTPStatus(Int(response.status.code), response) 34 | } 35 | 36 | let contentTypes = response.headers["Content-Type"] 37 | for contentType in contentTypes { 38 | let isHTML = contentType.localizedCaseInsensitiveContains("text/html") 39 | 40 | if isHTML { 41 | let body = try await response.body.collect(upTo: 10 * 1024 * 1024) // 10 MB 42 | return .html(Data(body.readableBytesView)) 43 | } 44 | 45 | return .fileURL(url, contentType) 46 | } 47 | 48 | let contentTypeString = contentTypes.joined(separator: ", ") 49 | throw LinkPreviewError.unableToHandleContentType(contentTypeString, response) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /Sources/LinkPreview/Processors/OpenGraphProcessor.swift: -------------------------------------------------------------------------------- 1 | // 2 | // OpenGraphProcessor.swift 3 | // LinkPreview 4 | // 5 | // Created by Harlan Haskins on 2/5/25. 
6 | // 7 | 8 | public import Foundation 9 | public import SwiftSoup 10 | 11 | public enum OpenGraphProcessor: MetadataProcessor { 12 | public static func updateLinkPreview( 13 | _ preview: inout LinkPreview, 14 | for url: URL, 15 | document: Document?, 16 | options: MetadataProcessingOptions 17 | ) async { 18 | guard let document else { 19 | return 20 | } 21 | let metaTags = try? document.select("meta") 22 | for metaTag in metaTags?.array() ?? [] { 23 | let propertyTag = try? metaTag.attr("property") 24 | let nameTag = try? metaTag.attr("name") 25 | guard let propertyNameTag = propertyTag?.nonEmpty ?? nameTag else { 26 | continue 27 | } 28 | var components = propertyNameTag.split(separator: ":") 29 | if components.count < 2 { continue } 30 | 31 | var isOpenGraph = false 32 | if components.first == "og" { 33 | isOpenGraph = true 34 | components.removeFirst() 35 | } 36 | 37 | guard let content = try? metaTag.attr("content") else { 38 | continue 39 | } 40 | let name = String(components.removeFirst()) 41 | var property = preview.properties[name, default: .init(name: name)] 42 | 43 | // Ignore redundant values, but treat OpenGraph data as authoritative. 44 | if components.isEmpty { 45 | if property.content == nil || isOpenGraph { 46 | property.content = content 47 | } 48 | } else { 49 | let metadataName = String(components.removeFirst()) 50 | if property.metadata[metadataName] == nil || isOpenGraph { 51 | property.metadata[metadataName] = content 52 | } 53 | } 54 | 55 | preview.properties[name] = property 56 | } 57 | } 58 | } 59 | 60 | extension String { 61 | var nonEmpty: String? { 62 | isEmpty ? nil : self 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /Sources/LinkPreview/LinkPreviewMetadata.swift: -------------------------------------------------------------------------------- 1 | // 2 | // LinkPreviewProperty.swift 3 | // LinkPreview 4 | // 5 | // Created by Harlan Haskins on 2/5/25. 
6 | // 7 | 8 | public import Foundation 9 | import SwiftSoup 10 | 11 | public struct LinkPreviewPropertyName: Sendable, Equatable { 12 | public var rawValue: String 13 | public init(_ rawValue: String) { 14 | self.rawValue = rawValue 15 | } 16 | 17 | public static var description: LinkPreviewPropertyName { .init("description") } 18 | public static var title: LinkPreviewPropertyName { .init("title") } 19 | public static var canonicalURL: LinkPreviewPropertyName { .init("url") } 20 | public static var imageURL: LinkPreviewPropertyName { .init("image") } 21 | public static var videoURL: LinkPreviewPropertyName { .init("video") } 22 | public static var audioURL: LinkPreviewPropertyName { .init("audio") } 23 | public static var siteName: LinkPreviewPropertyName { .init("site_name") } 24 | 25 | public static var faviconURL: LinkPreviewPropertyName { .init("icon") } 26 | } 27 | 28 | /// A property within a link preview, including any associated metadata. 29 | public struct LinkPreviewProperty: Sendable { 30 | /// The name of the property ("title", "description", etc). 31 | public var name: String 32 | 33 | /// The content extracted from the page for this property. 34 | public var content: String? 35 | 36 | /// Any sub-properties associated with this property. 
37 | public var metadata: [String: String] = [:] 38 | 39 | mutating func merge(with property: LinkPreviewProperty) { 40 | if let content = property.content { 41 | self.content = content 42 | } 43 | 44 | for (key, value) in property.metadata { 45 | metadata[key] = value 46 | } 47 | } 48 | } 49 | 50 | public protocol LinkPreviewPropertyValue: Sendable { 51 | init?(content: String, at url: URL) 52 | var content: String { get } 53 | } 54 | 55 | extension String: LinkPreviewPropertyValue { 56 | public init?(content: String, at url: URL) { 57 | self = content 58 | } 59 | public var content: String { 60 | self 61 | } 62 | } 63 | 64 | extension Int: LinkPreviewPropertyValue { 65 | public init?(content: String, at url: URL) { 66 | self.init(content) 67 | } 68 | public var content: String { 69 | "\(self)" 70 | } 71 | } 72 | 73 | extension UInt: LinkPreviewPropertyValue { 74 | public init?(content: String, at url: URL) { 75 | self.init(content) 76 | } 77 | public var content: String { 78 | "\(self)" 79 | } 80 | } 81 | 82 | extension URL: LinkPreviewPropertyValue { 83 | public init?(content: String, at url: URL) { 84 | self.init(string: content, relativeTo: url.rootURL) 85 | } 86 | var rootURL: URL { 87 | guard var components = URLComponents(url: self, resolvingAgainstBaseURL: true) else { 88 | return self 89 | } 90 | components.path = "" 91 | return components.url ?? self 92 | } 93 | public var content: String { 94 | absoluteString 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /Sources/LinkPreview/Processors/WikipediaAPIProcessor.swift: -------------------------------------------------------------------------------- 1 | // 2 | // WikipediaParser.swift 3 | // LinkPreview 4 | // 5 | // Created by Harlan Haskins on 2/5/25. 
6 | // 7 | 8 | import AsyncHTTPClient 9 | public import Foundation 10 | public import SwiftSoup 11 | 12 | /// A Wikipedia-specific processor that hits the Wikipedia API in order to 13 | /// extract text from the articles. 14 | public enum WikipediaAPIProcessor: MetadataProcessor { 15 | public static func updateLinkPreview( 16 | _ preview: inout LinkPreview, 17 | for url: URL, 18 | document: Document?, 19 | options: MetadataProcessingOptions 20 | ) async { 21 | // Only apply this to Wikipedia.org URLs 22 | guard url.baseHostName == "wikipedia.org" else { 23 | return 24 | } 25 | 26 | guard let document else { 27 | return 28 | } 29 | 30 | // No need to fetch a new description if there's one there. 31 | if preview.description != nil { 32 | return 33 | } 34 | 35 | if let shortDescription = try? document.select("div.shortdescription"), 36 | let text = try? shortDescription.text(), !text.isEmpty { 37 | preview.description = text 38 | return 39 | } 40 | 41 | // Skip if the client requested no new requests. 
42 | if !options.allowAdditionalRequests { 43 | return 44 | } 45 | 46 | var components = URLComponents() 47 | components.scheme = "https" 48 | components.host = url.host 49 | components.path = "/w/api.php" 50 | components.queryItems = [ 51 | URLQueryItem(name: "action", value: "query"), 52 | URLQueryItem(name: "titles", value: url.lastPathComponent), 53 | URLQueryItem(name: "exintro", value: nil), 54 | URLQueryItem(name: "format", value: "json"), 55 | URLQueryItem(name: "prop", value: "extracts"), 56 | URLQueryItem(name: "explaintext", value: nil) 57 | ] 58 | 59 | guard let url = components.url else { 60 | return 61 | } 62 | 63 | do { 64 | let request = HTTPClientRequest(url: url.absoluteString) 65 | let response = try await HTTPClient.shared.execute(request, timeout: .seconds(5)) 66 | guard response.status == .ok else { 67 | return 68 | } 69 | 70 | let buffer = try await response.body.collect(upTo: 10 * 1024 * 1024) // 10 MB 71 | let data = Data(buffer.readableBytesView) 72 | let wikipediaResponse = try JSONSerialization.jsonObject(with: data) 73 | guard let dict = wikipediaResponse as? [String: Any], 74 | let query = dict["query"] as? [String: Any], 75 | let pages = query["pages"] as? [String: Any], 76 | let (_, result) = pages.first, 77 | let page = result as? [String: Any] else { 78 | return 79 | } 80 | 81 | if preview.title == nil, let title = page["title"] as? String { 82 | preview.title = title 83 | } 84 | 85 | if preview.description == nil, let extract = page["extract"] as? String { 86 | preview.description = extract 87 | } 88 | } catch { 89 | return 90 | } 91 | } 92 | 93 | } 94 | -------------------------------------------------------------------------------- /Sources/LinkPreview/LinkPreviewProvider.swift: -------------------------------------------------------------------------------- 1 | // 2 | // LinkPreviewProvider.swift 3 | // LinkPreview 4 | // 5 | // Created by Harlan Haskins on 2/5/25. 
6 | // 7 | 8 | public import Foundation 9 | import SwiftSoup 10 | 11 | /// Loads and extracts metadata from web URLs. 12 | public final class LinkPreviewProvider { 13 | static let defaultProcessors: [any MetadataProcessor.Type] = [ 14 | OpenGraphProcessor.self, 15 | GenericHTMLProcessor.self, 16 | WikipediaAPIProcessor.self 17 | ] 18 | var registeredProcessors: [any MetadataProcessor.Type] = LinkPreviewProvider.defaultProcessors 19 | public var options: MetadataProcessingOptions = .init() 20 | 21 | public init() { 22 | } 23 | 24 | public func registerProcessor(_ type: any MetadataProcessor.Type) { 25 | if registeredProcessors.contains(where: { ObjectIdentifier($0) == ObjectIdentifier(type) }) { 26 | return 27 | } 28 | registeredProcessors.append(type) 29 | } 30 | 31 | public func unregisterProcessor(_ type: any MetadataProcessor.Type) { 32 | guard let index = registeredProcessors.firstIndex(where: { ObjectIdentifier($0) == ObjectIdentifier(type) }) else { 33 | return 34 | } 35 | registeredProcessors.remove(at: index) 36 | } 37 | 38 | public func load(html: String, url: URL) async throws -> LinkPreview { 39 | var preview = LinkPreview(url: url) 40 | let document = try SwiftSoup.parse(html, url.absoluteString) 41 | await runProcessors(&preview, document: document, url: url) 42 | return preview 43 | } 44 | 45 | func runProcessors(_ preview: inout LinkPreview, document: Document?, url: URL) async { 46 | for processor in registeredProcessors { 47 | await processor.updateLinkPreview( 48 | &preview, 49 | for: url, 50 | document: document, 51 | options: options 52 | ) 53 | } 54 | } 55 | 56 | private func bestUserAgent(for url: URL) -> String { 57 | switch url.baseHostName { 58 | case "spotify.com": 59 | "Twitterbot/1.0" 60 | default: 61 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.4 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.4 facebookexternalhit/1.1 Facebot Twitterbot/1.0" 62 | } 63 | } 64 | 65 | /// Loads a link preview from the provided URL, 
optionally providing a set 66 | /// of custom headers. 67 | public func load( 68 | from url: URL, 69 | headers: [String: String] = [:] 70 | ) async throws -> LinkPreview { 71 | var httpRequest = LinkPreviewURLRequest(url: url) 72 | for (header, value) in headers { 73 | httpRequest.setValue(value, forHTTPHeaderField: header) 74 | } 75 | httpRequest.setValue(bestUserAgent(for: url), forHTTPHeaderField: "User-Agent") 76 | httpRequest.setValue("en-US,en;q=0.9", forHTTPHeaderField: "Accept-Language") 77 | 78 | var preview = LinkPreview(url: url) 79 | var document: Document? 80 | switch try await httpRequest.load() { 81 | case let .html(data): 82 | let html = String(decoding: data, as: UTF8.self) 83 | document = try SwiftSoup.parse(html, url.absoluteString) 84 | case let .fileURL(url, contentType): 85 | preview.canonicalURL = url 86 | if contentType.localizedCaseInsensitiveContains("image/") { 87 | preview.imageURL = url 88 | } else if contentType.localizedCaseInsensitiveContains("audio/") { 89 | preview.audioURL = url 90 | } else if contentType.localizedCaseInsensitiveContains("video/") { 91 | preview.videoURL = url 92 | } 93 | preview.title = url.lastPathComponent 94 | } 95 | await runProcessors(&preview, document: document, url: url) 96 | return preview 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LinkPreviewSwift 2 | 3 | LinkPreviewSwift is an in-progress implementation of link previews in Swift that works client-side or server-side. 4 | 5 | ## Usage 6 | 7 | Similarly to the [LinkPresentation](https://developer.apple.com/documentation/linkpresentation) framework, you create an instance of `LinkPreviewProvider` 8 | that you can configure to load link previews for you. 
9 | 10 | ```swift 11 | let provider = LinkPreviewProvider() 12 | 13 | // Optionally, configure the provider: 14 | 15 | // Turn off processing that requires making additional requests for more information, 16 | // instead choosing only to read data from the single page that's loaded. 17 | provider.options.allowAdditionalRequests = false 18 | 19 | // Load the preview 20 | let preview = try await provider.load(from: url) 21 | 22 | // You can also provide custom headers to attach to the request 23 | 24 | let preview = try await provider.load(from: url, headers: [ 25 | "Authorization": "Bearer ..." 26 | ]) 27 | 28 | // You can also load directly from HTML 29 | let html = "<html><head><title>Title</title></head></html>" 30 | let preview = try await provider.load(html: html, url: URL(string: "https://example.com")!) 31 | ``` 32 | 33 | The `LinkPreview` type has several accessors for common OpenGraph metadata: 34 | 35 | ```swift 36 | let imageURL = preview.imageURL 37 | let title = preview.title 38 | let description = preview.description 39 | ``` 40 | 41 | But you can also read custom OpenGraph fields directly from the properties: 42 | 43 | ```swift 44 | // Parses `og:image`, `og:image:width`, and `og:image:height` tags. 45 | let imageURLProperty = preview.property(named: "image") 46 | let imageURL = imageURLProperty?.content 47 | 48 | if let width = imageURLProperty?.metadata["width"], 49 | let height = imageURLProperty?.metadata["height"] { 50 | // Parse size as integers. 51 | } 52 | ``` 53 | 54 | ### Custom Processors 55 | 56 | By default, all extraction is performed by `MetadataProcessor` types. There 57 | are currently three: `OpenGraphProcessor`, `GenericHTMLProcessor`, and 58 | `WikipediaAPIProcessor`, the last of which is specific to `wikipedia.org` URLs. 59 | 60 | You can implement your own processor to recognize special pages and perform more 61 | specific scraping tasks; you will be handed a `Document` from [SwiftSoup](https://github.com/scinfu/SwiftSoup) 62 | that you can extract data from. 
63 | 64 | For example, you can add a processor that appends the URL's host to the end of the title like so: 65 | 66 | ```swift 67 | enum CustomProcessor: MetadataProcessor { 68 | static func updateLinkPreview( 69 | _ preview: inout LinkPreview, 70 | for url: URL, 71 | document: Document?, 72 | options: MetadataProcessingOptions 73 | ) async { 74 | var title = preview.title ?? "" 75 | if let host = url.host { 76 | if !title.isEmpty { 77 | title += " • " 78 | } 79 | title += host 80 | } 81 | if !title.isEmpty { 82 | preview.title = title 83 | } 84 | } 85 | } 86 | 87 | // Tell the provider to run this processor along with the others. 88 | provider.registerProcessor(CustomProcessor.self) 89 | 90 | let preview = try await provider.load(from: URL(string: "https://example.com")!) 91 | 92 | print(preview.title) // prints 'Example Domain • example.com' 93 | ``` 94 | 95 | ## Installation 96 | 97 | LinkPreviewSwift can be added to your project using Swift Package Manager. For more 98 | information on using SwiftPM in Xcode, see [Apple's guide](https://developer.apple.com/documentation/xcode/adding-package-dependencies-to-your-app). 99 | 100 | If you're using package dependencies directly, you can add this as one of your dependencies: 101 | 102 | ```swift 103 | dependencies: [ 104 | .package(url: "https://github.com/harlanhaskins/LinkPreviewSwift.git", branch: "main") 105 | ] 106 | ``` 107 | 108 | ## Author 109 | 110 | Harlan Haskins ([harlan@harlanhaskins.com](mailto:harlan@harlanhaskins.com)) 111 | -------------------------------------------------------------------------------- /Sources/LinkPreview/LinkPreview.swift: -------------------------------------------------------------------------------- 1 | public import Foundation 2 | public import AsyncHTTPClient 3 | import SwiftSoup 4 | 5 | public enum LinkPreviewError: Error { 6 | case unableToHandleContentType(String, HTTPClientResponse) 7 | case unsuccessfulHTTPStatus(Int, HTTPClientResponse) 8 | case unableToParseResponse(Error) 9 | } 
10 | 11 | public struct LinkPreview: CustomDebugStringConvertible, Sendable { 12 | public let url: URL 13 | public internal(set) var properties: [String: LinkPreviewProperty] 14 | 15 | public init(url: URL, properties: [String: LinkPreviewProperty] = [:]) { 16 | self.url = url 17 | self.properties = properties 18 | } 19 | 20 | public func property(named name: LinkPreviewPropertyName) -> LinkPreviewProperty? { 21 | properties[name.rawValue] 22 | } 23 | 24 | public func property(named name: String) -> LinkPreviewProperty? { 25 | properties[name] 26 | } 27 | /// Typed access to a property's content, converted through `T`; the setter merges into an existing property, so assigning `nil` leaves stored content unchanged. 28 | public subscript<T: LinkPreviewPropertyValue>( 29 | name: LinkPreviewPropertyName 30 | ) -> T? { 31 | get { 32 | guard let value = properties[name.rawValue], let content = value.content else { 33 | return nil 34 | } 35 | 36 | return T.init(content: content, at: url) 37 | } 38 | set { 39 | let property = LinkPreviewProperty(name: name.rawValue, content: newValue?.content) 40 | if properties.keys.contains(name.rawValue) { 41 | properties[name.rawValue]?.merge(with: property) 42 | } else { 43 | properties[name.rawValue] = property 44 | } 45 | } 46 | } 47 | 48 | public var hostFaviconURL: URL? { 49 | URL(string: "favicon.ico", relativeTo: url.rootURL) 50 | } 51 | 52 | public var canonicalURL: URL? { 53 | get { self[.canonicalURL] } 54 | set { self[.canonicalURL] = newValue } 55 | } 56 | 57 | public var faviconURL: URL? { 58 | get { self[.faviconURL] } 59 | set { self[.faviconURL] = newValue } 60 | } 61 | 62 | public var description: String? { 63 | get { self[.description] } 64 | set { self[.description] = newValue } 65 | } 66 | 67 | public var title: String? { 68 | get { self[.title] } 69 | set { self[.title] = newValue } 70 | } 71 | 72 | public var imageURL: URL? { 73 | get { self[.imageURL] } 74 | set { self[.imageURL] = newValue } 75 | } 76 | 77 | public var videoURL: URL? { 78 | get { self[.videoURL] } 79 | set { self[.videoURL] = newValue } 80 | } 81 | 82 | public var audioURL: URL? 
{ 83 | get { self[.audioURL] } 84 | set { self[.audioURL] = newValue } 85 | } 86 | 87 | public var siteName: String? { 88 | get { self[.siteName] } 89 | set { self[.siteName] = newValue } 90 | } 91 | 92 | public var debugDescription: String { 93 | var description = "" 94 | let propertyNames = properties.keys.sorted() 95 | var numberPrinted = 0 96 | for key in propertyNames { 97 | let property = properties[key]! 98 | if numberPrinted != 0 { 99 | description += "\n" 100 | } 101 | let content = property.content ?? "" 102 | numberPrinted += 1 103 | description += "\(property.name): " 104 | if property.name == "description" && content.count > 200 { 105 | description += "\"\(content.prefix(200))\" [truncated]" 106 | } else { 107 | description += "\"\(content)\"" 108 | } 109 | if !property.metadata.isEmpty { 110 | let keys = property.metadata.keys.sorted() 111 | for key in keys { 112 | description += "\n \(key): \(property.metadata[key]!)" 113 | } 114 | } 115 | } 116 | return description 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /Sources/LinkPreview/Processors/GenericHTMLProcessor.swift: -------------------------------------------------------------------------------- 1 | // 2 | // GenericHTMLProcessor.swift 3 | // LinkPreview 4 | // 5 | // Created by Harlan Haskins on 2/5/25. 6 | // 7 | 8 | import AsyncHTTPClient 9 | public import Foundation 10 | public import SwiftSoup 11 | 12 | /// A metadata processor that tries to extract data from HTML outside of 13 | /// OpenGraph, either by looking for `<title>` tags, `<meta name="description">` tags, `<link rel="icon">` tags, etc. 14 | public enum GenericHTMLProcessor: MetadataProcessor { 15 | static let faviconProperties = ["icon", "shortcut icon", "apple-touch-icon", "apple-touch-icon-precomposed"] 16 | 17 | static func findFaviconURL(in document: Document) -> URL? { 18 | guard let tags = try? 
document.select("link[rel]") else { 19 | return nil 20 | } 21 | for tag in tags { 22 | let rel = (try? tag.attr("rel")) ?? "" 23 | if Self.faviconProperties.contains(rel), let content = try? tag.absUrl("href") { 24 | return URL(string: content) 25 | } 26 | } 27 | return nil 28 | } 29 | 30 | static func findCanonicalURL(in document: Document) -> URL? { 31 | guard let links = try? document.select("link[rel]") else { 32 | return nil 33 | } 34 | for link in links { 35 | do { 36 | if try link.attr("rel").caseInsensitiveCompare("canonical") == .orderedSame { 37 | let url = try link.absUrl("href") 38 | return URL(string: url) 39 | } 40 | } catch { 41 | continue 42 | } 43 | } 44 | return nil 45 | } 46 | 47 | private static func findDescription(in document: Document) -> String? { 48 | let metaTags = (try? document.select("meta[name]").array()) ?? [] 49 | for metaTag in metaTags { 50 | let name = (try? metaTag.attr("name")) ?? "" 51 | if name.caseInsensitiveCompare("description") == .orderedSame { 52 | return try? metaTag.attr("content") 53 | } 54 | } 55 | 56 | return nil 57 | } 58 | 59 | private static func findTitle(in document: Document) -> String? { 60 | try? document.select("title").text() 61 | } 62 | 63 | static func defaultFaviconIfExists( 64 | for url: URL 65 | ) async -> URL? 
{ 66 | guard var components = URLComponents(url: url, resolvingAgainstBaseURL: false) else { 67 | return nil 68 | } 69 | components.path = "/favicon.ico" 70 | guard let url = components.url else { 71 | return nil 72 | } 73 | do { 74 | let request = HTTPClientRequest(url: url.absoluteString) 75 | let response = try await HTTPClient.shared.execute(request, timeout: .seconds(1)) 76 | guard response.status == .ok else { 77 | return nil 78 | } 79 | } catch { 80 | return nil 81 | } 82 | 83 | return url 84 | } 85 | 86 | public static func updateLinkPreview( 87 | _ preview: inout LinkPreview, 88 | for url: URL, 89 | document: Document?, 90 | options: MetadataProcessingOptions 91 | ) async { 92 | 93 | if preview.faviconURL == nil, let document { 94 | preview.faviconURL = findFaviconURL(in: document) 95 | } 96 | 97 | if preview.faviconURL == nil && options.allowAdditionalRequests { 98 | preview.faviconURL = await defaultFaviconIfExists(for: url) 99 | } 100 | 101 | if preview.canonicalURL == nil, let document { 102 | preview.canonicalURL = findCanonicalURL(in: document) 103 | } 104 | 105 | if preview.title == nil, let document { 106 | preview.title = findTitle(in: document) 107 | } 108 | 109 | if preview.description == nil, let document { 110 | preview.description = findDescription(in: document) 111 | } 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /Tests/LinkPreviewTests/LinkPreviewTests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // LinkPreviewTests.swift 3 | // LinkPreview 4 | // 5 | // Created by Harlan Haskins on 1/19/25. 6 | // 7 | 8 | import Foundation 9 | import LinkPreview 10 | import SwiftSoup 11 | import Testing 12 | 13 | @Suite 14 | struct LinkPreviewTests { 15 | @Test func simpleFile() async throws { 16 | let provider = LinkPreviewProvider() 17 | let preview = try await provider.load(from: URL(string: "https://apple.com")!) 
18 | #expect(preview.description != nil) 19 | #expect(preview.title != nil) 20 | } 21 | 22 | @Test func descriptionFallback() async throws { 23 | let provider = LinkPreviewProvider() 24 | let preview = try await provider.load(html: """ 25 | <head> 26 | <meta property="og:title" content="Title" /> 27 | <meta name="description" content="Hello, world" /> 28 | </head> 29 | """, url: URL(string: "https://example.com")!) 30 | #expect(preview.title == "Title") 31 | #expect(preview.description == "Hello, world") 32 | } 33 | 34 | @Test func titleFallback() async throws { 35 | let provider = LinkPreviewProvider() 36 | let preview = try await provider.load(html: """ 37 | <head> 38 | <title>Title</title> 39 | <meta name="description" content="Hello, world" /> 40 | </head> 41 | """, url: URL(string: "https://example.com")!) 42 | #expect(preview.title == "Title") 43 | #expect(preview.description == "Hello, world") 44 | } 45 | 46 | @Test func dropbox() async throws { 47 | let provider = LinkPreviewProvider() 48 | let preview = try await provider.load(from: URL(string: "https://www.dropbox.com/scl/fi/9zhr8oqh8d49vgkvtn6jo/IMG_3996.HEIC?rlkey=iw62xieb2yrxtn0ujczl2lmkb&st=yq524xne&dl=0")!) 49 | #expect(preview.description != nil) 50 | } 51 | 52 | @Test func wikipedia() async throws { 53 | let provider = LinkPreviewProvider() 54 | let url = URL(string: "https://en.wikipedia.org/wiki/Italian_language")! 55 | let preview = try await provider.load(from: url) 56 | #expect(preview.description != nil) 57 | } 58 | 59 | @Test func wikipediaNoAdditionalRequests() async throws { 60 | let provider = LinkPreviewProvider() 61 | provider.options.allowAdditionalRequests = false 62 | let url = URL(string: "https://en.wikipedia.org/wiki/Italian_language")! 
let preview = try await provider.load(from: url)

        // We'll use the short description here
        #expect(preview.description == "Romance language")
    }

    /// Loads a Spotify track link and expects an audio URL in the preview.
    @Test func spotify() async throws {
        let provider = LinkPreviewProvider()
        let preview = try await provider.load(from: URL(string: "https://open.spotify.com/track/5TFD2bmFKGhoCRbX61nXY5")!)
        #expect(preview.audioURL != nil)
    }

    /// Loads a semana.com article and expects some description to be found.
    @Test func semana() async throws {
        let provider = LinkPreviewProvider()
        let preview = try await provider.load(from: URL(string: "https://www.semana.com/quien-remplaza-presidente-vicepresidente/265823-3/")!)
        #expect(preview.description != nil)
    }

    /// The resulting image URL must be absolute and rooted at the page's own
    /// origin.
    @Test func relativeImageURL() async throws {
        let provider = LinkPreviewProvider()
        let preview = try await provider.load(from: URL(string: "https://app.graphite.dev")!)
        let imageURL = try #require(preview.imageURL)
        #expect(imageURL.absoluteString.hasPrefix("https://app.graphite.dev"))
    }

    /// Registers a caller-defined processor and checks that its edit to the
    /// title is visible in the loaded preview.
    @Test func customProcessor() async throws {
        /// Appends the URL's host to the current title, separated by " • "
        /// when a title is already present.
        enum CustomProcessor: MetadataProcessor {
            static func updateLinkPreview(
                _ preview: inout LinkPreview,
                for url: URL,
                document: Document?,
                options: MetadataProcessingOptions
            ) async {
                var title = preview.title ?? ""
                if let host = url.host {
                    if !title.isEmpty {
                        title += " • "
                    }
                    title += host
                }
                // Leave the title untouched when there is neither an existing
                // title nor a host to show.
                if !title.isEmpty {
                    preview.title = title
                }
            }
        }

        let provider = LinkPreviewProvider()
        provider.registerProcessor(CustomProcessor.self)
        // The page needs a real <title> element so that a base title of
        // "Example Domain" exists for the custom processor to extend.
        let preview = try await provider.load(html: """
            <head>
                <title>Example Domain</title>
            </head>
            """, url: URL(string: "https://example.com")!
116 | ) 117 | #expect(preview.title == "Example Domain • example.com") 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /Package.resolved: -------------------------------------------------------------------------------- 1 | { 2 | "originHash" : "25cfbcef205dc273091916b10c1e5e1ea9c80bebaa109e215853c61b93ba3c58", 3 | "pins" : [ 4 | { 5 | "identity" : "async-http-client", 6 | "kind" : "remoteSourceControl", 7 | "location" : "https://github.com/swift-server/async-http-client", 8 | "state" : { 9 | "revision" : "60fa3dcfc52d09ed0411e9c4bc99928bf63656bd", 10 | "version" : "1.24.2" 11 | } 12 | }, 13 | { 14 | "identity" : "swift-algorithms", 15 | "kind" : "remoteSourceControl", 16 | "location" : "https://github.com/apple/swift-algorithms.git", 17 | "state" : { 18 | "revision" : "f6919dfc309e7f1b56224378b11e28bab5bccc42", 19 | "version" : "1.2.0" 20 | } 21 | }, 22 | { 23 | "identity" : "swift-argument-parser", 24 | "kind" : "remoteSourceControl", 25 | "location" : "https://github.com/apple/swift-argument-parser", 26 | "state" : { 27 | "revision" : "41982a3656a71c768319979febd796c6fd111d5c", 28 | "version" : "1.5.0" 29 | } 30 | }, 31 | { 32 | "identity" : "swift-atomics", 33 | "kind" : "remoteSourceControl", 34 | "location" : "https://github.com/apple/swift-atomics.git", 35 | "state" : { 36 | "revision" : "cd142fd2f64be2100422d658e7411e39489da985", 37 | "version" : "1.2.0" 38 | } 39 | }, 40 | { 41 | "identity" : "swift-collections", 42 | "kind" : "remoteSourceControl", 43 | "location" : "https://github.com/apple/swift-collections.git", 44 | "state" : { 45 | "revision" : "671108c96644956dddcd89dd59c203dcdb36cec7", 46 | "version" : "1.1.4" 47 | } 48 | }, 49 | { 50 | "identity" : "swift-http-types", 51 | "kind" : "remoteSourceControl", 52 | "location" : "https://github.com/apple/swift-http-types", 53 | "state" : { 54 | "revision" : "ef18d829e8b92d731ad27bb81583edd2094d1ce3", 55 | "version" : "1.3.1" 56 | } 57 | }, 58 | { 59 | 
"identity" : "swift-log", 60 | "kind" : "remoteSourceControl", 61 | "location" : "https://github.com/apple/swift-log.git", 62 | "state" : { 63 | "revision" : "96a2f8a0fa41e9e09af4585e2724c4e825410b91", 64 | "version" : "1.6.2" 65 | } 66 | }, 67 | { 68 | "identity" : "swift-nio", 69 | "kind" : "remoteSourceControl", 70 | "location" : "https://github.com/apple/swift-nio.git", 71 | "state" : { 72 | "revision" : "dff45738d84a53dbc8ee899c306b3a7227f54f89", 73 | "version" : "2.80.0" 74 | } 75 | }, 76 | { 77 | "identity" : "swift-nio-extras", 78 | "kind" : "remoteSourceControl", 79 | "location" : "https://github.com/apple/swift-nio-extras.git", 80 | "state" : { 81 | "revision" : "2e9746cfc57554f70b650b021b6ae4738abef3e6", 82 | "version" : "1.24.1" 83 | } 84 | }, 85 | { 86 | "identity" : "swift-nio-http2", 87 | "kind" : "remoteSourceControl", 88 | "location" : "https://github.com/apple/swift-nio-http2.git", 89 | "state" : { 90 | "revision" : "170f4ca06b6a9c57b811293cebcb96e81b661310", 91 | "version" : "1.35.0" 92 | } 93 | }, 94 | { 95 | "identity" : "swift-nio-ssl", 96 | "kind" : "remoteSourceControl", 97 | "location" : "https://github.com/apple/swift-nio-ssl.git", 98 | "state" : { 99 | "revision" : "0cc3528ff48129d64ab9cab0b1cd621634edfc6b", 100 | "version" : "2.29.3" 101 | } 102 | }, 103 | { 104 | "identity" : "swift-nio-transport-services", 105 | "kind" : "remoteSourceControl", 106 | "location" : "https://github.com/apple/swift-nio-transport-services.git", 107 | "state" : { 108 | "revision" : "3c394067c08d1225ba8442e9cffb520ded417b64", 109 | "version" : "1.23.1" 110 | } 111 | }, 112 | { 113 | "identity" : "swift-numerics", 114 | "kind" : "remoteSourceControl", 115 | "location" : "https://github.com/apple/swift-numerics.git", 116 | "state" : { 117 | "revision" : "0a5bc04095a675662cf24757cc0640aa2204253b", 118 | "version" : "1.0.2" 119 | } 120 | }, 121 | { 122 | "identity" : "swift-system", 123 | "kind" : "remoteSourceControl", 124 | "location" : 
"https://github.com/apple/swift-system.git", 125 | "state" : { 126 | "revision" : "c8a44d836fe7913603e246acab7c528c2e780168", 127 | "version" : "1.4.0" 128 | } 129 | }, 130 | { 131 | "identity" : "swiftsoup", 132 | "kind" : "remoteSourceControl", 133 | "location" : "https://github.com/scinfu/SwiftSoup.git", 134 | "state" : { 135 | "revision" : "0837db354faf9c9deb710dc597046edaadf5360f", 136 | "version" : "2.7.6" 137 | } 138 | } 139 | ], 140 | "version" : 3 141 | } 142 | --------------------------------------------------------------------------------