├── .swift-version ├── Tests └── PDFProcessorTests │ ├── TestResource │ ├── PDF Files │ │ ├── 1Page.pdf │ │ ├── 2Pages.pdf │ │ ├── 5Pages.pdf │ │ ├── LoremIpsum.pdf │ │ ├── Permissions.pdf │ │ └── 1Page-WithAttributes-WithAnnotations.pdf │ └── TestResource.swift │ ├── Pages │ ├── PDFPageRotation Tests.swift │ ├── PDFPagesFilter Tests.swift │ ├── PDFPageArea Tests.swift │ └── PDFPagesDescriptor Tests.swift │ ├── Utils Tests.swift │ └── PDFProcessor Operations Tests.swift ├── .swiftpm └── xcode │ ├── package.xcworkspace │ └── contents.xcworkspacedata │ └── xcshareddata │ └── xcschemes │ └── swift-pdf-processor.xcscheme ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ └── bug_report.yml ├── FUNDING.yml └── workflows │ └── build.yml ├── Sources └── PDFProcessor │ ├── Pages │ ├── PDFPagesDescriptor FilterResult.swift │ ├── IndexesDiff.swift │ ├── PDFPageRotation.swift │ ├── PDFPageRotation Angle.swift │ ├── PDFPageInset.swift │ ├── PDFPagesFilter.swift │ ├── PDFPageArea.swift │ └── PDFPagesDescriptor.swift │ ├── PDFProcessorError.swift │ ├── Operations │ ├── PDFOperationResult.swift │ ├── PDFOperation InterchangeBehavior.swift │ ├── PDFTextPageBreak.swift │ ├── PDFOperation ChangeBehavior.swift │ ├── PDFOperation PageAndFilename.swift │ ├── PDFOperation PDFTextDestination.swift │ ├── PDFOperation PageRangeAndFilename.swift │ └── PDFOperation.swift │ ├── PDFProcessor VariableContent.swift │ ├── Files │ ├── PDFFileIntrospection.swift │ ├── PDFFileDescriptor.swift │ ├── PDFFile.swift │ ├── PDFFilenameDescriptor.swift │ ├── PDFFilesDescriptor.swift │ └── PDFFileSplitDescriptor.swift │ ├── Annotations │ └── PDFAnnotationFilter.swift │ ├── Utilties │ ├── Utils.swift │ └── PDFKit Extensions.swift │ ├── PDFProcessor Settings.swift │ ├── PDFProcessor.swift │ └── PDFProcessor Operations.swift ├── LICENSE ├── Package.swift ├── .gitignore ├── .swiftformat └── README.md /.swift-version: -------------------------------------------------------------------------------- 1 | 6.0 
-------------------------------------------------------------------------------- /Tests/PDFProcessorTests/TestResource/PDF Files/1Page.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/orchetect/swift-pdf-processor/HEAD/Tests/PDFProcessorTests/TestResource/PDF Files/1Page.pdf -------------------------------------------------------------------------------- /Tests/PDFProcessorTests/TestResource/PDF Files/2Pages.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/orchetect/swift-pdf-processor/HEAD/Tests/PDFProcessorTests/TestResource/PDF Files/2Pages.pdf -------------------------------------------------------------------------------- /Tests/PDFProcessorTests/TestResource/PDF Files/5Pages.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/orchetect/swift-pdf-processor/HEAD/Tests/PDFProcessorTests/TestResource/PDF Files/5Pages.pdf -------------------------------------------------------------------------------- /Tests/PDFProcessorTests/TestResource/PDF Files/LoremIpsum.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/orchetect/swift-pdf-processor/HEAD/Tests/PDFProcessorTests/TestResource/PDF Files/LoremIpsum.pdf -------------------------------------------------------------------------------- /Tests/PDFProcessorTests/TestResource/PDF Files/Permissions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/orchetect/swift-pdf-processor/HEAD/Tests/PDFProcessorTests/TestResource/PDF Files/Permissions.pdf -------------------------------------------------------------------------------- /.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 
| 7 | 8 | -------------------------------------------------------------------------------- /Tests/PDFProcessorTests/TestResource/PDF Files/1Page-WithAttributes-WithAnnotations.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/orchetect/swift-pdf-processor/HEAD/Tests/PDFProcessorTests/TestResource/PDF Files/1Page-WithAttributes-WithAnnotations.pdf -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Feature request 4 | url: https://github.com/orchetect/swift-pdf-processor/discussions 5 | about: Suggest new features or improvements. 6 | - name: I need help setting up or troubleshooting 7 | url: https://github.com/orchetect/swift-pdf-processor/discussions 8 | about: Questions not answered in the documentation, discussions forum, or example projects. 9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | name: Bug report 2 | description: Create a bug report about a reproducible problem. 3 | labels: bug 4 | body: 5 | - type: textarea 6 | id: bug-description 7 | attributes: 8 | label: Bug Description, Steps to Reproduce, Crash Logs, Screenshots, etc. 9 | description: "A clear and concise description of the bug and steps to reproduce. Include system details (OS version) and build environment particulars (Xcode version, etc.)." 
10 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/Pages/PDFPagesDescriptor FilterResult.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFPagesDescriptor FilterResult.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | extension PDFPagesDescriptor { 10 | public struct FilterResult { 11 | let indexes: [Int] 12 | let isInclusive: Bool 13 | } 14 | } 15 | 16 | extension PDFPagesDescriptor.FilterResult: Equatable { } 17 | 18 | extension PDFPagesDescriptor.FilterResult: Hashable { } 19 | 20 | extension PDFPagesDescriptor.FilterResult: Sendable { } 21 | 22 | #endif 23 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/PDFProcessorError.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFProcessorError.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | import Foundation 8 | 9 | public enum PDFProcessorError: LocalizedError { 10 | case validationError(String) 11 | case runtimeError(String) 12 | 13 | public var errorDescription: String? 
{ 14 | switch self { 15 | case let .validationError(error): 16 | return "Validation error: \(error)" 17 | 18 | case let .runtimeError(error): 19 | return error 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/Operations/PDFOperationResult.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFOperationResult.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | import Foundation 8 | 9 | /// PDF editing operation result. 10 | public enum PDFOperationResult { 11 | /// The operation did not result in any change to the PDF file. 12 | case noChange(reason: String? = nil) 13 | 14 | /// The operation resulted in one or more changes to the PDF file. 15 | case changed 16 | } 17 | 18 | extension PDFOperationResult: Equatable { } 19 | 20 | extension PDFOperationResult: Hashable { } 21 | 22 | extension PDFOperationResult: Sendable { } 23 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: orchetect 4 | # patreon: # Replace with a single Patreon username 5 | # open_collective: # Replace with a single Open Collective username 6 | # ko_fi: # Replace with a single Ko-fi username 7 | # tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | # community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | # liberapay: # Replace with a single Liberapay username 10 | # issuehunt: # Replace with a single IssueHunt username 11 | # otechie: # Replace with a single Otechie username 12 | # lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | # 
custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/PDFProcessor VariableContent.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFProcessor VariableContent.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | import Foundation 10 | @preconcurrency import PDFKit 11 | 12 | extension PDFProcessor { 13 | public enum VariableContent { 14 | /// Plain text content. 15 | case string(String) 16 | 17 | /// Reference to a PDF page. 18 | case pdfPage(PDFPage) 19 | 20 | /// Reference to a PDF document. 21 | case pdfDocument(PDFDocument) 22 | } 23 | } 24 | 25 | extension PDFProcessor.VariableContent: Equatable { } 26 | 27 | extension PDFProcessor.VariableContent: Hashable { } 28 | 29 | extension PDFProcessor.VariableContent: Sendable { } 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/Operations/PDFOperation InterchangeBehavior.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFOperation InterchangeBehavior.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | import Foundation 10 | 11 | extension PDFOperation { 12 | /// PDF editing operation behavior. 
13 | public enum InterchangeBehavior { 14 | case copy 15 | case move 16 | } 17 | } 18 | 19 | extension PDFOperation.InterchangeBehavior: Equatable { } 20 | 21 | extension PDFOperation.InterchangeBehavior: Hashable { } 22 | 23 | extension PDFOperation.InterchangeBehavior: Sendable { } 24 | 25 | extension PDFOperation.InterchangeBehavior { 26 | public var verboseDescription: String { 27 | switch self { 28 | case .copy: 29 | return "copying" 30 | case .move: 31 | return "moving" 32 | } 33 | } 34 | } 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/Operations/PDFTextPageBreak.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFTextPageBreak.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | import Foundation 10 | 11 | /// Character(s) to insert at PDF page breaks in plain text output. 
12 | public enum PDFTextPageBreak: String { 13 | case none = "" 14 | case newLine = "\n" 15 | case doubleNewLine = "\n\n" 16 | } 17 | 18 | extension PDFTextPageBreak: Equatable { } 19 | 20 | extension PDFTextPageBreak: Hashable { } 21 | 22 | extension PDFTextPageBreak: Sendable { } 23 | 24 | extension PDFTextPageBreak { 25 | public var verboseDescription: String { 26 | switch self { 27 | case .none: 28 | return "none" 29 | case .newLine: 30 | return "new-line" 31 | case .doubleNewLine: 32 | return "double new-line" 33 | } 34 | } 35 | } 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/Pages/IndexesDiff.swift: -------------------------------------------------------------------------------- 1 | // 2 | // IndexesDiff.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | import Foundation 8 | 9 | public struct IndexesDiff { 10 | let original: [Int] 11 | 12 | public let included: [Int] 13 | public let excluded: [Int] 14 | 15 | /// `true` if the diff operation's conditions were within the original indexes' bounds. 16 | /// `false` if the result does not contain all expected results. 
17 | public let isInclusive: Bool 18 | 19 | public init(original: [Int], included: [Int], excluded: [Int], isInclusive: Bool) { 20 | self.original = original 21 | self.included = included 22 | self.excluded = excluded 23 | self.isInclusive = isInclusive 24 | } 25 | 26 | public var isIdentical: Bool { 27 | included == original 28 | } 29 | } 30 | 31 | extension IndexesDiff: Equatable { } 32 | 33 | extension IndexesDiff: Hashable { } 34 | 35 | extension IndexesDiff: Sendable { } 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Steffan Andrews 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/Operations/PDFOperation ChangeBehavior.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFOperation ChangeBehavior.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | import Foundation 10 | 11 | extension PDFOperation { 12 | /// PDF editing operation value modification behavior. 13 | public enum ChangeBehavior { 14 | /// Set absolute page rotation value, replacing existing rotation if any. 15 | case absolute 16 | 17 | /// Relative to current page rotation, if any. 18 | /// If current page rotation is 0 degrees, this is identical to ``absolute``. 19 | case relative 20 | } 21 | } 22 | 23 | extension PDFOperation.ChangeBehavior: Equatable { } 24 | 25 | extension PDFOperation.ChangeBehavior: Hashable { } 26 | 27 | extension PDFOperation.ChangeBehavior: Sendable { } 28 | 29 | extension PDFOperation.ChangeBehavior { 30 | public var verboseDescription: String { 31 | switch self { 32 | case .absolute: 33 | return "absolute" 34 | case .relative: 35 | return "relative" 36 | } 37 | } 38 | } 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/Files/PDFFileIntrospection.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFFileIntrospection.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | import Foundation 10 | import PDFKit 11 | 12 | /// Provides a closure for custom introspection logic on a `PDFDocument` instance. 
13 | public struct PDFFileIntrospection { 14 | let id: UUID = .init() 15 | 16 | public var description: String 17 | public var closure: @Sendable (_ pdf: PDFDocument) -> Bool 18 | 19 | public init( 20 | description: String, 21 | closure: @escaping @Sendable (_ pdf: PDFDocument) -> Bool 22 | ) { 23 | self.description = description 24 | self.closure = closure 25 | } 26 | } 27 | 28 | extension PDFFileIntrospection: Equatable { 29 | public static func == (lhs: Self, rhs: Self) -> Bool { 30 | lhs.id == rhs.id 31 | } 32 | } 33 | 34 | extension PDFFileIntrospection: Hashable { 35 | public func hash(into hasher: inout Hasher) { 36 | // hash the same identity that `==` compares so equal values always hash equally (a closure can't be hashed) 37 | hasher.combine(id) 38 | } 39 | } 40 | 41 | extension PDFFileIntrospection: Sendable { } 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/Pages/PDFPageRotation.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFPageRotation.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | import Foundation 10 | 11 | /// PDF editing page rotation descriptor. 
12 | public struct PDFPageRotation { 13 | public var angle: Angle 14 | public var changeBehavior: PDFOperation.ChangeBehavior 15 | 16 | public init(angle: Angle, apply changeBehavior: PDFOperation.ChangeBehavior = .relative) { 17 | self.angle = angle 18 | self.changeBehavior = changeBehavior 19 | } 20 | 21 | public func degrees(offsetting other: Angle = ._0degrees) -> Int { 22 | switch changeBehavior { 23 | case .absolute: return angle.degrees 24 | case .relative: return (angle + other).degrees 25 | } 26 | } 27 | } 28 | 29 | extension PDFPageRotation: Equatable { } 30 | 31 | extension PDFPageRotation: Hashable { } 32 | 33 | extension PDFPageRotation { 34 | public var verboseDescription: String { 35 | "\(changeBehavior == .relative ? "by" : "to") \(angle.verboseDescription)" 36 | } 37 | } 38 | 39 | extension PDFPageRotation: Sendable { } 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version: 6.0 2 | // (be sure to update the .swift-version file when this Swift version changes) 3 | 4 | import PackageDescription 5 | 6 | let package = Package( 7 | name: "swift-pdf-processor", 8 | platforms: [ 9 | .macOS(.v11), .iOS(.v14), .tvOS(.v14), .watchOS(.v7) 10 | ], 11 | products: [ 12 | .library( 13 | name: "PDFProcessor", 14 | targets: ["PDFProcessor"] 15 | ) 16 | ], 17 | dependencies: [ 18 | .package(url: "https://github.com/orchetect/swift-extensions", from: "2.0.0"), 19 | .package(url: "https://github.com/orchetect/swift-testing-extensions.git", from: "0.2.4") 20 | ], 21 | targets: [ 22 | .target( 23 | name: "PDFProcessor", 24 | dependencies: [ 25 | .product(name: "SwiftExtensions", package: "swift-extensions") 26 | ] 27 | ), 28 | .testTarget( 29 | name: "PDFProcessorTests", 30 | dependencies: [ 31 | "PDFProcessor", 32 | .product(name: "SwiftExtensions", package: "swift-extensions"), 33 | .product(name: 
"TestingExtensions", package: "swift-testing-extensions") 34 | ], 35 | resources: [.copy("TestResource/PDF Files")] 36 | ) 37 | ] 38 | ) 39 | -------------------------------------------------------------------------------- /Tests/PDFProcessorTests/TestResource/TestResource.swift: -------------------------------------------------------------------------------- 1 | // 2 | // TestResource.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | import Foundation 8 | import Testing 9 | import TestingExtensions 10 | 11 | // NOTE: DO NOT name any folders "Resources". Xcode will fail to build iOS targets. 12 | 13 | // MARK: - Constants 14 | 15 | /// Resources files on disk used for unit testing. 16 | extension TestResource { 17 | static let pdf1page = TestResource.File( 18 | name: "1Page", ext: "pdf", subFolder: "PDF Files" 19 | ) 20 | 21 | static let pdf2pages = TestResource.File( 22 | name: "2Pages", ext: "pdf", subFolder: "PDF Files" 23 | ) 24 | 25 | static let pdf5pages = TestResource.File( 26 | name: "5Pages", ext: "pdf", subFolder: "PDF Files" 27 | ) 28 | 29 | static let pdf1page_withAttributes_withAnnotations = TestResource.File( 30 | name: "1Page-WithAttributes-WithAnnotations", ext: "pdf", subFolder: "PDF Files" 31 | ) 32 | 33 | static let loremIpsum = TestResource.File( 34 | name: "LoremIpsum", ext: "pdf", subFolder: "PDF Files" 35 | ) 36 | 37 | static let permissions = TestResource.File( 38 | name: "Permissions", ext: "pdf", subFolder: "PDF Files" 39 | ) 40 | } 41 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/Operations/PDFOperation PageAndFilename.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFOperation PageAndFilename.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • 
Licensed under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | import Foundation 10 | 11 | extension PDFOperation { 12 | /// PDF editing operation page & filename descriptor. 13 | public struct PageAndFilename { 14 | public var pageIndex: Int 15 | public var filename: String? 16 | 17 | public init(_ pageIndex: Int, _ filename: String? = nil) { 18 | self.pageIndex = pageIndex 19 | self.filename = filename 20 | } 21 | } 22 | } 23 | 24 | extension PDFOperation.PageAndFilename: Equatable { } 25 | 26 | extension PDFOperation.PageAndFilename: Comparable { 27 | public static func < (lhs: Self, rhs: Self) -> Bool { 28 | lhs.pageIndex < rhs.pageIndex 29 | } 30 | } 31 | 32 | extension PDFOperation.PageAndFilename: Hashable { } 33 | 34 | extension PDFOperation.PageAndFilename: Sendable { } 35 | 36 | extension PDFOperation.PageAndFilename { 37 | public var verboseDescription: String { 38 | if let filename { 39 | return "page index \(pageIndex) with name \(filename.quoted)" 40 | } else { 41 | return "page index \(pageIndex)" 42 | } 43 | } 44 | } 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/Operations/PDFOperation PDFTextDestination.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFOperation PDFTextDestination.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | import Foundation 10 | 11 | /// A destination to transfer or save plain text. 12 | public enum PDFTextDestination { 13 | /// System pasteboard (clipboard). 14 | @available(tvOS, unavailable) 15 | @available(watchOS, unavailable) 16 | case pasteboard 17 | 18 | /// Save to a file on disk. 
19 | case file(url: URL) 20 | 21 | /// Store in memory in the ``PDFProcessor`` instance's ``PDFProcessor/variables`` dictionary, 22 | /// keyed by the variable name. 23 | /// 24 | /// Appends or replaces variable. 25 | case variable(named: String) 26 | } 27 | 28 | extension PDFTextDestination: Equatable { } 29 | 30 | extension PDFTextDestination: Hashable { } 31 | 32 | extension PDFTextDestination: Sendable { } 33 | 34 | extension PDFTextDestination { 35 | public var verboseDescription: String { 36 | switch self { 37 | case .pasteboard: 38 | return "pasteboard" 39 | case let .file(url): 40 | return url.absoluteString 41 | case let .variable(name): 42 | return "variable named \(name.quoted)" 43 | } 44 | } 45 | } 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/Operations/PDFOperation PageRangeAndFilename.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFOperation PageRangeAndFilename.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | import Foundation 10 | 11 | extension PDFOperation { 12 | /// PDF editing operation page range & filename descriptor. 13 | public struct PageRangeAndFilename { 14 | public var pageRange: ClosedRange 15 | public var filename: String? 16 | 17 | public init(_ pageRange: ClosedRange, _ filename: String? 
= nil) { 18 | self.pageRange = pageRange 19 | self.filename = filename 20 | } 21 | } 22 | } 23 | 24 | extension PDFOperation.PageRangeAndFilename: Equatable { } 25 | 26 | extension PDFOperation.PageRangeAndFilename: Comparable { 27 | public static func < (lhs: Self, rhs: Self) -> Bool { 28 | // TODO: naïve sorting but mostly works, could be better 29 | lhs.pageRange.lowerBound < rhs.pageRange.lowerBound 30 | } 31 | } 32 | 33 | extension PDFOperation.PageRangeAndFilename: Hashable { } 34 | 35 | extension PDFOperation.PageRangeAndFilename: Sendable { } 36 | 37 | extension PDFOperation.PageRangeAndFilename { 38 | public var verboseDescription: String { 39 | if let filename { 40 | return "page range \(pageRange) with name \(filename.quoted)" 41 | } else { 42 | return "page range \(pageRange)" 43 | } 44 | } 45 | } 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/Pages/PDFPageRotation Angle.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFPageRotation Angle.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | import Foundation 10 | 11 | extension PDFPageRotation { 12 | /// PDF editing page rotation angle. 
13 | public enum Angle: Int { 14 | case _0degrees = 0 15 | case _90degrees = 90 16 | case _180degrees = 180 17 | case _270degrees = 270 18 | 19 | public init?(degrees: Int) { 20 | // Normalize into 0 ..< 360 first: `%` keeps the sign of the dividend, and a 21 | // negative multiple of 360 (e.g. -360) must map to 0 rather than to 360. 22 | let remainder = degrees % 360 23 | let normalized = remainder < 0 ? remainder + 360 : remainder 24 | self.init(rawValue: normalized) 25 | } 26 | } 27 | } 28 | 29 | extension PDFPageRotation.Angle: Equatable { } 30 | 31 | extension PDFPageRotation.Angle: Hashable { } 32 | 33 | extension PDFPageRotation.Angle { 34 | public var verboseDescription: String { 35 | "\(rawValue) degrees" 36 | } 37 | } 38 | 39 | extension PDFPageRotation.Angle: Sendable { } 40 | 41 | // MARK: - Properties 42 | 43 | extension PDFPageRotation.Angle { 44 | public var degrees: Int { 45 | rawValue 46 | } 47 | } 48 | 49 | // MARK: - Operators 50 | 51 | extension PDFPageRotation.Angle { 52 | public static func + (lhs: Self, rhs: Self) -> Self { 53 | Self(degrees: lhs.degrees + rhs.degrees) ?? ._0degrees 54 | } 55 | 56 | public static func - (lhs: Self, rhs: Self) -> Self { 57 | Self(degrees: lhs.degrees - rhs.degrees) ?? ._0degrees 58 | } 59 | } 60 | 61 | #endif 62 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/Pages/PDFPageInset.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFPageInset.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | public enum PDFPageInset { 10 | /// A literal value in points. 11 | case points(Double) 12 | 13 | /// A scale factor. 14 | /// `1.0` represents 1:1 scale to original. 15 | case scale(factor: Double) 16 | 17 | /// Preserve the inset as-is, unchanged. 
18 | case passthrough 19 | } 20 | 21 | extension PDFPageInset: Equatable { } 22 | 23 | extension PDFPageInset: Hashable { } 24 | 25 | extension PDFPageInset: Sendable { } 26 | 27 | extension PDFPageInset { 28 | public var verboseDescription: String { 29 | switch self { 30 | case let .points(value): 31 | "\(value)pts" 32 | case let .scale(factor): 33 | "scaled \(factor)x" 34 | case .passthrough: 35 | "same" 36 | } 37 | } 38 | } 39 | 40 | // MARK: - Utilities 41 | 42 | extension PDFPageInset { 43 | static func rotate( 44 | top: PDFPageInset, 45 | leading: PDFPageInset, 46 | bottom: PDFPageInset, 47 | trailing: PDFPageInset, 48 | by rotation: PDFPageRotation.Angle 49 | ) -> ( 50 | top: PDFPageInset, 51 | leading: PDFPageInset, 52 | bottom: PDFPageInset, 53 | trailing: PDFPageInset 54 | ) { 55 | switch rotation { 56 | case ._0degrees: 57 | (top, leading, bottom, trailing) 58 | case ._90degrees: 59 | (trailing, top, leading, bottom) 60 | case ._180degrees: 61 | (bottom, trailing, top, leading) 62 | case ._270degrees: 63 | (leading, bottom, trailing, top) 64 | } 65 | } 66 | } 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Custom 2 | [Dd]ev/ 3 | 4 | # Xcode 5 | 6 | # macOS 7 | .DS_Store 8 | 9 | ## Build generated 10 | build/ 11 | DerivedData/ 12 | 13 | ## Various settings 14 | *.pbxuser 15 | !default.pbxuser 16 | *.mode1v3 17 | !default.mode1v3 18 | *.mode2v3 19 | !default.mode2v3 20 | *.perspectivev3 21 | !default.perspectivev3 22 | xcuserdata/ 23 | 24 | ## Other 25 | *.moved-aside 26 | *.xccheckout 27 | *.xcscmblueprint 28 | 29 | ## Obj-C/Swift specific 30 | *.hmap 31 | *.ipa 32 | *.dSYM.zip 33 | *.dSYM 34 | 35 | ## Playgrounds 36 | timeline.xctimeline 37 | playground.xcworkspace 38 | 39 | ## SPM support in Xcode 40 | # .swiftpm - for shared CI schemes we need these checked in: 41 | # -> 
.swiftpm/xcode/package.xcworkspace 42 | # -> .swiftpm/xcode/xcshareddata/xcschemes/*.* 43 | 44 | # Swift Package Manager 45 | # 46 | # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies. 47 | Packages/ 48 | Package.pins 49 | Package.resolved 50 | .build/ 51 | 52 | # CocoaPods 53 | # 54 | # We recommend against adding the Pods directory to your .gitignore. However 55 | # you should judge for yourself, the pros and cons are mentioned at: 56 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control 57 | 58 | Pods/ 59 | 60 | # Carthage 61 | # 62 | # Add this line if you want to avoid checking in source code from Carthage dependencies. 63 | # Carthage/Checkouts 64 | 65 | Carthage/Build 66 | 67 | # fastlane 68 | # 69 | # It is recommended to not store the screenshots in the git repo. Instead, use fastlane to re-generate the 70 | # screenshots whenever they are needed. 71 | # For more information about the recommended setup visit: 72 | # https://docs.fastlane.tools/best-practices/source-control/#source-control 73 | 74 | fastlane/report.xml 75 | fastlane/Preview.html 76 | fastlane/screenshots/**/*.png 77 | fastlane/test_output 78 | -------------------------------------------------------------------------------- /.swiftformat: -------------------------------------------------------------------------------- 1 | --acronyms ID,URL,UUID 2 | --allman false 3 | --assetliterals visual-width 4 | --beforemarks 5 | --binarygrouping 8,8 6 | --categorymark "MARK: %c" 7 | --classthreshold 0 8 | --closingparen balanced 9 | --closurevoid remove 10 | --commas inline 11 | --conflictmarkers reject 12 | --decimalgrouping ignore 13 | --elseposition same-line 14 | --emptybraces spaced 15 | --enumthreshold 0 16 | --exponentcase lowercase 17 | --exponentgrouping disabled 18 | --extensionacl on-declarations 19 | --extensionlength 0 20 | --extensionmark "MARK: - %t + %c" 21 | --fractiongrouping 
disabled 22 | --fragment false 23 | --funcattributes prev-line 24 | --groupedextension "MARK: %c" 25 | --guardelse auto 26 | --header "\n {file}\n swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor\n © 2023-{year} Steffan Andrews • Licensed under MIT License\n" 27 | --hexgrouping 4,8 28 | --hexliteralcase uppercase 29 | --ifdef no-indent 30 | --importgrouping alpha 31 | --indent 4 32 | --indentcase false 33 | --indentstrings true 34 | --lifecycle 35 | --lineaftermarks true 36 | --linebreaks lf 37 | --markcategories true 38 | --markextensions always 39 | --marktypes always 40 | --maxwidth 120 41 | --modifierorder 42 | --nevertrailing 43 | --nospaceoperators 44 | --nowrapoperators 45 | --octalgrouping 4,8 46 | --operatorfunc spaced 47 | --organizetypes actor,class,enum,struct 48 | --patternlet hoist 49 | --ranges spaced 50 | --redundanttype infer-locals-only 51 | --self remove 52 | --selfrequired 53 | --semicolons inline 54 | --shortoptionals always 55 | --smarttabs enabled 56 | --stripunusedargs always 57 | --structthreshold 0 58 | --tabwidth unspecified 59 | --trailingclosures 60 | --trimwhitespace nonblank-lines 61 | --typeattributes preserve 62 | --typemark "MARK: - %t" 63 | --varattributes preserve 64 | --voidtype void 65 | --wraparguments before-first 66 | --wrapcollections before-first 67 | --wrapconditions after-first 68 | --wrapparameters before-first 69 | --wrapreturntype preserve 70 | --wrapternary before-operators 71 | --wraptypealiases before-first 72 | --xcodeindentation enabled 73 | --yodaswap always 74 | --disable blankLinesAroundMark,consecutiveSpaces,preferKeyPath,redundantParens,sortDeclarations,sortedImports,unusedArguments 75 | --enable blankLinesBetweenImports,blockComments,isEmpty,wrapEnumCases 76 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/Annotations/PDFAnnotationFilter.swift: -------------------------------------------------------------------------------- 1 | // 
/// Criteria for selecting PDF annotations.
public enum PDFAnnotationFilter {
    /// Selects every annotation.
    case all

    /// Selects no annotations.
    case none

    /// Selects annotations for which `type(containedIn:)` is true for the given types.
    case include(_ types: [PDFAnnotationSubtype])

    /// Selects annotations for which `type(containedIn:)` is false for the given types.
    case exclude(_ types: [PDFAnnotationSubtype])
}

extension PDFAnnotationFilter: Equatable { }

extension PDFAnnotationFilter: Hashable { }

extension PDFAnnotationFilter: Sendable { }

extension PDFAnnotationFilter {
    /// Returns the annotations from `inputs` that this filter selects, in their original order.
    func filtering(
        _ inputs: [PDFAnnotation]
    ) -> [PDFAnnotation] {
        inputs.filter { annotation in
            contains(annotation)
        }
    }

    /// Returns `true` if this filter selects the given annotation.
    func contains(
        _ input: PDFAnnotation
    ) -> Bool {
        switch self {
        case .all:
            true
        case .none:
            false
        case let .include(types):
            input.type(containedIn: types)
        case let .exclude(types):
            !input.type(containedIn: types)
        }
    }
}

extension PDFAnnotationFilter {
    /// Human-readable phrase describing the filter.
    public var verboseDescription: String {
        // Quoted, comma-separated list of the subtype raw values.
        func listing(_ types: [PDFAnnotationSubtype]) -> String {
            types.map(\.rawValue.quoted).joined(separator: ", ")
        }

        switch self {
        case .all:
            return "all annotations"
        case .none:
            return "no annotations"
        case let .include(types):
            return "including \(listing(types)) annotations"
        case let .exclude(types):
            return "excluding \(listing(types)) annotations"
        }
    }
}
extension CGRect {
    /// Produces the rect that results from rotating `self` by 90 degrees inside a parent area.
    ///
    /// The result swaps width and height. Its x origin is taken from the original y origin,
    /// and its y origin is derived from the parent's width and the rect's horizontal offset
    /// within the parent.
    ///
    /// - Parameters:
    ///   - area: Parent area.
    ///   - isAbsolute: If `true`, `self` and the parent `area` are treated as sharing the same
    ///     origin. If `false`, the calculation is treated as relative and the resulting origin
    ///     is additionally shifted by the parent's origin (x by `area.origin.y`,
    ///     y by `-area.origin.x`).
    /// - Returns: The rotated rect.
    func rotate90Degrees(
        within area: CGRect,
        isAbsolute: Bool
    ) -> Self {
        // Horizontal distance of this rect from the parent's origin.
        let offsetInArea = origin.x - area.origin.x

        var rotatedOrigin = CGPoint(
            x: origin.y,
            y: area.origin.y + area.width - width - offsetInArea
        )

        if !isAbsolute {
            rotatedOrigin.x += area.origin.y
            rotatedOrigin.y -= area.origin.x
        }

        // Width and height trade places after a quarter turn.
        return CGRect(
            origin: rotatedOrigin,
            size: CGSize(width: height, height: width)
        )
    }
}
extension PDFFileDescriptor {
    /// Human-readable phrase describing which file this descriptor refers to.
    public var verboseDescription: String {
        switch self {
        case .first:
            "first file"
        case .second:
            "second file"
        case .last:
            "last file"
        case let .index(position):
            "file with index \(position)"
        case let .filename(descriptor):
            "file with filename \(descriptor.verboseDescription)"
        case let .introspecting(inspector):
            "file matching \(inspector.description)"
        }
    }
}
= nil, 21 | writeOptions: [PDFDocumentWriteOption: Any] = [:] 22 | ) { 23 | self.doc = doc 24 | self.customExportFilename = customExportFilename 25 | self.writeOptions = writeOptions 26 | } 27 | 28 | /// Initialize with a new empty `PDFDocument`. 29 | init() { 30 | doc = PDFDocument() 31 | self.customExportFilename = nil 32 | writeOptions = [:] 33 | } 34 | } 35 | 36 | extension PDFFile: Equatable { 37 | static func == (lhs: PDFFile, rhs: PDFFile) -> Bool { 38 | lhs.doc == rhs.doc 39 | } 40 | } 41 | 42 | extension PDFFile: Hashable { 43 | func hash(into hasher: inout Hasher) { 44 | hasher.combine(doc) 45 | } 46 | } 47 | 48 | extension PDFFile: NSCopying { 49 | func copy(with zone: NSZone? = nil) -> Any { 50 | PDFFile( 51 | doc: doc.copy() as! PDFDocument, 52 | customExportFilename: customExportFilename, 53 | writeOptions: writeOptions 54 | ) 55 | } 56 | } 57 | 58 | extension PDFFile: CustomStringConvertible { 59 | var description: String { 60 | "PDFFile(\(filenameForExport(withExtension: true)))" 61 | } 62 | } 63 | 64 | extension PDFFile { 65 | /// Return the consolidated filename for export. 66 | func filenameForExport(withExtension: Bool) -> String { 67 | let base = customExportFilename 68 | ?? doc.filenameWithoutExtension?.appending("-processed") 69 | ?? "File" 70 | return withExtension ? base + ".pdf" : base 71 | } 72 | 73 | func set(filenameForExportWithoutExtension filename: String?) { 74 | customExportFilename = filename 75 | } 76 | 77 | /// Return the consolidated filename for filename text matching logic, without file extension. 78 | var filenameForMatching: String { 79 | customExportFilename 80 | ?? doc.filenameWithoutExtension 81 | ?? "" 82 | } 83 | 84 | /// Returns `true` if a custom file name was set. 
/// Criteria to match a PDF filename (excluding .pdf file extension).
public enum PDFFilenameDescriptor {
    /// Exact full string match.
    case equals(String)

    /// Filename that starts with the given string.
    case starts(with: String)

    /// Filename that ends with the given string.
    case ends(with: String)

    /// Filename that contains the given string.
    case contains(String)

    /// Filename that does not start with the given string.
    case doesNotStart(with: String)

    /// Filename that does not end with the given string.
    case doesNotEnd(with: String)

    /// Filename that does not contain the given string.
    case doesNotContain(String)

    // case matches(regex: Regex)

    // case doesNotMatch(regex: Regex)
}

extension PDFFilenameDescriptor: Equatable { }

extension PDFFilenameDescriptor: Hashable { }

extension PDFFilenameDescriptor: Sendable { }

extension PDFFilenameDescriptor {
    /// Tests a filename against this descriptor.
    ///
    /// - Parameter source: Filename to test.
    /// - Returns: `true` if `source` satisfies the descriptor.
    public func matches(_ source: String) -> Bool {
        switch self {
        case let .equals(expected):
            source == expected
        case let .starts(leading):
            source.starts(with: leading)
        case let .ends(trailing):
            source.hasSuffix(trailing)
        case let .contains(fragment):
            source.contains(fragment)

        // Each negative case is the logical inverse of its positive counterpart.
        case let .doesNotStart(leading):
            !Self.starts(with: leading).matches(source)
        case let .doesNotEnd(trailing):
            !Self.ends(with: trailing).matches(source)
        case let .doesNotContain(fragment):
            !Self.contains(fragment).matches(source)
        }
    }
}

extension PDFFilenameDescriptor {
    /// Human-readable phrase describing the match criteria.
    public var verboseDescription: String {
        switch self {
        case let .equals(expected):
            expected.quoted
        case let .starts(leading):
            "starting with \(leading.quoted)"
        case let .ends(trailing):
            "ending with \(trailing.quoted)"
        case let .contains(fragment):
            "containing \(fragment.quoted)"
        case let .doesNotStart(leading):
            "not starting with \(leading.quoted)"
        case let .doesNotEnd(trailing):
            "not ending with \(trailing.quoted)"
        case let .doesNotContain(fragment):
            "not containing \(fragment.quoted)"
        }
    }
}
7 | #if canImport(PDFKit) 8 | 9 | import Foundation 10 | internal import SwiftExtensions 11 | 12 | extension PDFProcessor { 13 | public struct Settings { 14 | public var sourcePDFs: [URL] 15 | public var outputDir: URL? 16 | public var operations: [PDFOperation] 17 | public var savePDFs: Bool 18 | 19 | /// Initialize with defaults for default-able parameters. 20 | public init( 21 | sourcePDFs: [URL] 22 | ) throws { 23 | self.sourcePDFs = sourcePDFs 24 | 25 | outputDir = Defaults.outputDir 26 | operations = Defaults.operations 27 | savePDFs = Defaults.savePDFs 28 | 29 | try validate() 30 | } 31 | 32 | public init( 33 | sourcePDFs: [URL], 34 | outputDir: URL?, 35 | operations: [PDFOperation], 36 | savePDFs: Bool 37 | ) throws { 38 | self.operations = operations 39 | self.sourcePDFs = sourcePDFs 40 | self.outputDir = outputDir 41 | self.savePDFs = savePDFs 42 | 43 | try validate() 44 | } 45 | } 46 | } 47 | 48 | extension PDFProcessor.Settings: Sendable { } 49 | 50 | // MARK: - Defaults 51 | 52 | extension PDFProcessor.Settings { 53 | public enum Defaults { 54 | public static let operations: [PDFOperation] = [] 55 | public static let outputDir: URL? = nil 56 | public static let savePDFs: Bool = true 57 | } 58 | } 59 | 60 | // MARK: - Validation 61 | 62 | extension PDFProcessor.Settings { 63 | public enum Validation { 64 | // public static let ... = 1 ... 10 65 | } 66 | 67 | private func validate() throws { 68 | try sourcePDFs.forEach { url in 69 | guard url.fileExists, url.isDirectory else { 70 | throw PDFProcessorError.validationError( 71 | "File does not exist at \(url.path.quoted)." 72 | ) 73 | } 74 | } 75 | 76 | if let outputDir { 77 | guard outputDir.fileExists else { 78 | throw PDFProcessorError.validationError( 79 | "Output folder does not exist at \(outputDir.path.quoted)." 80 | ) 81 | } 82 | guard outputDir.isDirectory else { 83 | throw PDFProcessorError.validationError( 84 | "Output path is not a folder: \(outputDir.path.quoted)." 
85 | ) 86 | } 87 | } 88 | 89 | guard !operations.isEmpty else { 90 | throw PDFProcessorError.validationError( 91 | "No operation(s) are specified." 92 | ) 93 | } 94 | } 95 | } 96 | 97 | #endif 98 | -------------------------------------------------------------------------------- /.swiftpm/xcode/xcshareddata/xcschemes/swift-pdf-processor.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 9 | 10 | 16 | 22 | 23 | 24 | 25 | 26 | 32 | 33 | 35 | 41 | 42 | 43 | 44 | 45 | 55 | 56 | 62 | 63 | 69 | 70 | 71 | 72 | 74 | 75 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /Tests/PDFProcessorTests/Pages/PDFPageRotation Tests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFPageRotation Tests.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | @testable import PDFProcessor 10 | import Testing 11 | import TestingExtensions 12 | 13 | @Suite struct PDFPageRotationTests { 14 | @Test func angleDegrees() throws { 15 | #expect(PDFPageRotation.Angle._0degrees.degrees == 0) 16 | #expect(PDFPageRotation.Angle._90degrees.degrees == 90) 17 | #expect(PDFPageRotation.Angle._180degrees.degrees == 180) 18 | #expect(PDFPageRotation.Angle._270degrees.degrees == 270) 19 | } 20 | 21 | @Test func angleInit() throws { 22 | // in spec 23 | #expect(PDFPageRotation.Angle(degrees: 0) == ._0degrees) 24 | #expect(PDFPageRotation.Angle(degrees: 90) == ._90degrees) 25 | #expect(PDFPageRotation.Angle(degrees: 180) == ._180degrees) 26 | #expect(PDFPageRotation.Angle(degrees: 270) == ._270degrees) 27 | 28 | // wrapping positive 29 | #expect(PDFPageRotation.Angle(degrees: 360) == ._0degrees) 30 | #expect(PDFPageRotation.Angle(degrees: 360 + 90) == ._90degrees) 31 | #expect(PDFPageRotation.Angle(degrees: 360 + 360 + 90) == 
._90degrees) 32 | 33 | // wrapping negative 34 | #expect(PDFPageRotation.Angle(degrees: 360 - 90) == ._270degrees) 35 | #expect(PDFPageRotation.Angle(degrees: 360 - 360 - 90) == ._270degrees) 36 | } 37 | 38 | /// Test degrees that are not multiples of 90 39 | @Test func angleInitInvalid() throws { 40 | #expect(PDFPageRotation.Angle(degrees: 1) == nil) 41 | #expect(PDFPageRotation.Angle(degrees: 89) == nil) 42 | #expect(PDFPageRotation.Angle(degrees: 91) == nil) 43 | #expect(PDFPageRotation.Angle(degrees: 179) == nil) 44 | #expect(PDFPageRotation.Angle(degrees: 181) == nil) 45 | #expect(PDFPageRotation.Angle(degrees: 269) == nil) 46 | #expect(PDFPageRotation.Angle(degrees: 271) == nil) 47 | #expect(PDFPageRotation.Angle(degrees: 359) == nil) 48 | } 49 | 50 | @Test func angleMath() throws { 51 | let _0 = PDFPageRotation.Angle._0degrees 52 | let _90 = PDFPageRotation.Angle._90degrees 53 | let _180 = PDFPageRotation.Angle._180degrees 54 | let _270 = PDFPageRotation.Angle._270degrees 55 | 56 | #expect(_0 + _0 == _0) 57 | #expect(_0 + _90 == _90) 58 | #expect(_90 + _90 == _180) 59 | #expect(_90 + _180 == _270) 60 | #expect(_180 + _90 == _270) 61 | #expect(_180 + _180 == _0) 62 | #expect(_270 + _90 == _0) 63 | #expect(_270 + _180 == _90) 64 | #expect(_270 + _270 == _180) 65 | 66 | #expect(_0 - _0 == _0) 67 | #expect(_0 - _90 == _270) 68 | #expect(_90 - _90 == _0) 69 | #expect(_90 - _180 == _270) 70 | #expect(_180 - _90 == _90) 71 | #expect(_180 - _180 == _0) 72 | #expect(_270 - _90 == _180) 73 | #expect(_270 - _180 == _90) 74 | #expect(_270 - _270 == _0) 75 | } 76 | } 77 | 78 | #endif 79 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/Pages/PDFPagesFilter.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFPagesFilter.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 
/// Criteria to filter PDF pages.
public enum PDFPagesFilter {
    /// Keep all pages.
    case all
    // case none

    /// Keep only pages matched by any of the given descriptors.
    case include(_ descriptors: [PDFPagesDescriptor])

    /// Keep only pages not matched by any of the given descriptors.
    case exclude(_ descriptors: [PDFPagesDescriptor])
}

extension PDFPagesFilter: Equatable { }

extension PDFPagesFilter: Hashable { }

extension PDFPagesFilter: Sendable { }

extension PDFPagesFilter {
    /// Applies the filter to a list of page indexes.
    ///
    /// - Parameters:
    ///   - inputs: Page indexes to filter.
    ///   - sort: When `true` (default), the included indexes are sorted ascending.
    ///     When `false`, the order of indexes produced by `.include` is unspecified
    ///     because they are gathered through a `Set`.
    /// - Returns: An `IndexesDiff` carrying the original, included and excluded indexes,
    ///   plus whether every descriptor reported an inclusive result.
    func filtering(_ inputs: [Int], sort: Bool = true) -> IndexesDiff {
        var included = inputs
        var isInclusive = true

        switch self {
        case .all:
            // no logic needed, just keep all input indexes
            break

        case let .include(descriptors):
            let diffed = Self.diff(indexes: inputs, descriptors, include: true)
            included = diffed.indexes
            isInclusive = diffed.allAreInclusive

        case let .exclude(descriptors):
            let diffed = Self.diff(indexes: inputs, descriptors, include: false)
            included = diffed.indexes
            isInclusive = diffed.allAreInclusive
        }

        if sort {
            included.sort()
        }

        // Set membership avoids a linear scan of `included` for every input index.
        let includedLookup = Set(included)
        let excluded = inputs.filter { !includedLookup.contains($0) }

        return IndexesDiff(
            original: inputs,
            included: included,
            excluded: excluded,
            isInclusive: isInclusive
        )
    }

    /// Runs every descriptor against `indexes` and combines the matches.
    ///
    /// - Parameters:
    ///   - indexes: Page indexes to evaluate.
    ///   - pagesDescriptors: Descriptors whose matches are unioned together.
    ///   - include: When `true`, returns the unioned matches; when `false`, returns the
    ///     entries of `indexes` that are not among the matches.
    /// - Returns: The resulting indexes, and `false` for `allAreInclusive` if any
    ///   descriptor reported a non-inclusive result.
    private static func diff(
        indexes: [Int],
        _ pagesDescriptors: [PDFPagesDescriptor],
        include: Bool
    ) -> (indexes: [Int], allAreInclusive: Bool) {
        let filtered: (results: Set<Int>, isInclusive: Bool) = pagesDescriptors
            .reduce(into: (results: [], isInclusive: true)) { base, pagesDescriptor in
                let result = pagesDescriptor.filtering(indexes)
                if result.isInclusive == false { base.isInclusive = false }
                base.results.formUnion(result.indexes)
            }

        let indexes = include
            ? Array(filtered.results)
            : Array(indexes.filter { !filtered.results.contains($0) })

        let allAreInclusive = filtered.isInclusive

        return (indexes: indexes, allAreInclusive: allAreInclusive)
    }
}

extension PDFPagesFilter {
    /// Human-readable phrase describing the filter.
    public var verboseDescription: String {
        switch self {
        case .all:
            return "all pages"

        case let .include(descriptors):
            let pageSetsStr = descriptors.map(\.verboseDescription).joined(separator: ", ")
            return "pages including \(pageSetsStr)"

        case let .exclude(descriptors):
            let pageSetsStr = descriptors.map(\.verboseDescription).joined(separator: ", ")
            return "pages excluding \(pageSetsStr)"
        }
    }
}
extension PDFFilesDescriptor {
    /// Human-readable phrase describing which files this descriptor refers to.
    public var verboseDescription: String {
        switch self {
        case .all:
            return "all files"
        case .first:
            return "first file"
        case .second:
            return "second file"
        case .last:
            return "last file"
        case let .index(idx):
            return "file with index \(idx)"
        case let .indexes(idxes):
            return "files with indexes \(idxes.map { String($0) }.joined(separator: ", "))"
        case let .indexRange(range):
            // Previously emitted a stray trailing ")" after the upper bound.
            return "files with index range \(range.lowerBound)-\(range.upperBound)"
        case let .filename(filenameDescriptor):
            return "files with filename \(filenameDescriptor.verboseDescription)"
        case let .introspecting(introspection):
            return "files matching \(introspection.description)"
        }
    }
}
46 | 47 | // Check to see that at least two resulting files will occur 48 | if nthPage >= source.doc.pageCount { 49 | return [] 50 | } 51 | 52 | let ranges = (0 ..< source.doc.pageCount) 53 | .split(every: nthPage) 54 | splits = ranges.map { .init($0, String?.none) } 55 | 56 | case let .pageIndexesAndFilenames(pageIndexesAndFilenames): 57 | splits = pageIndexesAndFilenames 58 | 59 | case let .pageNumbersAndFilenames(pageNumbersAndFilenames): 60 | var mappedToIndexes = pageNumbersAndFilenames 61 | for index in mappedToIndexes.indices { 62 | mappedToIndexes[index].pageRange = 63 | mappedToIndexes[index].pageRange.lowerBound - 1 64 | ... mappedToIndexes[index].pageRange.upperBound - 1 65 | } 66 | splits = mappedToIndexes 67 | } 68 | 69 | return splits 70 | } 71 | 72 | func convertPageIndexesToRanges(pageIndexes: [Int], totalPageCount: Int) -> [ClosedRange] { 73 | var ranges: [ClosedRange] = [] 74 | var lastIndex = 0 75 | for endIndex in pageIndexes.removingDuplicates(.afterFirstOccurrences).sorted() { 76 | ranges.append(lastIndex ... endIndex) 77 | lastIndex = endIndex + 1 78 | } 79 | // add final split 80 | if lastIndex <= totalPageCount { 81 | ranges.append(lastIndex ... totalPageCount - 1) 82 | } 83 | return ranges 84 | } 85 | } 86 | 87 | extension PDFFileSplitDescriptor { 88 | public var verboseDescription: String { 89 | switch self { 90 | case let .at(pageIndexes): 91 | return "at page indexes \(pageIndexes.map { String($0) }.joined(separator: ", "))" 92 | case let .every(pageCount): 93 | return "every \(pageCount) page\(pageCount == 1 ? 
"" : "s")" 94 | case let .pageIndexesAndFilenames(splits): 95 | return "at \(splits.count) named splits" 96 | case let .pageNumbersAndFilenames(splits): 97 | return "at \(splits.count) named splits" 98 | } 99 | } 100 | } 101 | 102 | #endif 103 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # swift-pdf-processor 2 | 3 | [![Platforms - macOS 11+ | iOS 14+ | visionOS 1+](https://img.shields.io/badge/platforms-macOS%2011+%20|%20iOS%2014+%20|%20visionOS%201+-lightgrey.svg?style=flat)](https://developer.apple.com/swift) ![Swift 6.0](https://img.shields.io/badge/Swift-6.0-orange.svg?style=flat) [![Xcode 16](https://img.shields.io/badge/Xcode-16-blue.svg?style=flat)](https://developer.apple.com/swift) [![License: MIT](http://img.shields.io/badge/license-MIT-lightgrey.svg?style=flat)](https://github.com/orchetect/swift-pdf-processor/blob/main/LICENSE) 4 | 5 | Batch PDF utilities with simple API for Swift. Declarative API for: 6 | 7 | - assigning or removing file attributes (metadata) 8 | - file filtering, ordering, and merging 9 | - page management: reordering, collation, copying, moving, and replacement 10 | - page presentation: rotation, cropping, etc. 11 | - page content: filtering, removal or burn-in of annotations, removal of file protections 12 | 13 | ## Installation 14 | 15 | ### Swift Package Manager (SPM) 16 | 17 | To add this package to an Xcode app project, use: 18 | 19 | `https://github.com/orchetect/swift-pdf-processor` as the URL. 
20 | 21 | To add this package to a Swift package, add the dependency to your package and target in Package.swift: 22 | 23 | ```swift 24 | let package = Package( 25 | dependencies: [ 26 | .package(url: "https://github.com/orchetect/swift-pdf-processor", from: "0.3.0") 27 | ], 28 | targets: [ 29 | .target( 30 | dependencies: [ 31 | .product(name: "PDFProcessor", package: "swift-pdf-processor") 32 | ] 33 | ) 34 | ] 35 | ) 36 | ``` 37 | 38 | ## Basic Usage 39 | 40 | ```swift 41 | import PDFProcessor 42 | 43 | let sources = [URL, URL, URL, ...] // URLs to one or more PDF files 44 | let outputDir = URL.desktopDirectory 45 | ``` 46 | 47 | The steps of loading source PDFs, performing operations, and saving the resulting PDFs can be performed individually: 48 | 49 | ```swift 50 | let processor = PDFProcessor() 51 | 52 | try processor.load(pdfs: sources) 53 | try processor.perform(operations: [ 54 | // one or more operations 55 | ]) 56 | 57 | // access the resulting PDF documents in memory 58 | processor.pdfDocuments // [PDFDocument] 59 | 60 | // or save them as PDF files to disk 61 | try processor.savePDFs(outputDir: outputDir) 62 | ``` 63 | 64 | Or a fully automated batch operation can be run with a single call to `run()` by passing in a populated instance of `PDFProcessor.Settings`. 65 | 66 | ```swift 67 | let settings = try PDFProcessor.Settings( 68 | sourcePDFs: sources, 69 | outputDir: outputDir, 70 | operations: [ 71 | // one or more operations 72 | ], 73 | savePDFs: true 74 | ) 75 | 76 | try PDFProcessor().run(using: settings) 77 | ``` 78 | 79 | ## Batch Operations 80 | 81 | The following are single operations that may be used in a batch sequence of operations. 82 | 83 | > [!NOTE] 84 | > More operations may be added in future on an as-needed basis. 85 | 86 | ### File Operations 87 | 88 | - New empty file 89 | - Clone file 90 | - Filter files 91 | - Merge files 92 | - Set file filename(s) 93 | - Set or remove file attributes (metadata such as title, author, etc.) 
94 | - Remove file protections (encryption and permissions) 95 | 96 | ### Page Operations 97 | 98 | - Filter pages 99 | - Copy pages 100 | - Move pages 101 | - Replace pages by copying or moving them 102 | - Reverse page order (all or subset of pages) 103 | - Rotate pages 104 | - Crop pages 105 | - Split file into multiple files 106 | 107 | ### Page Content Operations 108 | 109 | - Filter annotations (by types, or remove all) 110 | - Burn in annotations 111 | - Extract plain text (to system pasteboard, to file on disk, or to variable in memory) 112 | 113 | ## Author 114 | 115 | Coded by a bunch of 🐹 hamsters in a trenchcoat that calls itself [@orchetect](https://github.com/orchetect). 116 | 117 | ## License 118 | 119 | Licensed under the MIT license. See [LICENSE](/LICENSE) for details. 120 | 121 | ## Sponsoring 122 | 123 | If you enjoy using swift-pdf-processor and want to contribute to open-source financially, GitHub sponsorship is much appreciated. Feedback and code contributions are also welcome. 124 | 125 | ## Community & Support 126 | 127 | Please do not email maintainers for technical support. Several options are available for issues and questions: 128 | 129 | - Questions and feature ideas can be posted to [Discussions](https://github.com/orchetect/swift-pdf-processor/discussions). 130 | - If an issue is a verifiable bug with reproducible steps it may be posted in [Issues](https://github.com/orchetect/swift-pdf-processor/issues). 131 | 132 | ## Contributions 133 | 134 | Contributions are welcome. Posting in [Discussions](https://github.com/orchetect/swift-pdf-processor/discussions) prior to submitting new PRs for features or modifications is encouraged. 135 | 136 | ## Legacy 137 | 138 | This repository was formerly known as PDFGadget. 
139 | -------------------------------------------------------------------------------- /Tests/PDFProcessorTests/Utils Tests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // Utils Tests.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | import CoreGraphics 8 | @testable import PDFProcessor 9 | import Testing 10 | 11 | @Suite struct CGRectTests { 12 | // MARK: - Absolute 13 | 14 | @Test func testRotate90Degrees_ZeroOrigin_CommonOrigin() { 15 | let area = CGRect(x: 0, y: 0, width: 10, height: 13) 16 | let rect = CGRect(x: 3, y: 4, width: 6, height: 7) 17 | 18 | let newRect = rect.rotate90Degrees(within: area, isAbsolute: true) 19 | 20 | #expect(newRect.origin.x == 4.0) 21 | #expect(newRect.origin.y == 1.0) 22 | #expect(newRect.width == 7.0) 23 | #expect(newRect.height == 6.0) 24 | } 25 | 26 | @Test func testRotate90Degrees_ZeroOrigin_Absolute_360() { 27 | let area = CGRect(x: 0, y: 0, width: 10, height: 13) 28 | let rect = CGRect(x: 3, y: 4, width: 6, height: 7) 29 | 30 | let newRect = rect 31 | .rotate90Degrees(within: area, isAbsolute: true) 32 | .rotate90Degrees(within: area, isAbsolute: true) 33 | .rotate90Degrees(within: area, isAbsolute: true) 34 | .rotate90Degrees(within: area, isAbsolute: true) 35 | 36 | #expect(newRect.origin.x == rect.origin.x) 37 | #expect(newRect.origin.y == rect.origin.y) 38 | #expect(newRect.width == rect.width) 39 | #expect(newRect.height == rect.height) 40 | } 41 | 42 | @Test func testRotate90Degrees_NonZeroOrigin_CommonOrigin() { 43 | let ox: CGFloat = 2 44 | let oy: CGFloat = 6 45 | let area = CGRect(x: ox, y: oy, width: 10, height: 13) 46 | let rect = CGRect(x: ox + 3, y: oy + 4, width: 6, height: 7) 47 | 48 | let newRect = rect.rotate90Degrees(within: area, isAbsolute: true) 49 | 50 | #expect(newRect.origin.x == oy + 4.0) 51 | #expect(newRect.origin.y == ox + 5.0) 52 | 
#expect(newRect.width == 7.0) 53 | #expect(newRect.height == 6.0) 54 | } 55 | 56 | @Test func testRotate90Degrees_NonZeroOrigin_Absolute_360() { 57 | let ox: CGFloat = 2 58 | let oy: CGFloat = 6 59 | let area = CGRect(x: ox, y: oy, width: 10, height: 13) 60 | let rect = CGRect(x: ox + 3, y: oy + 4, width: 6, height: 7) 61 | 62 | let newRect = rect 63 | .rotate90Degrees(within: area, isAbsolute: true) 64 | .rotate90Degrees(within: area, isAbsolute: true) 65 | .rotate90Degrees(within: area, isAbsolute: true) 66 | .rotate90Degrees(within: area, isAbsolute: true) 67 | 68 | #expect(newRect.origin.x == rect.origin.x) 69 | #expect(newRect.origin.y == rect.origin.y) 70 | #expect(newRect.width == rect.width) 71 | #expect(newRect.height == rect.height) 72 | } 73 | 74 | // MARK: - Relative 75 | 76 | @Test func testRotate90Degrees_ZeroOrigin_Relative() { 77 | let area = CGRect(x: 0, y: 0, width: 10, height: 13) 78 | let rect = CGRect(x: 3, y: 4, width: 6, height: 7) 79 | 80 | let newRect = rect.rotate90Degrees(within: area, isAbsolute: false) 81 | 82 | #expect(newRect.origin.x == 4.0) 83 | #expect(newRect.origin.y == 1.0) 84 | #expect(newRect.width == 7.0) 85 | #expect(newRect.height == 6.0) 86 | } 87 | 88 | @Test func testRotate90Degrees_ZeroOrigin_Relative_360() { 89 | let area = CGRect(x: 0, y: 0, width: 10, height: 13) 90 | let rect = CGRect(x: 3, y: 4, width: 6, height: 7) 91 | 92 | let newRect = rect 93 | .rotate90Degrees(within: area, isAbsolute: false) 94 | .rotate90Degrees(within: area, isAbsolute: false) 95 | .rotate90Degrees(within: area, isAbsolute: false) 96 | .rotate90Degrees(within: area, isAbsolute: false) 97 | 98 | #expect(newRect.origin.x == rect.origin.x) 99 | #expect(newRect.origin.y == rect.origin.y) 100 | #expect(newRect.width == rect.width) 101 | #expect(newRect.height == rect.height) 102 | } 103 | 104 | @Test func testRotate90Degrees_NonZeroOrigin_Relative() { 105 | let ox: CGFloat = 2 106 | let oy: CGFloat = 6 107 | let area = CGRect(x: ox, y: oy, 
width: 10, height: 13) 108 | let rect = CGRect(x: 3, y: 4, width: 6, height: 7) 109 | 110 | let newRect = rect.rotate90Degrees(within: area, isAbsolute: false) 111 | 112 | #expect(newRect.origin.x == oy + 4.0) 113 | #expect(newRect.origin.y == ox + 5.0) 114 | #expect(newRect.width == 7.0) 115 | #expect(newRect.height == 6.0) 116 | } 117 | 118 | @Test func testRotate90Degrees_NonZeroOrigin_Relative_360() { 119 | let ox: CGFloat = 2 120 | let oy: CGFloat = 6 121 | let area = CGRect(x: ox, y: oy, width: 10, height: 13) 122 | let rect = CGRect(x: 3, y: 4, width: 6, height: 7) 123 | 124 | let newRect = rect 125 | .rotate90Degrees(within: area, isAbsolute: false) 126 | .rotate90Degrees(within: area, isAbsolute: false) 127 | .rotate90Degrees(within: area, isAbsolute: false) 128 | .rotate90Degrees(within: area, isAbsolute: false) 129 | 130 | #expect(newRect.origin.x == rect.origin.x) 131 | #expect(newRect.origin.y == rect.origin.y) 132 | #expect(newRect.width == rect.width) 133 | #expect(newRect.height == rect.height) 134 | } 135 | } 136 | -------------------------------------------------------------------------------- /Tests/PDFProcessorTests/Pages/PDFPagesFilter Tests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFPagesFilter Tests.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | @testable import PDFProcessor 10 | import Testing 11 | import TestingExtensions 12 | 13 | @Suite struct PDFPagesFilterTests { 14 | @Test func all() throws { 15 | let filter: PDFPagesFilter = .all 16 | 17 | do { 18 | let filtered = filter.filtering([]) 19 | #expect(filtered.included == []) 20 | #expect(filtered.excluded == []) 21 | #expect(filtered.isInclusive) 22 | } 23 | 24 | do { 25 | let filtered = filter.filtering([1, 3, 4, 5]) 26 | #expect(filtered.included == [1, 3, 4, 5]) 27 | 
#expect(filtered.excluded == []) 28 | #expect(filtered.isInclusive) 29 | } 30 | } 31 | 32 | @Test func include_OddNumbers() throws { 33 | let filter: PDFPagesFilter = .include([.oddNumbers]) 34 | 35 | do { 36 | let filtered = filter.filtering([]) 37 | #expect(filtered.included == []) 38 | #expect(filtered.excluded == []) 39 | #expect(!filtered.isInclusive) 40 | } 41 | 42 | do { 43 | let filtered = filter.filtering([1, 3, 4, 5]) 44 | #expect(filtered.included == [1, 4]) 45 | #expect(filtered.excluded == [3, 5]) 46 | #expect(filtered.isInclusive) 47 | } 48 | } 49 | 50 | @Test func include_EvenNumbers() throws { 51 | let filter: PDFPagesFilter = .include([.evenNumbers]) 52 | 53 | do { 54 | let filtered = filter.filtering([]) 55 | #expect(filtered.included == []) 56 | #expect(filtered.excluded == []) 57 | #expect(!filtered.isInclusive) 58 | } 59 | 60 | do { 61 | let filtered = filter.filtering([1, 3, 4, 5]) 62 | #expect(filtered.included == [3, 5]) 63 | #expect(filtered.excluded == [1, 4]) 64 | #expect(filtered.isInclusive) 65 | } 66 | } 67 | 68 | @Test func include_Multiple_isInclusive() throws { 69 | let filter: PDFPagesFilter = .include([ 70 | .pages(indexes: [0]), 71 | .range(indexes: 2 ... 3) 72 | ]) 73 | 74 | do { 75 | let filtered = filter.filtering([]) 76 | #expect(filtered.included == []) 77 | #expect(filtered.excluded == []) 78 | #expect(!filtered.isInclusive) 79 | } 80 | 81 | do { 82 | let filtered = filter.filtering([1, 3, 4, 5, 6]) 83 | #expect(filtered.included == [1, 4, 5]) 84 | #expect(filtered.excluded == [3, 6]) 85 | #expect(filtered.isInclusive) 86 | } 87 | } 88 | 89 | @Test func include_Multiple_isNotInclusive_FirstNotInclusive() throws { 90 | let filter: PDFPagesFilter = .include([ 91 | .pages(indexes: [4]), 92 | .range(indexes: 2 ... 
3) 93 | ]) 94 | 95 | do { 96 | let filtered = filter.filtering([]) 97 | #expect(filtered.included == []) 98 | #expect(filtered.excluded == []) 99 | #expect(!filtered.isInclusive) 100 | } 101 | 102 | do { 103 | let filtered = filter.filtering([1, 3, 4, 5]) 104 | #expect(filtered.included == [4, 5]) 105 | #expect(filtered.excluded == [1, 3]) 106 | #expect(!filtered.isInclusive) 107 | } 108 | } 109 | 110 | @Test func include_Multiple_isNotInclusive_LastNotInclusive() throws { 111 | let filter: PDFPagesFilter = .include([ 112 | .pages(indexes: [0]), 113 | .range(indexes: 2 ... 3) 114 | ]) 115 | 116 | do { 117 | let filtered = filter.filtering([]) 118 | #expect(filtered.included == []) 119 | #expect(filtered.excluded == []) 120 | #expect(!filtered.isInclusive) 121 | } 122 | 123 | do { 124 | let filtered = filter.filtering([1, 3, 4]) 125 | #expect(filtered.included == [1, 4]) 126 | #expect(filtered.excluded == [3]) 127 | #expect(!filtered.isInclusive) 128 | } 129 | } 130 | 131 | @Test func exclude_OddNumbers() throws { 132 | let filter: PDFPagesFilter = .exclude([.oddNumbers]) 133 | 134 | do { 135 | let filtered = filter.filtering([]) 136 | #expect(filtered.included == []) 137 | #expect(filtered.excluded == []) 138 | #expect(!filtered.isInclusive) 139 | } 140 | 141 | do { 142 | let filtered = filter.filtering([1, 3, 4, 5]) 143 | #expect(filtered.included == [3, 5]) 144 | #expect(filtered.excluded == [1, 4]) 145 | #expect(filtered.isInclusive) 146 | } 147 | } 148 | 149 | @Test func exclude_EvenNumbers() throws { 150 | let filter: PDFPagesFilter = .exclude([.evenNumbers]) 151 | 152 | do { 153 | let filtered = filter.filtering([]) 154 | #expect(filtered.included == []) 155 | #expect(filtered.excluded == []) 156 | #expect(!filtered.isInclusive) 157 | } 158 | 159 | do { 160 | let filtered = filter.filtering([1, 3, 4, 5]) 161 | #expect(filtered.included == [1, 4]) 162 | #expect(filtered.excluded == [3, 5]) 163 | #expect(filtered.isInclusive) 164 | } 165 | } 166 | 167 | @Test 
func exclude_Multiple_isInclusive() throws { 168 | let filter: PDFPagesFilter = .exclude([ 169 | .pages(indexes: [0]), 170 | .range(indexes: 2 ... 3) 171 | ]) 172 | 173 | do { 174 | let filtered = filter.filtering([]) 175 | #expect(filtered.included == []) 176 | #expect(filtered.excluded == []) 177 | #expect(!filtered.isInclusive) 178 | } 179 | 180 | do { 181 | let filtered = filter.filtering([1, 3, 4, 5, 6]) 182 | #expect(filtered.included == [3, 6]) 183 | #expect(filtered.excluded == [1, 4, 5]) 184 | #expect(filtered.isInclusive) 185 | } 186 | } 187 | } 188 | 189 | #endif 190 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/Utilties/PDFKit Extensions.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFKit Extensions.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | import Foundation 10 | import PDFKit 11 | 12 | extension PDFDocument { 13 | // MARK: - Page Indexes 14 | 15 | public var pageRange: Range { 16 | 0 ..< pageCount 17 | } 18 | 19 | public func pageIndexes() -> [Int] { 20 | Array(pageRange) 21 | } 22 | 23 | public func pageIndexes(at range: Range) throws -> [Int] { 24 | guard pageRange.contains(range) else { 25 | throw PDFProcessorError.runtimeError( 26 | "Page index out of range." 27 | ) 28 | } 29 | return Array(range) 30 | } 31 | 32 | public func pageIndexes(at range: ClosedRange) throws -> [Int] { 33 | guard pageRange.contains(range) else { 34 | throw PDFProcessorError.runtimeError( 35 | "Page index out of range." 
36 | ) 37 | } 38 | return Array(range) 39 | } 40 | 41 | public func pageIndexes( 42 | filter: PDFPagesFilter 43 | ) throws -> IndexesDiff { 44 | filter.filtering(pageIndexes()) 45 | } 46 | 47 | // MARK: - Page Access 48 | 49 | public func pages(at range: Range, copy: Bool = false) throws -> [PDFPage] { 50 | try pages(at: pageIndexes(at: range), copy: copy) 51 | } 52 | 53 | public func pages(at range: ClosedRange, copy: Bool = false) throws -> [PDFPage] { 54 | try pages(at: pageIndexes(at: range), copy: copy) 55 | } 56 | 57 | public func pages(at indexes: [Int]? = nil, copy: Bool = false) throws -> [PDFPage] { 58 | let i = indexes ?? pageIndexes() 59 | let getPages = i.compactMap { page(at: $0) } 60 | guard i.count == getPages.count else { 61 | throw PDFProcessorError.runtimeError( 62 | "Error while enumerating pages." 63 | ) 64 | } 65 | return copy ? getPages.map { $0.copy() as! PDFPage } : getPages 66 | } 67 | 68 | public func pages(for filter: PDFPagesFilter, copy: Bool = false) throws -> [PDFPage] { 69 | try pages(at: pageIndexes(filter: filter).included, copy: copy) 70 | } 71 | 72 | // MARK: - Page Operations 73 | 74 | public func append(page: PDFPage) { 75 | insert(page, at: pageCount) 76 | } 77 | 78 | public func append(pages: [PDFPage]) { 79 | for page in pages { 80 | append(page: page) 81 | } 82 | } 83 | 84 | public func insert(_ pages: [PDFPage], at index: Int) throws { 85 | guard pageRange.contains(index) || pageCount == index else { 86 | throw PDFProcessorError.runtimeError( 87 | "Page index is out of range." 
88 | ) 89 | } 90 | 91 | for page in pages.reversed() { 92 | insert(page, at: index) 93 | } 94 | } 95 | 96 | public func replaceAllPages( 97 | with pages: S 98 | ) throws where S.Element == PDFPage { 99 | try removeAllPages() 100 | for (pageIndex, page) in pages.enumerated() { 101 | insert(page, at: pageIndex) 102 | } 103 | guard pageCount == pages.count else { 104 | throw PDFProcessorError.runtimeError( 105 | "Failed to replace all pages; page count differs." 106 | ) 107 | } 108 | } 109 | 110 | public func removeAllPages() throws { 111 | for item in pageRange.reversed() { 112 | removePage(at: item) 113 | } 114 | guard pageCount == 0 else { 115 | throw PDFProcessorError.runtimeError( 116 | "Failed to remove all pages." 117 | ) 118 | } 119 | } 120 | 121 | public func removePages(at indexes: [Int]) throws { 122 | guard indexes.allSatisfy(pageRange.contains(_:)) else { 123 | throw PDFProcessorError.runtimeError( 124 | "One or more page indexes were not found while attempting to remove pages." 125 | ) 126 | } 127 | 128 | let originalPageCount = pageCount 129 | let uniqueIndexes = Set(indexes) // a duplicated index must remove its page only once, not a neighboring page 130 | for index in uniqueIndexes.sorted().reversed() { 131 | removePage(at: index) 132 | } 133 | 134 | let postPageCount = pageCount 135 | 136 | guard originalPageCount - postPageCount == uniqueIndexes.count else { 137 | throw PDFProcessorError.runtimeError( 138 | "Failed to remove pages. Resulting page count is not as expected." 139 | ) 140 | } 141 | } 142 | 143 | public func exchangePage(at index: Int, withPage other: PDFPage, copy: Bool = false) throws { 144 | guard pageRange.contains(index) else { // range check; avoids allocating the full index array 145 | throw PDFProcessorError.runtimeError( 146 | "Failed to replace page. Index is out of bounds: \(index)." 147 | ) 148 | } 149 | 150 | let newPage = copy ? other.copy() as! PDFPage : other 151 | 152 | removePage(at: index) 153 | insert(newPage, at: index) 154 | } 155 | 156 | // MARK: - File Info 157 | 158 | public var filenameWithoutExtension: String? 
{ 159 | documentURL?.deletingPathExtension().lastPathComponent 160 | } 161 | 162 | // MARK: - Protections 163 | 164 | /// Attempts to remove protections of the PDF file. 165 | /// If successful, returns a new copy of the document. 166 | /// 167 | /// This is based on the premise that copying pages to a new empty PDF document 168 | /// will strip the file's protections. This still works as of macOS 15 but it 169 | /// may break in future revisions to PDFKit. 170 | /// 171 | /// Note that the returned document will not inherit the source document's `documentURL`. 172 | @_disfavoredOverload 173 | public func unprotectedCopy() throws -> PDFDocument { 174 | let newPDF = PDFDocument() 175 | 176 | // document attributes 177 | newPDF.documentAttributes = documentAttributes 178 | 179 | // copy pages 180 | try (0 ..< pageCount) 181 | .forEach { pageIndex in 182 | guard let pageCopy = page(at: pageIndex)?.copy() as? PDFPage else { 183 | throw CocoaError(.fileLocking) 184 | } 185 | newPDF.insert(pageCopy, at: pageIndex) 186 | } 187 | 188 | return newPDF 189 | } 190 | } 191 | 192 | extension PDFAnnotation { 193 | func matches(subType: PDFAnnotationSubtype) -> Bool { 194 | guard let annoType = type else { return false } 195 | 196 | // includes a workaround for an inexplicable issue where 197 | // PDFAnnotationSubtype.rawValue adds a forward-slash character 198 | 199 | return subType.rawValue == annoType || 200 | subType.rawValue == "/" + annoType 201 | } 202 | 203 | func type(containedIn subTypes: [PDFAnnotationSubtype]) -> Bool { 204 | subTypes.contains { matches(subType: $0) } 205 | } 206 | } 207 | 208 | #endif 209 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/Pages/PDFPageArea.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFPageArea.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed 
under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | import Foundation 10 | internal import SwiftExtensions 11 | 12 | public enum PDFPageArea { 13 | /// Literal inset values. 14 | /// A value of zero represents no change. 15 | case insets( 16 | top: PDFPageInset = .passthrough, 17 | leading: PDFPageInset = .passthrough, 18 | bottom: PDFPageInset = .passthrough, 19 | trailing: PDFPageInset = .passthrough 20 | ) 21 | 22 | /// Literal bounds values in points. 23 | case rect( 24 | x: Double, 25 | y: Double, 26 | width: Double, 27 | height: Double 28 | ) 29 | } 30 | 31 | extension PDFPageArea: Equatable { } 32 | 33 | extension PDFPageArea: Hashable { } 34 | 35 | extension PDFPageArea: Sendable { } 36 | 37 | extension PDFPageArea { 38 | public var verboseDescription: String { 39 | switch self { 40 | case let .insets(top, leading, bottom, trailing): 41 | return "insets top: \(top.verboseDescription), leading: \(leading.verboseDescription), bottom: \(bottom.verboseDescription), trailing: \(trailing.verboseDescription)" 42 | case let .rect(x, y, width, height): 43 | return "area x:\(x) y:\(y) w:\(width) h:\(height)" 44 | } 45 | } 46 | } 47 | 48 | // MARK: - Static Constructors 49 | 50 | extension PDFPageArea { 51 | /// Scale the bounds by a uniform scale factor. 52 | /// `1.0` represents 1:1 scale to original. 
53 | public static func scale(factor: Double) -> Self { 54 | .insets( 55 | top: .scale(factor: factor), 56 | leading: .scale(factor: factor), 57 | bottom: .scale(factor: factor), 58 | trailing: .scale(factor: factor) 59 | ) 60 | } 61 | 62 | #if os(macOS) 63 | public static func insets(_ insets: NSEdgeInsets) -> Self { 64 | .insets( 65 | top: .points(insets.top), 66 | leading: .points(insets.left), 67 | bottom: .points(insets.bottom), 68 | trailing: .points(insets.right) 69 | ) 70 | } 71 | #endif 72 | 73 | public static func rect(_ rect: CGRect) -> Self { 74 | .rect( 75 | x: rect.origin.x, 76 | y: rect.origin.y, 77 | width: rect.width, 78 | height: rect.height 79 | ) 80 | } 81 | } 82 | 83 | #if canImport(SwiftUI) 84 | import SwiftUI 85 | 86 | extension PDFPageArea { 87 | @available(macOS 10.15, iOS 13.0, tvOS 13.0, watchOS 6.0, *) 88 | public static func insets(_ insets: EdgeInsets) -> Self { 89 | .insets( 90 | top: .points(insets.top), 91 | leading: .points(insets.leading), 92 | bottom: .points(insets.bottom), 93 | trailing: .points(insets.trailing) 94 | ) 95 | } 96 | } 97 | #endif 98 | 99 | // MARK: - Methods 100 | 101 | extension PDFPageArea { 102 | public func rect( 103 | for source: CGRect, 104 | rotation: PDFPageRotation.Angle = ._0degrees 105 | ) -> CGRect { 106 | switch self { 107 | case let .insets(top, leading, bottom, trailing): 108 | let (top, leading, bottom, trailing) = PDFPageInset.rotate( 109 | top: top, 110 | leading: leading, 111 | bottom: bottom, 112 | trailing: trailing, 113 | by: rotation 114 | ) 115 | 116 | var x = source.origin.x 117 | var y = source.origin.y 118 | var width = source.width 119 | var height = source.height 120 | 121 | switch top { 122 | case let .points(value): 123 | height += value 124 | case var .scale(factor): 125 | factor = factor.clamped(to: 0.01 ... 
100.0) 126 | height *= factor 127 | case .passthrough: 128 | break 129 | } 130 | 131 | switch leading { 132 | case let .points(value): 133 | width += value 134 | x -= value 135 | case var .scale(factor): 136 | factor = factor.clamped(to: 0.01 ... 100.0) 137 | width *= factor 138 | x += (source.width - (source.width * factor)) 139 | case .passthrough: 140 | break 141 | } 142 | 143 | switch bottom { 144 | case let .points(value): 145 | height += value 146 | y -= value 147 | case var .scale(factor): 148 | factor = factor.clamped(to: 0.01 ... 100.0) 149 | height *= factor 150 | y += (source.height - (source.height * factor)) 151 | case .passthrough: 152 | break 153 | } 154 | 155 | switch trailing { 156 | case let .points(value): 157 | width += value 158 | case var .scale(factor): 159 | factor = factor.clamped(to: 0.01 ... 100.0) 160 | width *= factor 161 | case .passthrough: 162 | break 163 | } 164 | 165 | // TODO: Add additional guards for validation checks to prevent inversions 166 | 167 | return CGRect(x: x, y: y, width: width, height: height) 168 | 169 | case let .rect(x, y, width, height): 170 | // associated values define a rect in the current rotation presentation, 171 | // however PDFKit treats bounds in the page's non-rotated coordinate context. 172 | // this means we need to "rotate" our coordinates respectively to convert them 173 | // if the page is rotated. 
174 | 175 | switch rotation { 176 | case ._0degrees: 177 | var rect = CGRect(x: x, y: y, width: width, height: height) 178 | rect.origin.x += source.origin.x 179 | rect.origin.y += source.origin.y 180 | return rect 181 | case ._90degrees: 182 | var rect = CGRect(x: x, y: y, width: width, height: height) 183 | 184 | var source = source.rotate90Degrees(within: source, isAbsolute: true) 185 | rect = rect.rotate90Degrees(within: source, isAbsolute: false) 186 | 187 | source = source.rotate90Degrees(within: source, isAbsolute: true) 188 | rect = rect.rotate90Degrees(within: source, isAbsolute: false) 189 | 190 | source = source.rotate90Degrees(within: source, isAbsolute: true) 191 | rect = rect.rotate90Degrees(within: source, isAbsolute: false) 192 | 193 | return rect 194 | case ._180degrees: 195 | var rect = CGRect(x: x, y: y, width: width, height: height) 196 | 197 | var source = source 198 | rect = rect.rotate90Degrees(within: source, isAbsolute: false) 199 | 200 | source = source.rotate90Degrees(within: source, isAbsolute: true) 201 | rect = rect.rotate90Degrees(within: source, isAbsolute: false) 202 | 203 | return rect 204 | case ._270degrees: 205 | var rect = CGRect(x: x, y: y, width: width, height: height) 206 | 207 | let source = source.rotate90Degrees(within: source, isAbsolute: true) 208 | rect = rect.rotate90Degrees(within: source, isAbsolute: false) 209 | 210 | return rect 211 | } 212 | } 213 | } 214 | } 215 | 216 | #endif 217 | -------------------------------------------------------------------------------- /Tests/PDFProcessorTests/Pages/PDFPageArea Tests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFPageArea Tests.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | @testable import PDFProcessor 10 | import Testing 11 | import TestingExtensions 12 | 13 | @Suite 
struct PDFPageAreaTests { 14 | @Test func rectForRotation_0degrees() throws { 15 | let pageBounds = CGRect(x: 0.0, y: 0.0, width: 612.0, height: 792.0) // original non-rotated coord space 16 | let angle: PDFPageRotation.Angle = ._0degrees 17 | 18 | #expect( 19 | PDFPageArea.rect(x: 153.0, y: 396.0, width: 153.0, height: 198.0).rect(for: pageBounds, rotation: angle) 20 | == CGRect(x: 153.0, y: 396.0, width: 153.0, height: 198.0) 21 | ) 22 | #expect( 23 | PDFPageArea.rect(x: 306.0, y: 396.0, width: 153.0, height: 198.0).rect(for: pageBounds, rotation: angle) 24 | == CGRect(x: 306.0, y: 396.0, width: 153.0, height: 198.0) 25 | ) 26 | #expect( 27 | PDFPageArea.rect(x: 153.0, y: 198.0, width: 153.0, height: 198.0).rect(for: pageBounds, rotation: angle) 28 | == CGRect(x: 153.0, y: 198.0, width: 153.0, height: 198.0) 29 | ) 30 | #expect( 31 | PDFPageArea.rect(x: 306.0, y: 198.0, width: 153.0, height: 198.0).rect(for: pageBounds, rotation: angle) 32 | == CGRect(x: 306.0, y: 198.0, width: 153.0, height: 198.0) 33 | ) 34 | 35 | #expect( 36 | PDFPageArea.rect(x: 0.0, y: 396.0, width: 306.0, height: 396.0).rect(for: pageBounds, rotation: angle) 37 | == CGRect(x: 0.0, y: 396.0, width: 306.0, height: 396.0) 38 | ) 39 | #expect( 40 | PDFPageArea.rect(x: 306.0, y: 396.0, width: 306.0, height: 396.0).rect(for: pageBounds, rotation: angle) 41 | == CGRect(x: 306.0, y: 396.0, width: 306.0, height: 396.0) 42 | ) 43 | #expect( 44 | PDFPageArea.rect(x: 0.0, y: 0.0, width: 306.0, height: 396.0).rect(for: pageBounds, rotation: angle) 45 | == CGRect(x: 0.0, y: 0.0, width: 306.0, height: 396.0) 46 | ) 47 | #expect( 48 | PDFPageArea.rect(x: 306.0, y: 0.0, width: 306.0, height: 396.0).rect(for: pageBounds, rotation: angle) 49 | == CGRect(x: 306.0, y: 0.0, width: 306.0, height: 396.0) 50 | ) 51 | } 52 | 53 | @Test func rectForRotation_90degrees() throws { 54 | let pageBounds = CGRect(x: 0.0, y: 0.0, width: 612.0, height: 792.0) // original non-rotated coord space 55 | let angle: 
PDFPageRotation.Angle = ._90degrees 56 | 57 | #expect( 58 | PDFPageArea.rect(x: 198.0, y: 306.0, width: 198.0, height: 153.0).rect(for: pageBounds, rotation: angle) 59 | == CGRect(x: 153.0, y: 198.0, width: 153.0, height: 198.0) 60 | ) 61 | #expect( 62 | PDFPageArea.rect(x: 396.0, y: 306.0, width: 198.0, height: 153.0).rect(for: pageBounds, rotation: angle) 63 | == CGRect(x: 153.0, y: 396.0, width: 153.0, height: 198.0) 64 | ) 65 | #expect( 66 | PDFPageArea.rect(x: 198.0, y: 153.0, width: 198.0, height: 153.0).rect(for: pageBounds, rotation: angle) 67 | == CGRect(x: 306.0, y: 198.0, width: 153.0, height: 198.0) 68 | ) 69 | #expect( 70 | PDFPageArea.rect(x: 396.0, y: 153.0, width: 198.0, height: 153.0).rect(for: pageBounds, rotation: angle) 71 | == CGRect(x: 306.0, y: 396.0, width: 153.0, height: 198.0) 72 | ) 73 | 74 | #expect( 75 | PDFPageArea.rect(x: 0.0, y: 306.0, width: 396.0, height: 306.0).rect(for: pageBounds, rotation: angle) 76 | == CGRect(x: 0.0, y: 0.0, width: 306.0, height: 396.0) 77 | ) 78 | #expect( 79 | PDFPageArea.rect(x: 396.0, y: 306.0, width: 396.0, height: 306.0).rect(for: pageBounds, rotation: angle) 80 | == CGRect(x: 0.0, y: 396.0, width: 306.0, height: 396.0) 81 | ) 82 | #expect( 83 | PDFPageArea.rect(x: 0.0, y: 0.0, width: 396.0, height: 306.0).rect(for: pageBounds, rotation: angle) 84 | == CGRect(x: 306.0, y: 0.0, width: 306.0, height: 396.0) 85 | ) 86 | #expect( 87 | PDFPageArea.rect(x: 396.0, y: 0.0, width: 396.0, height: 306.0).rect(for: pageBounds, rotation: angle) 88 | == CGRect(x: 306.0, y: 396.0, width: 306.0, height: 396.0) 89 | ) 90 | } 91 | 92 | @Test func rectForRotation_180degrees() throws { 93 | let pageBounds = CGRect(x: 0.0, y: 0.0, width: 612.0, height: 792.0) // original non-rotated coord space 94 | let angle: PDFPageRotation.Angle = ._180degrees 95 | 96 | #expect( 97 | PDFPageArea.rect(x: 153.0, y: 396.0, width: 153.0, height: 198.0).rect(for: pageBounds, rotation: angle) 98 | == CGRect(x: 306.0, y: 198.0, width: 153.0, 
height: 198.0) 99 | ) 100 | #expect( 101 | PDFPageArea.rect(x: 306.0, y: 396.0, width: 153.0, height: 198.0).rect(for: pageBounds, rotation: angle) 102 | == CGRect(x: 153.0, y: 198.0, width: 153.0, height: 198.0) 103 | ) 104 | #expect( 105 | PDFPageArea.rect(x: 153.0, y: 198.0, width: 153.0, height: 198.0).rect(for: pageBounds, rotation: angle) 106 | == CGRect(x: 306.0, y: 396.0, width: 153.0, height: 198.0) 107 | ) 108 | #expect( 109 | PDFPageArea.rect(x: 306.0, y: 198.0, width: 153.0, height: 198.0).rect(for: pageBounds, rotation: angle) 110 | == CGRect(x: 153.0, y: 396.0, width: 153.0, height: 198.0) 111 | ) 112 | 113 | #expect( 114 | PDFPageArea.rect(x: 0.0, y: 396.0, width: 306.0, height: 396.0).rect(for: pageBounds, rotation: angle) 115 | == CGRect(x: 306.0, y: 0.0, width: 306.0, height: 396.0) 116 | ) 117 | #expect( 118 | PDFPageArea.rect(x: 306.0, y: 396.0, width: 306.0, height: 396.0).rect(for: pageBounds, rotation: angle) 119 | == CGRect(x: 0.0, y: 0.0, width: 306.0, height: 396.0) 120 | ) 121 | #expect( 122 | PDFPageArea.rect(x: 0.0, y: 0.0, width: 306.0, height: 396.0).rect(for: pageBounds, rotation: angle) 123 | == CGRect(x: 306.0, y: 396.0, width: 306.0, height: 396.0) 124 | ) 125 | #expect( 126 | PDFPageArea.rect(x: 306.0, y: 0.0, width: 306.0, height: 396.0).rect(for: pageBounds, rotation: angle) 127 | == CGRect(x: 0.0, y: 396.0, width: 306.0, height: 396.0) 128 | ) 129 | } 130 | 131 | @Test func rectForRotation_270degrees() throws { 132 | let pageBounds = CGRect(x: 0.0, y: 0.0, width: 612.0, height: 792.0) // original non-rotated coord space 133 | let angle: PDFPageRotation.Angle = ._270degrees 134 | 135 | #expect( 136 | PDFPageArea.rect(x: 198.0, y: 306.0, width: 198.0, height: 153.0).rect(for: pageBounds, rotation: angle) 137 | == CGRect(x: 306.0, y: 396.0, width: 153.0, height: 198.0) 138 | ) 139 | #expect( 140 | PDFPageArea.rect(x: 396.0, y: 306.0, width: 198.0, height: 153.0).rect(for: pageBounds, rotation: angle) 141 | == CGRect(x: 306.0, y: 
198.0, width: 153.0, height: 198.0) 142 | ) 143 | #expect( 144 | PDFPageArea.rect(x: 198.0, y: 153.0, width: 198.0, height: 153.0).rect(for: pageBounds, rotation: angle) 145 | == CGRect(x: 153.0, y: 396.0, width: 153.0, height: 198.0) 146 | ) 147 | #expect( 148 | PDFPageArea.rect(x: 396.0, y: 153.0, width: 198.0, height: 153.0).rect(for: pageBounds, rotation: angle) 149 | == CGRect(x: 153.0, y: 198.0, width: 153.0, height: 198.0) 150 | ) 151 | 152 | #expect( 153 | PDFPageArea.rect(x: 0.0, y: 306.0, width: 396.0, height: 306.0).rect(for: pageBounds, rotation: angle) 154 | == CGRect(x: 306.0, y: 396.0, width: 306.0, height: 396.0) 155 | ) 156 | #expect( 157 | PDFPageArea.rect(x: 396.0, y: 306.0, width: 396.0, height: 306.0).rect(for: pageBounds, rotation: angle) 158 | == CGRect(x: 306.0, y: 0.0, width: 306.0, height: 396.0) 159 | ) 160 | #expect( 161 | PDFPageArea.rect(x: 0.0, y: 0.0, width: 396.0, height: 306.0).rect(for: pageBounds, rotation: angle) 162 | == CGRect(x: 0.0, y: 396.0, width: 306.0, height: 396.0) 163 | ) 164 | #expect( 165 | PDFPageArea.rect(x: 396.0, y: 0.0, width: 396.0, height: 306.0).rect(for: pageBounds, rotation: angle) 166 | == CGRect(x: 0.0, y: 0.0, width: 306.0, height: 396.0) 167 | ) 168 | } 169 | } 170 | 171 | #endif 172 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/Pages/PDFPagesDescriptor.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFPagesDescriptor.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | import Foundation 10 | internal import SwiftExtensions 11 | 12 | /// Criteria to filter PDF pages. 13 | public enum PDFPagesDescriptor { 14 | /// Page number (not index) is explicitly an odd integer. 
extension PDFPagesDescriptor: Equatable {
    /// Returns `true` when both descriptors are the same case and carry equal payloads.
    ///
    /// `.range` payloads are compared through the `min` and `max` values reported by
    /// `getAbsoluteBounds()`.
    public static func == (lhs: Self, rhs: Self) -> Bool {
        switch lhs {
        case .oddNumbers:
            guard case .oddNumbers = rhs else { return false }
            return true

        case .evenNumbers:
            guard case .evenNumbers = rhs else { return false }
            return true

        case let .every(lhsNthPage, lhsIncludeFirst):
            guard case let .every(rhsNthPage, rhsIncludeFirst) = rhs else { return false }
            return lhsNthPage == rhsNthPage && lhsIncludeFirst == rhsIncludeFirst

        case let .range(lhsRange):
            guard case let .range(rhsRange) = rhs else { return false }
            let lhsBounds = lhsRange.getAbsoluteBounds()
            let rhsBounds = rhsRange.getAbsoluteBounds()
            return lhsBounds.min == rhsBounds.min && lhsBounds.max == rhsBounds.max

        case let .openRange(lhsStartIndex):
            guard case let .openRange(rhsStartIndex) = rhs else { return false }
            return lhsStartIndex == rhsStartIndex

        case let .first(lhsPageCount):
            guard case let .first(rhsPageCount) = rhs else { return false }
            return lhsPageCount == rhsPageCount

        case let .last(lhsPageCount):
            // The right-hand side must also be `.last`. Matching it against `.first` made
            // `.last(n) == .last(n)` false and `.last(n) == .first(n)` true, which also
            // disagreed with `hash(into:)`.
            guard case let .last(rhsPageCount) = rhs else { return false }
            return lhsPageCount == rhsPageCount

        case let .pages(lhsArray):
            guard case let .pages(rhsArray) = rhs else { return false }
            return lhsArray == rhsArray
        }
    }
}

extension PDFPagesDescriptor: Hashable {
    /// Hashes a per-case constant followed by the case's payload.
    ///
    /// The per-case constant keeps different cases that carry the same payload value
    /// (for example `.first` and `.last` with the same count) from feeding identical input
    /// to the hasher. `.range` hashes the same bounds that `==` compares.
    public func hash(into hasher: inout Hasher) {
        switch self {
        case .oddNumbers:
            hasher.combine(100000)

        case .evenNumbers:
            hasher.combine(200000)

        case let .every(nthPage, includeFirst):
            hasher.combine(300000)
            hasher.combine(nthPage)
            hasher.combine(includeFirst ? 10000 : 0)

        case let .range(range):
            hasher.combine(400000)
            let bounds = range.getAbsoluteBounds()
            hasher.combine(bounds.min)
            hasher.combine(bounds.max)

        case let .openRange(startIndex):
            hasher.combine(500000)
            hasher.combine(startIndex)

        case let .first(pageCount):
            hasher.combine(600000)
            hasher.combine(pageCount)

        case let .last(pageCount):
            hasher.combine(700000)
            hasher.combine(pageCount)

        case let .pages(array):
            hasher.combine(800000)
            hasher.combine(array)
        }
    }
}

// Using `@unchecked` only because `RangeExpression` isn't Sendable
extension PDFPagesDescriptor: @unchecked Sendable { }

extension PDFPagesDescriptor {
    /// Human-readable description of the descriptor.
    ///
    /// Zero-based page indexes carried by the descriptor are printed as one-based page
    /// numbers (index + 1). A missing range bound is printed as an empty string.
    public var verboseDescription: String {
        switch self {
        case .oddNumbers:
            return "odd page numbers"

        case .evenNumbers:
            return "even page numbers"

        case let .every(nthPage, includeFirst):
            // TODO: implement localized ordinal number string "1st page", "2nd page"
            return "every \(nthPage) pages,\(includeFirst ? "" : " not") including the first page"

        case let .range(range):
            let bounds = range.getAbsoluteBounds()

            // `map` replaces the previous `!= nil` check plus force unwrap.
            let fromNum = bounds.min.map { ($0 + 1).string } ?? ""
            let toNum = bounds.max.map { ($0 + 1).string } ?? ""
            return "page numbers \(fromNum)...\(toNum)"

        case let .openRange(start):
            return "from page number \(start + 1)"

        case let .first(pageCount):
            return "first \(pageCount) page\(pageCount == 1 ? "" : "s")"

        case let .last(pageCount):
            return "last \(pageCount) page\(pageCount == 1 ? "" : "s")"

        case let .pages(intArray):
            return "page numbers \(intArray.map { "\($0 + 1)" }.joined(separator: ", "))"
        }
    }
}

extension PDFPagesDescriptor {
    /// Applies the descriptor to a list of values and returns the values it selects.
    ///
    /// The descriptor is evaluated against the *positions* (array offsets) of `pageNumbers`;
    /// the values stored at the surviving positions are what is returned.
    ///
    /// - Parameters:
    ///   - pageNumbers: Values to filter. Only their positions take part in matching.
    ///   - sort: When `true` (the default) the selected values are sorted ascending.
    /// - Returns: A `FilterResult` holding the selected values and an `isInclusive` flag.
    ///   The flag is computed per case from the number of available positions (see the
    ///   individual branches). Negative counts passed to `.first` / `.last` are treated
    ///   like a count of zero instead of trapping.
    public func filtering(
        _ pageNumbers: [Int],
        sort: Bool = true
    ) -> FilterResult {
        var arrayIndices = Array(pageNumbers.indices)
        let isInclusive: Bool

        switch self {
        case .oddNumbers:
            // Odd page numbers (1, 3, 5, ...) sit at even array offsets.
            isInclusive = !arrayIndices.isEmpty
            arrayIndices = arrayIndices.filter { $0 % 2 == 0 }

        case .evenNumbers:
            // Even page numbers (2, 4, 6, ...) sit at odd array offsets.
            isInclusive = arrayIndices.count > 1
            arrayIndices = arrayIndices.filter { $0 % 2 == 1 }

        case let .every(nthPage, includeFirst):
            // A non-positive step cannot be applied (and would divide by zero);
            // every position is kept in that case.
            guard nthPage > 0 else {
                isInclusive = true
                break
            }
            if includeFirst {
                isInclusive = arrayIndices.count >= 1
                arrayIndices = arrayIndices.filter { $0 % nthPage == 0 }
            } else {
                isInclusive = arrayIndices.count >= nthPage
                arrayIndices = arrayIndices.filter { ($0 - 1) % nthPage == 0 }
            }

        case let .range(range):
            isInclusive = range.isContainedWithin(values: arrayIndices)
            arrayIndices = arrayIndices.filter { range.contains($0) }

        case let .openRange(start):
            arrayIndices = arrayIndices.filter { $0 >= start }
            // Inclusive when at least one position falls inside the open range.
            isInclusive = !arrayIndices.isEmpty

        case let .first(pageCount):
            isInclusive = arrayIndices.count >= pageCount
            // `prefix` already caps at the available count; clamping to zero avoids the
            // trap that a negative upper bound caused in `0 ..< pageCount`.
            arrayIndices = Array(arrayIndices.prefix(max(0, pageCount)))

        case let .last(pageCount):
            isInclusive = arrayIndices.count >= pageCount
            // `suffix` already caps at the available count; clamping to zero avoids its
            // precondition failure for a negative length.
            arrayIndices = Array(arrayIndices.suffix(max(0, pageCount)))

        case let .pages(array):
            // A set gives constant-time membership checks instead of scanning `array`
            // once per position.
            let requested = Set(array)
            isInclusive = requested.allSatisfy { pageNumbers.indices.contains($0) }
            arrayIndices = arrayIndices.filter { requested.contains($0) }
        }

        var indexNumbers = arrayIndices.map { pageNumbers[$0] }

        if sort {
            indexNumbers.sort()
        }
        return FilterResult(indexes: indexNumbers, isInclusive: isInclusive)
    }

    /// Returns `true` when `other` describes the same pages as this descriptor.
    ///
    /// Two `.pages` descriptors are compared as sets, so order and duplicates are ignored.
    /// Every other combination falls back to `==`.
    public func containsSamePages(as other: Self) -> Bool {
        if case let .pages(lhsArray) = self {
            guard case let .pages(rhsArray) = other else { return false }
            return Set(lhsArray) == Set(rhsArray)
        }
        return self == other
    }
}
// MARK: - Run

extension PDFProcessor {
    /// Runs the batch job using supplied settings. (Load PDFs, run operations, and save PDFs)
    ///
    /// Any PDFs that are already loaded are removed before `settings.sourcePDFs` is loaded.
    /// The PDFs are only written to disk when `settings.savePDFs` is `true`.
    ///
    /// - Parameter settings: Source PDFs, operations and output options for the job.
    /// - Throws: `PDFProcessorError.runtimeError` wrapping the description of whatever error
    ///   was thrown while loading, processing or saving.
    public func run(using settings: Settings) throws {
        logger.info("Processing...")

        do {
            try load(pdfs: settings.sourcePDFs, removeExisting: true)
            try perform(operations: settings.operations)
            if settings.savePDFs {
                try savePDFs(outputDir: settings.outputDir)
            }
        } catch {
            throw PDFProcessorError.runtimeError(
                "Failed to export: \(error.localizedDescription)"
            )
        }

        logger.info("Done.")
    }

    /// Load PDFs from disk.
    ///
    /// All files are read before anything is added, so a failure leaves the currently
    /// loaded PDFs untouched.
    ///
    /// - Parameters:
    ///   - urls: File URLs for PDFs to load from disk.
    ///   - removeExisting: Remove currently loaded PDFs first.
    /// - Throws: `PDFProcessorError.runtimeError` if a file cannot be read as a PDF.
    public func load(pdfs urls: [URL], removeExisting: Bool = false) throws {
        let docs = try urls.map { url -> PDFDocument in
            guard let doc = PDFDocument(url: url) else {
                throw PDFProcessorError.runtimeError(
                    "Failed to read PDF file contents: \(url.path.quoted)"
                )
            }
            return doc
        }
        try load(pdfs: docs, removeExisting: removeExisting)
    }

    /// Load PDF documents that are already in memory.
    ///
    /// - Parameters:
    ///   - docs: Documents to append to the loaded PDFs, in order.
    ///   - removeExisting: Remove currently loaded PDFs first.
    public func load(pdfs docs: [PDFDocument], removeExisting: Bool = false) throws {
        if removeExisting {
            pdfs.removeAll()
        }
        pdfs.append(contentsOf: docs.map { PDFFile(doc: $0) })
    }

    /// Perform one or more operations on the loaded PDFs.
    ///
    /// Operations run in order; the first one that throws stops the batch. Operations that
    /// report no change are logged.
    ///
    /// - Parameters:
    ///   - operations: One or more sequential operations to perform on the loaded PDF(s).
    public func perform(operations: [PDFOperation]) throws {
        for operation in operations {
            let result = try perform(operation: operation)

            switch result {
            case let .noChange(reason):
                if let reason {
                    logger.info("No change performed: \(reason)")
                } else {
                    logger.info("No change performed.")
                }
            case .changed:
                break
            }
        }
    }

    /// Save the PDFs to disk.
    ///
    /// The file name of each PDF comes from `filenameForExport(withExtension:)`. Existing
    /// files are never overwritten.
    ///
    /// - Parameters:
    ///   - outputDir: Output directory. Must be a folder that exists on disk.
    ///     If `nil`, the directory is resolved per PDF by `formOutputFilePath`.
    /// - Throws: `PDFProcessorError.runtimeError` if the output file names are not unique,
    ///   an output file already exists, or writing a PDF fails. Errors from
    ///   `formOutputFilePath` are passed through.
    public func savePDFs(
        outputDir: URL?
    ) throws {
        // One name per PDF by construction, so no separate count check is needed.
        let filenames = pdfs.map { $0.filenameForExport(withExtension: false) }

        // ensure there are no duplicate filenames
        guard filenames.duplicateElements().isEmpty
        else {
            throw PDFProcessorError.runtimeError(
                "Output filenames are not unique."
            )
        }

        for (filename, pdf) in zip(filenames, pdfs) {
            let outFilePath = try formOutputFilePath(
                for: pdf,
                fileNameWithoutExtension: filename,
                outputDir: outputDir
            )

            // TODO: allow overwriting by way of Settings flag
            guard !outFilePath.fileExists else {
                throw PDFProcessorError.runtimeError(
                    "Output file already exists: \(outFilePath.path.quoted)"
                )
            }

            logger.info("Saving to file \(outFilePath.path.quoted)...")

            // PDFKit Obj-C header docs for `dataRepresentation(options:)`:
            //
            // "Methods to record the current state of the PDFDocument as data or a file. Passing a QuartzFilter object
            // in the options dictionary with the key @"QuartzFilter" will allow you to have the filter applied when
            // saving the PDF."

            // PDFKit Obj-C header docs for `write(to:withOptions:)`:
            //
            // [This method] may take any CoreGraphics options that are typically passed into CGPDFContextCreate(...)
            // and CGPDFContextCreateWithURL(...)'s auxiliaryInfo dictionary. For encryption, you may provide an owner
            // and user password via the keys PDFDocumentWriteOption, where the values should be non-zero length
            // strings.

            // TL;DR - write options can only be used with `.write(to:withOptions:)`

            if !pdf.doc.write(to: outFilePath, withOptions: pdf.writeOptions) {
                throw PDFProcessorError.runtimeError(
                    "An error occurred while attempting to save the PDF file."
                )
            }
        }
    }
}
178 | func perform(operation: PDFOperation) throws -> PDFOperationResult { 179 | logger.info("Performing operation: \(operation.verboseDescription)") 180 | 181 | switch operation { 182 | case .newFile: 183 | return try performNewFile() 184 | 185 | case let .cloneFile(file): 186 | return try performCloneFile(file: file) 187 | 188 | case let .filterFiles(files): 189 | return try performFilterFiles(files: files) 190 | 191 | case let .mergeFiles(files, target): 192 | return try performMergeFiles(files: files, appendingTo: target) 193 | 194 | case let .splitFile(file, discardUnused, splits): 195 | return try performSplitFile(file: file, discardUnused: discardUnused, splits: splits) 196 | 197 | case let .setFilename(file, filename): 198 | return try performSetFilename(file: file, filename: filename) 199 | 200 | case let .setFilenames(files, filenames): 201 | return try performSetFilenames(files: files, filenames: filenames) 202 | 203 | case let .removeFileAttributes(files): 204 | return try performRemoveFileAttributes(files: files) 205 | 206 | case let .setFileAttribute(files, attr, value): 207 | return try performSetFileAttribute(files: files, attribute: attr, value: value) 208 | 209 | case let .filterPages(file, filter): 210 | return try performFilterPages(file: file, pages: filter) 211 | 212 | case let .copyPages(fromFile, fromPages, toFile, toPageIndex): 213 | return try performInsertPages( 214 | from: fromFile, 215 | fromPages: fromPages, 216 | to: toFile, 217 | toPageIndex: toPageIndex, 218 | behavior: .copy 219 | ) 220 | 221 | case let .movePages(fromFile, fromPages, toFile, toPageIndex): 222 | return try performInsertPages( 223 | from: fromFile, 224 | fromPages: fromPages, 225 | to: toFile, 226 | toPageIndex: toPageIndex, 227 | behavior: .move 228 | ) 229 | 230 | case let .replacePages(fromFile, fromPages, toFile, toPages, behavior): 231 | return try performReplacePages( 232 | from: fromFile, 233 | fromPages: fromPages, 234 | to: toFile, 235 | toPages: toPages, 
236 | behavior: behavior 237 | ) 238 | 239 | case let .reversePageOrder(file, pages): 240 | return try performReversePageOrder(file: file, pages: pages) 241 | 242 | case let .rotatePages(files, pages, rotation): 243 | return try performRotatePages(files: files, pages: pages, rotation: rotation) 244 | 245 | case let .cropPages(files, pages, area, apply): 246 | return try performCropPages(files: files, pages: pages, area: area, apply: apply) 247 | 248 | case let .filterAnnotations(files, pages, annotations): 249 | return try performFilterAnnotations(files: files, pages: pages, annotations: annotations) 250 | 251 | case let .burnInAnnotations(files): 252 | let errorMessage = "Burn in annotations is not supported. macOS 13.0, iOS 16.0, or tvOS 16.0 is required." 253 | guard #available(macOS 13.0, iOS 16.0, tvOS 16.0, *) else { 254 | throw PDFProcessorError.runtimeError(errorMessage) 255 | } 256 | #if !os(watchOS) 257 | return try performBurnInAnnotations(files: files) 258 | #else 259 | throw PDFProcessorError.runtimeError(errorMessage) 260 | #endif 261 | 262 | case let .extractPlainText(file, pages, destination, pageBreak): 263 | return try performExtractPlainText( 264 | file: file, 265 | pages: pages, 266 | to: destination, 267 | pageBreak: pageBreak 268 | ) 269 | 270 | case let .removeProtections(files): 271 | return try performRemoveProtections(files: files) 272 | } 273 | } 274 | 275 | /// Generates full output path including filename. 276 | func formOutputFilePath( 277 | for pdf: PDFFile, 278 | fileNameWithoutExtension: String, 279 | outputDir: URL? 280 | ) throws -> URL { 281 | var folderPath = outputDir 282 | ?? pdf.doc.documentURL?.deletingLastPathComponent() 283 | 284 | #if os(macOS) 285 | folderPath = folderPath ?? URL.desktopDirectoryBackCompat 286 | #endif 287 | 288 | guard let folderPath, 289 | folderPath.fileExists, 290 | folderPath.isDirectory 291 | else { 292 | throw PDFProcessorError.runtimeError( 293 | "Could not determine output path. 
Output path is either not a folder or does not exist." 294 | ) 295 | } 296 | 297 | return folderPath 298 | .appendingPathComponent(fileNameWithoutExtension) 299 | .appendingPathExtension("pdf") 300 | } 301 | } 302 | 303 | #endif 304 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | permissions: 3 | contents: read 4 | 5 | on: 6 | push: 7 | branches: [main, dev] 8 | paths-ignore: 9 | - 'Docs/**' # Docs folder in root of repo 10 | - '**/*.md' # .md files anywhere in the repo 11 | - '**/LICENSE' # LICENSE files anywhere in the repo 12 | - '**/.gitignore' # .gitignore files anywhere in the repo 13 | 14 | pull_request: 15 | branches: [main] 16 | paths-ignore: 17 | - 'Docs/**' # Docs folder in root of repo 18 | - '**/*.md' # .md files anywhere in the repo 19 | - '**/LICENSE' # LICENSE files anywhere in the repo 20 | - '**/.gitignore' # .gitignore files anywhere in the repo 21 | 22 | workflow_dispatch: 23 | 24 | schedule: 25 | - cron: '10 11 * * *' # once a day @ 11:10am UTC (4:50am PST) 26 | 27 | env: 28 | SCHEME: "swift-pdf-processor" 29 | 30 | jobs: 31 | macOS: 32 | name: macOS 33 | runs-on: macos-latest 34 | timeout-minutes: 20 35 | steps: 36 | - uses: actions/checkout@main 37 | - uses: maxim-lobanov/setup-xcode@v1 38 | with: 39 | xcode-version: latest-stable 40 | - name: Build 41 | run: xcodebuild build -workspace ".swiftpm/xcode/package.xcworkspace" -scheme "$SCHEME" -destination "generic/platform=macOS" | xcbeautify --renderer github-actions && exit ${PIPESTATUS[0]} 42 | - name: Unit Tests 43 | run: xcodebuild test -skipMacroValidation -workspace ".swiftpm/xcode/package.xcworkspace" -scheme "$SCHEME" -destination "platform=macOS" | xcbeautify --renderer github-actions && exit ${PIPESTATUS[0]} 44 | 45 | macOS-swift6: 46 | name: macOS (Swift 6) 47 | runs-on: macos-latest 48 | timeout-minutes: 20 49 | steps: 
50 | - uses: actions/checkout@main 51 | - uses: maxim-lobanov/setup-xcode@v1 52 | with: 53 | xcode-version: latest-stable 54 | - name: Set Package to Swift 6.0 55 | run: swift package tools-version --set "6.0" 56 | - name: Build 57 | run: xcodebuild build -workspace ".swiftpm/xcode/package.xcworkspace" -scheme "$SCHEME" -destination "generic/platform=macOS" | xcbeautify --renderer github-actions && exit ${PIPESTATUS[0]} 58 | - name: Unit Tests 59 | run: xcodebuild test -skipMacroValidation -workspace ".swiftpm/xcode/package.xcworkspace" -scheme "$SCHEME" -destination "platform=macOS" | xcbeautify --renderer github-actions && exit ${PIPESTATUS[0]} 60 | 61 | macCatalyst: 62 | name: macCatalyst 63 | runs-on: macos-latest 64 | timeout-minutes: 20 65 | steps: 66 | - uses: actions/checkout@main 67 | - uses: maxim-lobanov/setup-xcode@v1 68 | with: 69 | xcode-version: latest-stable 70 | - name: Build 71 | run: xcodebuild build -workspace ".swiftpm/xcode/package.xcworkspace" -scheme "$SCHEME" -destination "generic/platform=macOS,variant=Mac Catalyst" | xcbeautify --renderer github-actions && exit ${PIPESTATUS[0]} 72 | - name: Unit Tests 73 | run: xcodebuild test -skipMacroValidation -workspace ".swiftpm/xcode/package.xcworkspace" -scheme "$SCHEME" -destination "platform=macOS,variant=Mac Catalyst" | xcbeautify --renderer github-actions && exit ${PIPESTATUS[0]} 74 | 75 | iOS: 76 | name: iOS 77 | runs-on: macos-latest 78 | timeout-minutes: 20 79 | steps: 80 | - uses: actions/checkout@main 81 | - uses: maxim-lobanov/setup-xcode@v1 82 | with: 83 | xcode-version: latest-stable 84 | - name: Bootstrap Platforms 85 | # Workaround for Xcode/GitHub runner issues finding simulators. 86 | # see https://github.com/actions/runner-images/issues/12758#issuecomment-3206748945 87 | run: xcrun simctl list 88 | - name: Download Platform 89 | # Workaround for Xcode/GitHub runner issues finding simulators. 
90 | # see https://github.com/actions/runner-images/issues/12758#issuecomment-3206748945 91 | run: xcodebuild -downloadPlatform iOS 92 | - name: Build 93 | run: xcodebuild build -workspace ".swiftpm/xcode/package.xcworkspace" -scheme "$SCHEME" -destination "generic/platform=iOS" | xcbeautify --renderer github-actions && exit ${PIPESTATUS[0]} 94 | - name: Prepare Destination Device Name 95 | id: destnameprep 96 | # As of GitHub's updates to the macOS-15 runner summer 2025, randomly Xcode may not list any simulators. 97 | # No idea why. Seems like a bug or runner config issue. So to prevent false-positive job failures, we'll skip 98 | # the unit tests if no devices are found, but mark the unit test step as cancelled. The job is still marked as 99 | # passed (green), but the Unit Tests step will show cancelled. This is the best we can do, as there appears to 100 | # be no way to "cancel" an individual job programmatically. 101 | continue-on-error: true 102 | shell: bash 103 | run: | 104 | xcodebuild -showdestinations -workspace ".swiftpm/xcode/package.xcworkspace" -scheme "$SCHEME" > destinations.txt 105 | SIMPLATFORM="iOS" 106 | SIMDEVICE="iPhone\s\d{2}\s" 107 | REGEX="m/\{\splatform:(.*\sSimulator),.*OS:(\d{1,2}\.\d),.*name:([a-zA-Z0-9\(\)\s]*)\s\}/g" 108 | SIMPATFORMLIST=$(cat destinations.txt | perl -nle 'if ('$REGEX') { ($plat, $os, $name) = ($1, $2, $3); if ($plat =~ /'$SIMPLATFORM'/ and $name =~ /'$SIMDEVICE'/) { print "- ${name} (${plat}) (${os})"; } }') 109 | if [[ -z $SIMPATFORMLIST ]]; then echo "Error: no matching simulators available."; exit 1; fi 110 | echo "Available $SIMPLATFORM simulators:" 111 | echo "$SIMPATFORMLIST" 112 | DESTNAME=$(cat destinations.txt | perl -nle 'if ('$REGEX') { ($plat, $os, $name) = ($1, $2, $3); if ($plat =~ /'$SIMPLATFORM'/ and $name =~ /'$SIMDEVICE'/) { print $name; } }' | sort -rV | head -n 1) 113 | if [[ -z $DESTNAME ]]; then echo "Error: no matching simulators available."; exit 1; fi 114 | echo "Using device name 
\"$DESTNAME\"" 115 | echo "DESTNAME=$DESTNAME" >> "$GITHUB_ENV" 116 | - name: Unit Tests 117 | if: steps.destnameprep.outcome != 'failure' 118 | run: xcodebuild test -skipMacroValidation -workspace ".swiftpm/xcode/package.xcworkspace" -scheme "$SCHEME" -destination "platform=iOS Simulator,name=$DESTNAME" | xcbeautify --renderer github-actions && exit ${PIPESTATUS[0]} 119 | 120 | tvOS: 121 | name: tvOS 122 | runs-on: macos-latest 123 | timeout-minutes: 20 124 | steps: 125 | - uses: actions/checkout@main 126 | - uses: maxim-lobanov/setup-xcode@v1 127 | with: 128 | xcode-version: latest-stable 129 | - name: Bootstrap Platforms 130 | # Workaround for Xcode/GitHub runner issues finding simulators. 131 | # see https://github.com/actions/runner-images/issues/12758#issuecomment-3206748945 132 | run: xcrun simctl list 133 | - name: Download Platform 134 | # Workaround for Xcode/GitHub runner issues finding simulators. 135 | # see https://github.com/actions/runner-images/issues/12758#issuecomment-3206748945 136 | run: xcodebuild -downloadPlatform tvOS 137 | - name: Build 138 | run: xcodebuild build -workspace ".swiftpm/xcode/package.xcworkspace" -scheme "$SCHEME" -destination "generic/platform=tvOS" | xcbeautify --renderer github-actions && exit ${PIPESTATUS[0]} 139 | - name: Prepare Destination Device Name 140 | id: destnameprep 141 | # As of GitHub's updates to the macOS-15 runner summer 2025, randomly Xcode may not list any simulators. 142 | # No idea why. Seems like a bug or runner config issue. So to prevent false-positive job failures, we'll skip 143 | # the unit tests if no devices are found, but mark the unit test step as cancelled. The job is still marked as 144 | # passed (green), but the Unit Tests step will show cancelled. This is the best we can do, as there appears to 145 | # be no way to "cancel" an individual job programmatically. 
146 | continue-on-error: true 147 | shell: bash 148 | run: | 149 | xcodebuild -showdestinations -workspace ".swiftpm/xcode/package.xcworkspace" -scheme "$SCHEME" > destinations.txt 150 | SIMPLATFORM="tvOS" 151 | SIMDEVICE="Apple\sTV" 152 | REGEX="m/\{\splatform:(.*\sSimulator),.*OS:(\d{1,2}\.\d),.*name:([a-zA-Z0-9\(\)\s]*)\s\}/g" 153 | SIMPATFORMLIST=$(cat destinations.txt | perl -nle 'if ('$REGEX') { ($plat, $os, $name) = ($1, $2, $3); if ($plat =~ /'$SIMPLATFORM'/ and $name =~ /'$SIMDEVICE'/) { print "- ${name} (${plat}) (${os})"; } }') 154 | if [[ -z $SIMPATFORMLIST ]]; then echo "Error: no matching simulators available."; exit 1; fi 155 | echo "Available $SIMPLATFORM simulators:" 156 | echo "$SIMPATFORMLIST" 157 | DESTNAME=$(cat destinations.txt | perl -nle 'if ('$REGEX') { ($plat, $os, $name) = ($1, $2, $3); if ($plat =~ /'$SIMPLATFORM'/ and $name =~ /'$SIMDEVICE'/) { print $name; } }' | sort -rV | head -n 1) 158 | if [[ -z $DESTNAME ]]; then echo "Error: no matching simulators available."; exit 1; fi 159 | echo "Using device name \"$DESTNAME\"" 160 | echo "DESTNAME=$DESTNAME" >> "$GITHUB_ENV" 161 | - name: Unit Tests 162 | if: steps.destnameprep.outcome != 'failure' 163 | run: xcodebuild test -skipMacroValidation -workspace ".swiftpm/xcode/package.xcworkspace" -scheme "$SCHEME" -destination "platform=tvOS Simulator,name=$DESTNAME" | xcbeautify --renderer github-actions && exit ${PIPESTATUS[0]} 164 | 165 | watchOS: 166 | name: watchOS 167 | runs-on: macos-latest 168 | timeout-minutes: 20 169 | steps: 170 | - uses: actions/checkout@main 171 | - uses: maxim-lobanov/setup-xcode@v1 172 | with: 173 | xcode-version: latest-stable 174 | - name: Bootstrap Platforms 175 | # Workaround for Xcode/GitHub runner issues finding simulators. 176 | # see https://github.com/actions/runner-images/issues/12758#issuecomment-3206748945 177 | run: xcrun simctl list 178 | - name: Download Platform 179 | # Workaround for Xcode/GitHub runner issues finding simulators. 
180 | # see https://github.com/actions/runner-images/issues/12758#issuecomment-3206748945 181 | run: xcodebuild -downloadPlatform watchOS 182 | - name: Build 183 | run: xcodebuild build -workspace ".swiftpm/xcode/package.xcworkspace" -scheme "$SCHEME" -destination "generic/platform=watchOS" | xcbeautify --renderer github-actions && exit ${PIPESTATUS[0]} 184 | - name: Prepare Destination Device Name 185 | id: destnameprep 186 | # As of GitHub's updates to the macOS-15 runner summer 2025, randomly Xcode may not list any simulators. 187 | # No idea why. Seems like a bug or runner config issue. So to prevent false-positive job failures, we'll skip 188 | # the unit tests if no devices are found, but mark the unit test step as cancelled. The job is still marked as 189 | # passed (green), but the Unit Tests step will show cancelled. This is the best we can do, as there appears to 190 | # be no way to "cancel" an individual job programmatically. 191 | continue-on-error: true 192 | shell: bash 193 | run: | 194 | xcodebuild -showdestinations -workspace ".swiftpm/xcode/package.xcworkspace" -scheme "$SCHEME" > destinations.txt 195 | SIMPLATFORM="watchOS" 196 | SIMDEVICE="Apple\sWatch\sSeries" 197 | REGEX="m/\{\splatform:(.*\sSimulator),.*OS:(\d{1,2}\.\d),.*name:([a-zA-Z0-9\(\)\s]*)\s\}/g" 198 | SIMPATFORMLIST=$(cat destinations.txt | perl -nle 'if ('$REGEX') { ($plat, $os, $name) = ($1, $2, $3); if ($plat =~ /'$SIMPLATFORM'/ and $name =~ /'$SIMDEVICE'/) { print "- ${name} (${plat}) (${os})"; } }') 199 | if [[ -z $SIMPATFORMLIST ]]; then echo "Error: no matching simulators available."; exit 1; fi 200 | echo "Available $SIMPLATFORM simulators:" 201 | echo "$SIMPATFORMLIST" 202 | DESTNAME=$(cat destinations.txt | perl -nle 'if ('$REGEX') { ($plat, $os, $name) = ($1, $2, $3); if ($plat =~ /'$SIMPLATFORM'/ and $name =~ /'$SIMDEVICE'/) { print $name; } }' | sort -rV | head -n 1) 203 | if [[ -z $DESTNAME ]]; then echo "Error: no matching simulators available."; exit 1; fi 204 | echo 
"Using device name \"$DESTNAME\"" 205 | echo "DESTNAME=$DESTNAME" >> "$GITHUB_ENV" 206 | - name: Unit Tests 207 | if: steps.destnameprep.outcome != 'failure' 208 | run: xcodebuild test -skipMacroValidation -workspace ".swiftpm/xcode/package.xcworkspace" -scheme "$SCHEME" -destination "platform=watchOS Simulator,name=$DESTNAME" | xcbeautify --renderer github-actions && exit ${PIPESTATUS[0]} 209 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/Operations/PDFOperation.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFOperation.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | import Foundation 10 | import PDFKit 11 | 12 | /// PDF editing operations. 13 | public enum PDFOperation { 14 | // MARK: - File Operations 15 | 16 | /// New empty PDF file. 17 | case newFile 18 | 19 | /// Clone (duplicate) a loaded PDF file. 20 | case cloneFile(file: PDFFileDescriptor) 21 | 22 | /// Filter loaded PDF files. 23 | case filterFiles(_ files: PDFFilesDescriptor) 24 | 25 | /// Merge loaded PDF files. 26 | case mergeFiles( 27 | _ files: PDFFilesDescriptor = .all, 28 | appendingTo: PDFFileDescriptor? = nil 29 | ) 30 | 31 | // TODO: reorder or sort files 32 | 33 | /// Split file at certain page points into multiple files. 34 | /// The original file is discarded. 35 | case splitFile( 36 | file: PDFFileDescriptor, 37 | discardUnused: Bool = true, 38 | _ splits: PDFFileSplitDescriptor 39 | ) 40 | 41 | /// Set new filename for a PDF file (excluding .pdf file extension). 42 | /// Passing `nil` resets the filename. 43 | case setFilename(file: PDFFileDescriptor, filename: String?) 44 | 45 | /// Set new filenames for one or more PDF files (excluding .pdf file extension). 46 | /// Passing `nil` for a filename resets that filename. 
47 | case setFilenames(files: PDFFilesDescriptor = .all, filenames: [String?]) 48 | 49 | /// Removes file attributes (metadata). 50 | case removeFileAttributes(files: PDFFilesDescriptor) 51 | 52 | /// Set or clear an attribute for one or more files. 53 | case setFileAttribute( 54 | files: PDFFilesDescriptor, 55 | _ attribute: PDFDocumentAttribute, 56 | value: String? 57 | ) 58 | 59 | // MARK: - Page Operations 60 | 61 | // TODO: collation stuff 62 | 63 | /// Filter page(s) of PDF file(s). 64 | case filterPages(file: PDFFileDescriptor, pages: PDFPagesFilter) 65 | 66 | // TODO: additional setFilename cases, such as renaming all to sequential numbers etc. 67 | 68 | /// Copy page(s) within the same PDF file or from one file to another. 69 | case copyPages( 70 | fromFile: PDFFileDescriptor, 71 | fromPages: PDFPagesFilter, 72 | toFile: PDFFileDescriptor, 73 | toPageIndex: Int? = nil 74 | ) 75 | 76 | /// Copy page(s) within the same PDF file or from one file to another. 77 | case movePages( 78 | fromFile: PDFFileDescriptor, 79 | fromPages: PDFPagesFilter, 80 | toFile: PDFFileDescriptor, 81 | toPageIndex: Int? = nil 82 | ) 83 | 84 | /// Replace existing page(s) with other page(s). 85 | case replacePages( 86 | fromFile: PDFFileDescriptor, 87 | fromPages: PDFPagesFilter, 88 | toFile: PDFFileDescriptor, 89 | toPages: PDFPagesFilter, 90 | behavior: InterchangeBehavior 91 | ) 92 | 93 | /// Reverse the page order of a PDF file. 94 | case reversePageOrder(file: PDFFileDescriptor, pages: PDFPagesFilter) 95 | 96 | /// Rotate page(s) by a multiple of 90 degrees. 97 | /// Rotation can be absolute or relative to current page rotation (if any). 98 | case rotatePages(files: PDFFilesDescriptor, pages: PDFPagesFilter, rotation: PDFPageRotation) 99 | 100 | /// Crop page(s) by the given area descriptor. 101 | /// 102 | /// For scaling descriptors, a value of `1.0` represents 1:1 scale (no change). 
103 | /// A crop cannot be larger than the source page's dimensions -- if a crop operation results in 104 | /// bounds that extend past the original media box, the crop will be reduced to the extents of 105 | /// the existing page. 106 | /// 107 | /// - Parameters: 108 | /// - files: File(s). 109 | /// - pages: Page(s). 110 | /// - area: Area descriptor. 111 | /// - apply: If `absolute`, the crop is applied to the original media box dimensions, 112 | /// even if a crop already exists (effectively, the crop is replaced and not augmented). 113 | /// If `relative`, the crop operation is applied relatively - if no crop exists, it is applied 114 | /// to the media box, but if a crop exists, the existing crop is augmented. 115 | case cropPages( 116 | files: PDFFilesDescriptor, 117 | pages: PDFPagesFilter, 118 | area: PDFPageArea, 119 | apply: PDFOperation.ChangeBehavior = .relative 120 | ) 121 | 122 | // TODO: case flip(file: PDFFileDescriptor, pages: PDFPagesFilter, axis: Axis) // -> use Quartz filter? 123 | 124 | // MARK: - Page Content Operations 125 | 126 | /// Filter annotation(s). 127 | case filterAnnotations( 128 | files: PDFFilesDescriptor, 129 | pages: PDFPagesFilter, 130 | annotations: PDFAnnotationFilter 131 | ) 132 | 133 | /// Burn in annotations when exporting file to disk. 134 | /// This applies to an entire file and cannot be applied to individual pages. 135 | /// (macOS 13+) 136 | case burnInAnnotations(files: PDFFilesDescriptor) 137 | 138 | // --> nil out all annotations' `userName: String?` property etc. 139 | // case removeAnnotationAuthors(files: PDFFilesDescriptor, pages: PDFPagesFilter, for: PDFAnnotationFilter) 140 | 141 | // TODO: text/freeText annotation: removal based on text content, allowing regex matching 142 | // TODO: text/freeText annotation: text search & replace, allowing regex matching 143 | 144 | // TODO: Title, Author, Subject, PDF Producer, Content creator, etc.
145 | // case setFileMetadata(files: PDFFilesDescriptor, property: PDFFileProperty, value: String) 146 | 147 | // TODO: Draw text, shapes or images on page(s) - ie: a watermark or redaction 148 | // case addPageElement(files: PDFFilesDescriptor, pages: PDFPagesFilter, text: String, in: Rect) 149 | 150 | // TODO: Modify style of existing text/freeText annotations 151 | 152 | // TODO: Logic operations or assertions 153 | // perhaps these could be nesting blocks using a result builder; might need to rethink the whole library API? 154 | // case expect(fileCount: Int) 155 | // case expect(file: PDFFileDescriptor, pageCount: Int) // could use enum: equals(), greaterThan(), lessThan() 156 | 157 | /// Extract plain text content and send it to the specified destination. 158 | case extractPlainText( 159 | file: PDFFileDescriptor, 160 | pages: PDFPagesFilter, 161 | to: PDFTextDestination, 162 | pageBreak: PDFTextPageBreak 163 | ) 164 | 165 | /// Attempts to remove document protections. 166 | case removeProtections(files: PDFFilesDescriptor) 167 | } 168 | 169 | extension PDFOperation: Equatable { } 170 | 171 | extension PDFOperation: Hashable { } 172 | 173 | extension PDFOperation: Sendable { } 174 | 175 | // MARK: - Static Constructors 176 | 177 | extension PDFOperation { 178 | /// Copy page(s) within the same PDF file. 179 | public static func copyPages( 180 | file: PDFFileDescriptor, 181 | from fromPages: PDFPagesFilter, 182 | toPageIndex: Int? = nil 183 | ) -> Self { 184 | .copyPages(fromFile: file, fromPages: fromPages, toFile: file, toPageIndex: toPageIndex) 185 | } 186 | 187 | /// Move page(s) within the same PDF file. 188 | public static func movePages( 189 | file: PDFFileDescriptor, 190 | from fromPages: PDFPagesFilter, 191 | toPageIndex: Int? 
= nil 192 | ) -> Self { 193 | .movePages(fromFile: file, fromPages: fromPages, toFile: file, toPageIndex: toPageIndex) 194 | } 195 | 196 | /// Replace existing page(s) with other page(s). 197 | public static func replacePages( 198 | file: PDFFileDescriptor, 199 | from fromPages: PDFPagesFilter, 200 | to toPages: PDFPagesFilter, 201 | behavior: InterchangeBehavior 202 | ) -> Self { 203 | .replacePages( 204 | fromFile: file, 205 | fromPages: fromPages, 206 | toFile: file, 207 | toPages: toPages, 208 | behavior: behavior 209 | ) 210 | } 211 | } 212 | 213 | extension PDFOperation { 214 | public var verboseDescription: String { 215 | switch self { 216 | case .newFile: 217 | return "New empty file" 218 | 219 | case let .cloneFile(file): 220 | return "Clone \(file.verboseDescription)" 221 | 222 | case let .filterFiles(files): 223 | return "Filter \(files.verboseDescription)" 224 | 225 | case .mergeFiles: 226 | return "Merge files" 227 | 228 | case let .splitFile(file, discardUnused, splits): 229 | return "Split \(file.verboseDescription) \(splits.verboseDescription)\(discardUnused ? ", discarding unused pages if any" : "")" 230 | 231 | case let .setFilename(file, filename): 232 | if let filename { 233 | return "Set filename for \(file.verboseDescription) to \(filename.quoted) (without extension)" 234 | } else { 235 | return "Reset filename for \(file.verboseDescription)" 236 | } 237 | 238 | case let .setFilenames(files, filenames): 239 | let formattedFilenames = filenames 240 | .map { $0 ?? 
"" } 241 | .map { $0.quoted } 242 | .joined(separator: ", ") 243 | return "Set filename(s) for \(files.verboseDescription) to \(formattedFilenames)" 244 | 245 | case let .removeFileAttributes(files): 246 | return "Remove attributes (metadata) for \(files.verboseDescription)" 247 | 248 | case let .setFileAttribute(files, attr, value): 249 | if let value { 250 | return "Set \(attr.rawValue) attribute value \(value.quoted) for \(files.verboseDescription)" 251 | } else { 252 | return "Remove \(attr.rawValue) attribute from \(files.verboseDescription)" 253 | } 254 | 255 | case let .filterPages(file, pages): 256 | return "Filter \(pages.verboseDescription) in \(file.verboseDescription)" 257 | 258 | case let .copyPages(fromFile, fromPages, toFile, toPageIndex): 259 | let location = toPageIndex != nil 260 | ? "inserting at page number \(toPageIndex! + 1) in" 261 | : "appending to end of" 262 | return "Copy \(fromPages.verboseDescription) from \(fromFile.verboseDescription), \(location) \(toFile.verboseDescription)" 263 | 264 | case let .movePages(fromFile, fromPages, toFile, toPageIndex): 265 | let location = toPageIndex != nil 266 | ? "inserting at page number \(toPageIndex! 
+ 1) in" 267 | : "appending to end of" 268 | return "Move \(fromPages.verboseDescription) from \(fromFile.verboseDescription), \(location) \(toFile.verboseDescription)" 269 | 270 | case let .replacePages(fromFile, fromPages, toFile, toPages, behavior): 271 | return "Replace \(toPages.verboseDescription) of \(toFile.verboseDescription) with \(fromPages.verboseDescription) from \(fromFile.verboseDescription) by \(behavior.verboseDescription)" 272 | 273 | case let .reversePageOrder(file, pages): 274 | return "Reverse page order of \(pages.verboseDescription) in \(file.verboseDescription)" 275 | 276 | case let .rotatePages(files, pages, rotation): 277 | return "Rotate \(pages.verboseDescription) in \(files.verboseDescription) \(rotation.verboseDescription)" 278 | 279 | case let .cropPages(files, pages, area, process): 280 | return "Crop \(pages.verboseDescription) in \(files.verboseDescription) to \(area.verboseDescription) (\(process.verboseDescription))" 281 | 282 | case let .filterAnnotations(files, pages, annotations): 283 | return "Filter annotations \(annotations.verboseDescription) for \(pages.verboseDescription) in \(files.verboseDescription)" 284 | 285 | case let .burnInAnnotations(files): 286 | return "Burn in annotations for \(files.verboseDescription)" 287 | 288 | case let .extractPlainText(file, pages, destination, _ /* pageBreak */ ): 289 | return "Extract plain text from \(pages.verboseDescription) in \(file.verboseDescription) to \(destination.verboseDescription)" 290 | 291 | case .removeProtections: 292 | return "Remove protections" 293 | } 294 | } 295 | } 296 | 297 | #endif 298 | -------------------------------------------------------------------------------- /Tests/PDFProcessorTests/Pages/PDFPagesDescriptor Tests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFPagesDescriptor Tests.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan 
Andrews • Licensed under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | @testable import PDFProcessor 10 | import Testing 11 | import TestingExtensions 12 | 13 | @Suite struct PDFPagesDescriptorTests { 14 | @Test func hashable() throws { 15 | let descriptors: Set = [ 16 | .oddNumbers, 17 | .oddNumbers, 18 | 19 | .evenNumbers, 20 | .evenNumbers, 21 | 22 | .every(nthPage: 1, includeFirst: true), 23 | .every(nthPage: 1, includeFirst: true), 24 | 25 | .every(nthPage: 2, includeFirst: true), 26 | .every(nthPage: 2, includeFirst: true), 27 | 28 | .range(indexes: 1 ... 5), 29 | .range(indexes: 1 ... 5), 30 | 31 | .range(indexes: 1 ... 6), 32 | .range(indexes: 1 ... 6), 33 | 34 | .openRange(startIndex: 1), 35 | .openRange(startIndex: 1), 36 | 37 | .openRange(startIndex: 2), 38 | .openRange(startIndex: 2), 39 | 40 | .pages(indexes: [1, 2]), 41 | .pages(indexes: [1, 2]), 42 | 43 | .pages(indexes: [3, 4]), 44 | .pages(indexes: [3, 4]) 45 | ] 46 | 47 | #expect(descriptors.count == 10) 48 | } 49 | 50 | @Test func hashable_EdgeCases() throws { 51 | // .page() page order is an array, not a set. 52 | #expect( 53 | Set([.pages(indexes: [1, 2]), .pages(indexes: [2, 1])]) == 54 | [.pages(indexes: [1, 2]), .pages(indexes: [2, 1])] 55 | ) 56 | } 57 | 58 | @Test func containsSamePages() throws { 59 | func isSame(_ lhs: PDFPagesDescriptor, _ rhs: PDFPagesDescriptor? = nil) { 60 | let rhs = rhs ?? lhs 61 | #expect( 62 | lhs.containsSamePages(as: rhs), 63 | "\(lhs) is not the same as \(rhs)" 64 | ) 65 | } 66 | 67 | // baseline cases 68 | 69 | let sets = [PDFPagesDescriptor]([ 70 | .oddNumbers, 71 | .evenNumbers, 72 | .every(nthPage: 1, includeFirst: true), 73 | .range(indexes: 1 ... 
5), 74 | .openRange(startIndex: 1), 75 | .pages(indexes: [1, 2]) 76 | ]) 77 | 78 | sets.forEach { isSame($0) } 79 | 80 | // specific conditions 81 | 82 | isSame(.pages(indexes: [1, 2]), .pages(indexes: [2, 1])) 83 | 84 | // false conditions 85 | 86 | #expect(!PDFPagesDescriptor.oddNumbers.containsSamePages(as: .evenNumbers)) 87 | } 88 | 89 | @Test func oddNumbers() throws { 90 | let descriptor: PDFPagesDescriptor = .oddNumbers 91 | 92 | #expect( 93 | descriptor.filtering([]) == 94 | .init(indexes: [], isInclusive: false) 95 | ) 96 | 97 | #expect( 98 | descriptor.filtering([0]) == 99 | .init(indexes: [0], isInclusive: true) 100 | ) 101 | 102 | #expect( 103 | descriptor.filtering([0, 1]) == 104 | .init(indexes: [0], isInclusive: true) 105 | ) 106 | 107 | #expect( 108 | descriptor.filtering([1]) == 109 | .init(indexes: [1], isInclusive: true) 110 | ) 111 | 112 | #expect( 113 | descriptor.filtering([0, 1, 2, 3, 4]) == 114 | .init(indexes: [0, 2, 4], isInclusive: true) 115 | ) 116 | } 117 | 118 | @Test func evenNumbers() throws { 119 | let descriptor: PDFPagesDescriptor = .evenNumbers 120 | 121 | #expect( 122 | descriptor.filtering([]) == 123 | .init(indexes: [], isInclusive: false) 124 | ) 125 | 126 | #expect( 127 | descriptor.filtering([0]) == 128 | .init(indexes: [], isInclusive: false) 129 | ) 130 | 131 | #expect( 132 | descriptor.filtering([0, 1]) == 133 | .init(indexes: [1], isInclusive: true) 134 | ) 135 | 136 | #expect( 137 | descriptor.filtering([1]) == 138 | .init(indexes: [], isInclusive: false) 139 | ) 140 | 141 | #expect( 142 | descriptor.filtering([0, 1, 2, 3, 4]) == 143 | .init(indexes: [1, 3], isInclusive: true) 144 | ) 145 | } 146 | 147 | @Test func everyNthPage_1() throws { 148 | let descriptor: PDFPagesDescriptor = .every(nthPage: 1, includeFirst: true) 149 | 150 | #expect( 151 | descriptor.filtering([]) == 152 | .init(indexes: [], isInclusive: false) 153 | ) 154 | 155 | #expect( 156 | descriptor.filtering([0]) == 157 | .init(indexes: [0], 
isInclusive: true) 158 | ) 159 | 160 | #expect( 161 | descriptor.filtering([0, 1]) == 162 | .init(indexes: [0, 1], isInclusive: true) 163 | ) 164 | 165 | #expect( 166 | descriptor.filtering([1]) == 167 | .init(indexes: [1], isInclusive: true) 168 | ) 169 | 170 | #expect( 171 | descriptor.filtering([0, 1, 2, 3, 4]) == 172 | .init(indexes: [0, 1, 2, 3, 4], isInclusive: true) 173 | ) 174 | } 175 | 176 | @Test func everyNthPage_2_IncludeFirst() throws { 177 | let descriptor: PDFPagesDescriptor = .every(nthPage: 2, includeFirst: true) 178 | 179 | #expect( 180 | descriptor.filtering([]) == 181 | .init(indexes: [], isInclusive: false) 182 | ) 183 | 184 | #expect( 185 | descriptor.filtering([0]) == 186 | .init(indexes: [0], isInclusive: true) 187 | ) 188 | 189 | #expect( 190 | descriptor.filtering([0, 1]) == 191 | .init(indexes: [0], isInclusive: true) 192 | ) 193 | 194 | #expect( 195 | descriptor.filtering([1]) == 196 | .init(indexes: [1], isInclusive: true) 197 | ) 198 | 199 | #expect( 200 | descriptor.filtering([0, 1, 2, 3, 4]) == 201 | .init(indexes: [0, 2, 4], isInclusive: true) 202 | ) 203 | } 204 | 205 | @Test func everyNthPage_2_DoNotIncludeFirst() throws { 206 | let descriptor: PDFPagesDescriptor = .every(nthPage: 2, includeFirst: false) 207 | 208 | #expect( 209 | descriptor.filtering([]) == 210 | .init(indexes: [], isInclusive: false) 211 | ) 212 | 213 | #expect( 214 | descriptor.filtering([0]) == 215 | .init(indexes: [], isInclusive: false) 216 | ) 217 | 218 | #expect( 219 | descriptor.filtering([0, 1]) == 220 | .init(indexes: [1], isInclusive: true) 221 | ) 222 | 223 | #expect( 224 | descriptor.filtering([1]) == 225 | .init(indexes: [], isInclusive: false) 226 | ) 227 | 228 | #expect( 229 | descriptor.filtering([0, 1, 2, 3, 4]) == 230 | .init(indexes: [1, 3], isInclusive: true) 231 | ) 232 | } 233 | 234 | @Test func range() throws { 235 | let descriptor: PDFPagesDescriptor = .range(indexes: 1 ..< 3) 236 | 237 | #expect( 238 | descriptor.filtering([]) == 239 | 
.init(indexes: [], isInclusive: false) 240 | ) 241 | 242 | #expect( 243 | descriptor.filtering([1]) == 244 | .init(indexes: [], isInclusive: false) 245 | ) 246 | 247 | #expect( 248 | descriptor.filtering([5, 6]) == 249 | .init(indexes: [6], isInclusive: false) 250 | ) 251 | 252 | #expect( 253 | descriptor.filtering([5, 6, 7]) == 254 | .init(indexes: [6, 7], isInclusive: true) 255 | ) 256 | 257 | #expect( 258 | descriptor.filtering([5, 6, 7, 8]) == 259 | .init(indexes: [6, 7], isInclusive: true) 260 | ) 261 | } 262 | 263 | @Test func closedRange() throws { 264 | let descriptor: PDFPagesDescriptor = .range(indexes: 1 ... 3) 265 | 266 | #expect( 267 | descriptor.filtering([]) == 268 | .init(indexes: [], isInclusive: false) 269 | ) 270 | 271 | #expect( 272 | descriptor.filtering([1]) == 273 | .init(indexes: [], isInclusive: false) 274 | ) 275 | 276 | #expect( 277 | descriptor.filtering([5, 6]) == 278 | .init(indexes: [6], isInclusive: false) 279 | ) 280 | 281 | #expect( 282 | descriptor.filtering([5, 6, 7]) == 283 | .init(indexes: [6, 7], isInclusive: false) 284 | ) 285 | 286 | #expect( 287 | descriptor.filtering([5, 6, 7, 8]) == 288 | .init(indexes: [6, 7, 8], isInclusive: true) 289 | ) 290 | 291 | #expect( 292 | descriptor.filtering([5, 6, 7, 8, 9]) == 293 | .init(indexes: [6, 7, 8], isInclusive: true) 294 | ) 295 | } 296 | 297 | @Test func partialRangeFrom() throws { 298 | let descriptor: PDFPagesDescriptor = .range(indexes: 1...) 
299 | 300 | #expect( 301 | descriptor.filtering([]) == 302 | .init(indexes: [], isInclusive: false) 303 | ) 304 | 305 | #expect( 306 | descriptor.filtering([1]) == 307 | .init(indexes: [], isInclusive: false) 308 | ) 309 | 310 | #expect( 311 | descriptor.filtering([5, 6]) == 312 | .init(indexes: [6], isInclusive: true) 313 | ) 314 | 315 | #expect( 316 | descriptor.filtering([5, 6, 7]) == 317 | .init(indexes: [6, 7], isInclusive: true) 318 | ) 319 | 320 | #expect( 321 | descriptor.filtering([5, 6, 7, 8]) == 322 | .init(indexes: [6, 7, 8], isInclusive: true) 323 | ) 324 | 325 | #expect( 326 | descriptor.filtering([5, 6, 7, 8, 9]) == 327 | .init(indexes: [6, 7, 8, 9], isInclusive: true) 328 | ) 329 | } 330 | 331 | @Test func partialRangeUpTo() throws { 332 | let descriptor: PDFPagesDescriptor = .range(indexes: ..<2) 333 | 334 | #expect( 335 | descriptor.filtering([]) == 336 | .init(indexes: [], isInclusive: false) 337 | ) 338 | 339 | #expect( 340 | descriptor.filtering([1]) == 341 | .init(indexes: [1], isInclusive: false) 342 | ) 343 | 344 | #expect( 345 | descriptor.filtering([5, 6]) == 346 | .init(indexes: [5, 6], isInclusive: true) 347 | ) 348 | 349 | #expect( 350 | descriptor.filtering([5, 6, 7]) == 351 | .init(indexes: [5, 6], isInclusive: true) 352 | ) 353 | } 354 | 355 | @Test func partialRangeThrough() throws { 356 | let descriptor: PDFPagesDescriptor = .range(indexes: ...2) 357 | 358 | #expect( 359 | descriptor.filtering([]) == 360 | .init(indexes: [], isInclusive: false) 361 | ) 362 | 363 | #expect( 364 | descriptor.filtering([1]) == 365 | .init(indexes: [1], isInclusive: false) 366 | ) 367 | 368 | #expect( 369 | descriptor.filtering([5, 6]) == 370 | .init(indexes: [5, 6], isInclusive: false) 371 | ) 372 | 373 | #expect( 374 | descriptor.filtering([5, 6, 7]) == 375 | .init(indexes: [5, 6, 7], isInclusive: true) 376 | ) 377 | 378 | #expect( 379 | descriptor.filtering([5, 6, 7, 8]) == 380 | .init(indexes: [5, 6, 7], isInclusive: true) 381 | ) 382 | } 383 | 384 
| @Test func openRange() throws { 385 | let descriptor: PDFPagesDescriptor = .openRange(startIndex: 1) 386 | 387 | #expect( 388 | descriptor.filtering([]) == 389 | .init(indexes: [], isInclusive: false) 390 | ) 391 | 392 | #expect( 393 | descriptor.filtering([1]) == 394 | .init(indexes: [], isInclusive: false) 395 | ) 396 | 397 | #expect( 398 | descriptor.filtering([5, 6]) == 399 | .init(indexes: [6], isInclusive: true) 400 | ) 401 | 402 | #expect( 403 | descriptor.filtering([5, 6, 7]) == 404 | .init(indexes: [6, 7], isInclusive: true) 405 | ) 406 | 407 | #expect( 408 | descriptor.filtering([5, 6, 7, 8]) == 409 | .init(indexes: [6, 7, 8], isInclusive: true) 410 | ) 411 | 412 | #expect( 413 | descriptor.filtering([5, 6, 7, 8, 9]) == 414 | .init(indexes: [6, 7, 8, 9], isInclusive: true) 415 | ) 416 | } 417 | 418 | @Test func first() throws { 419 | let descriptor: PDFPagesDescriptor = .first(count: 2) 420 | 421 | #expect( 422 | descriptor.filtering([]) == 423 | .init(indexes: [], isInclusive: false) 424 | ) 425 | 426 | #expect( 427 | descriptor.filtering([5]) == 428 | .init(indexes: [5], isInclusive: false) 429 | ) 430 | 431 | #expect( 432 | descriptor.filtering([5, 6]) == 433 | .init(indexes: [5, 6], isInclusive: true) 434 | ) 435 | 436 | #expect( 437 | descriptor.filtering([5, 6, 7]) == 438 | .init(indexes: [5, 6], isInclusive: true) 439 | ) 440 | } 441 | 442 | @Test func last() throws { 443 | let descriptor: PDFPagesDescriptor = .last(count: 2) 444 | 445 | #expect( 446 | descriptor.filtering([]) == 447 | .init(indexes: [], isInclusive: false) 448 | ) 449 | 450 | #expect( 451 | descriptor.filtering([5]) == 452 | .init(indexes: [5], isInclusive: false) 453 | ) 454 | 455 | #expect( 456 | descriptor.filtering([5, 6]) == 457 | .init(indexes: [5, 6], isInclusive: true) 458 | ) 459 | 460 | #expect( 461 | descriptor.filtering([5, 6, 7]) == 462 | .init(indexes: [6, 7], isInclusive: true) 463 | ) 464 | } 465 | 466 | @Test func pages() throws { 467 | let descriptor: 
PDFPagesDescriptor = .pages(indexes: [1, 3, 4]) 468 | 469 | #expect( 470 | descriptor.filtering([]) == 471 | .init(indexes: [], isInclusive: false) 472 | ) 473 | 474 | #expect( 475 | descriptor.filtering([5]) == 476 | .init(indexes: [], isInclusive: false) 477 | ) 478 | 479 | #expect( 480 | descriptor.filtering([5, 6]) == 481 | .init(indexes: [6], isInclusive: false) 482 | ) 483 | 484 | #expect( 485 | descriptor.filtering([5, 6, 7, 8, 9]) == 486 | .init(indexes: [6, 8, 9], isInclusive: true) 487 | ) 488 | 489 | #expect( 490 | descriptor.filtering([5, 6, 7, 8, 9, 10]) == 491 | .init(indexes: [6, 8, 9], isInclusive: true) 492 | ) 493 | } 494 | } 495 | 496 | #endif 497 | -------------------------------------------------------------------------------- /Sources/PDFProcessor/PDFProcessor Operations.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFProcessor Operations.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | import Foundation 10 | internal import SwiftExtensions 11 | import PDFKit 12 | 13 | extension PDFProcessor { 14 | /// New empty PDF files. 15 | func performNewFile() throws -> PDFOperationResult { 16 | pdfs.append(PDFFile()) 17 | return .changed 18 | } 19 | 20 | /// Clone PDF file. 21 | func performCloneFile(file: PDFFileDescriptor) throws -> PDFOperationResult { 22 | let pdf = try expectOneFile(file) 23 | pdfs.append(pdf.copy() as! PDFFile) 24 | 25 | return .changed 26 | } 27 | 28 | /// Filter PDF file(s). 
29 | func performFilterFiles(files: PDFFilesDescriptor) throws -> PDFOperationResult { 30 | let filteredPDFs = try expectZeroOrMoreFiles(files) 31 | let sourcePDFs = pdfs 32 | 33 | pdfs = filteredPDFs 34 | 35 | if sourcePDFs != filteredPDFs { 36 | return .changed 37 | } else { 38 | return .noChange(reason: "Filtered files are identical to input.") 39 | } 40 | } 41 | 42 | /// Merge all PDF file(s) sequentially into a single PDF file. 43 | /// If target file is `nil`, the first file is used as the target and the contents of subsequent 44 | /// files are appended to it. 45 | /// - Note: The target file will always be removed from the set of source file(s). 46 | /// This then allows the use of `.all` input files without encountering an error condition. 47 | /// - Note: Source file(s) not matching the input descriptor are discarded. 48 | func performMergeFiles( 49 | files: PDFFilesDescriptor, 50 | appendingTo targetFile: PDFFileDescriptor? = nil 51 | ) throws -> PDFOperationResult { 52 | var filteredPDFs = try expectZeroOrMoreFiles(files) 53 | 54 | guard let targetPDF = targetFile != nil 55 | ? try expectOneFile(targetFile!) 56 | : filteredPDFs.first 57 | else { 58 | throw PDFProcessorError.runtimeError("Could not determine file to append to.") 59 | } 60 | 61 | // ensure the target PDF is not a member of the source PDFs 62 | filteredPDFs.removeAll(targetPDF) 63 | 64 | // check count again 65 | guard !filteredPDFs.isEmpty else { 66 | return .noChange(reason: "Not enough source files to perform merge.") 67 | } 68 | 69 | for pdf in filteredPDFs { 70 | try targetPDF.doc.append(pages: pdf.doc.pages(for: .all, copy: true)) 71 | pdfs.removeAll(pdf) 72 | } 73 | 74 | return .changed 75 | } 76 | 77 | /// Split a single PDF file into multiple files. 
78 | func performSplitFile( 79 | file: PDFFileDescriptor, 80 | discardUnused: Bool, 81 | splits: PDFFileSplitDescriptor 82 | ) throws -> PDFOperationResult { 83 | let pdf = try expectOneFile(file) 84 | var remainingPageIndexes: [Int] = pdf.doc.pageIndexes() 85 | 86 | let newSplits = splits.splits(source: pdf) 87 | 88 | guard !newSplits.isEmpty else { 89 | return .noChange(reason: "File split descriptor does not result in multiple files.") 90 | } 91 | 92 | var dedupeFilenameCount = 0 93 | for split in newSplits { 94 | let pages = try pdf.doc.pages(at: split.pageRange, copy: true) 95 | let newFile = PDFFile() 96 | 97 | if let filename = split.filename { 98 | newFile.set(filenameForExportWithoutExtension: filename) 99 | } else { 100 | let newFilename = newFile.filenameForExport(withExtension: false) 101 | + "-split\(dedupeFilenameCount)" 102 | newFile.set(filenameForExportWithoutExtension: newFilename) 103 | dedupeFilenameCount += 1 104 | } 105 | newFile.doc.append(pages: pages) 106 | pdfs.append(newFile) 107 | remainingPageIndexes.removeAll(where: { split.pageRange.contains($0) }) 108 | } 109 | 110 | func removeSourceFile() { pdfs.removeAll(pdf) } 111 | 112 | // some logic and user feedback regarding source file and page utilization 113 | let remainingPageNumbersString = remainingPageIndexes 114 | .map { String($0 + 1) } 115 | .joined(separator: ", ") 116 | if discardUnused { 117 | if !remainingPageIndexes.isEmpty { 118 | logger.info( 119 | "Note: Split source file will be discarded, but page numbers \(remainingPageNumbersString) were unused." 120 | ) 121 | } 122 | removeSourceFile() 123 | } else { 124 | if remainingPageIndexes.isEmpty { 125 | logger.info("Removing split source file; all pages were split into new file(s).") 126 | removeSourceFile() 127 | } else { 128 | // source file has at least one unused page remaining 129 | logger.info( 130 | "Split source file still contains unused page numbers \(remainingPageNumbersString)." 
131 | ) 132 | 133 | // remove used pages 134 | let usedIndexes = pdf.doc.pageIndexes() 135 | .filter { !remainingPageIndexes.contains($0) } 136 | try pdf.doc.removePages(at: usedIndexes) 137 | } 138 | } 139 | 140 | return .changed 141 | } 142 | 143 | /// Set new filename for a PDF file. Passing `nil` resets the filename. 144 | func performSetFilename( 145 | file: PDFFileDescriptor, 146 | filename: String? 147 | ) throws -> PDFOperationResult { 148 | let pdf = try expectOneFile(file) 149 | 150 | return try performSetFilename(file: pdf, filename: filename) 151 | } 152 | 153 | /// Utility for `performSetFilename(file:filename:)` 154 | private func performSetFilename( 155 | file pdf: PDFFile, 156 | filename: String? 157 | ) throws -> PDFOperationResult { 158 | let oldFilename = pdf.filenameForExport(withExtension: false) 159 | 160 | pdf.set(filenameForExportWithoutExtension: filename) 161 | 162 | return filename == oldFilename 163 | ? .noChange(reason: "New filename is identical to old filename.") 164 | : .changed 165 | } 166 | 167 | /// Set new filenames for one or more PDF files. Passing `nil` resets a filename. 168 | func performSetFilenames( 169 | files: PDFFilesDescriptor, 170 | filenames: [String?] 171 | ) throws -> PDFOperationResult { 172 | let pdfs = try expectZeroOrMoreFiles(files) 173 | 174 | guard !pdfs.isEmpty else { 175 | return .noChange(reason: "No files specified.") 176 | } 177 | 178 | guard filenames.count == pdfs.count else { 179 | throw PDFProcessorError.runtimeError( 180 | "Failed to set filenames; the resulting number of files does not match the supplied number of filenames." 
181 | ) 182 | } 183 | 184 | var result: PDFOperationResult = .noChange(reason: "All filenames are identical to old filenames.") 185 | 186 | for (pdf, filename) in zip(pdfs, filenames) { 187 | let singleFileResult = try performSetFilename(file: pdf, filename: filename) 188 | if case .changed = singleFileResult { 189 | result = .changed 190 | } 191 | } 192 | 193 | return result 194 | } 195 | 196 | /// Remove metadata (attributes) from one or more files. 197 | func performRemoveFileAttributes( 198 | files: PDFFilesDescriptor 199 | ) throws -> PDFOperationResult { 200 | try performTransform(files: files) { pdf, _ in 201 | var isChanged = false 202 | 203 | // setting nil doesn't work, have to set empty dictionary instead 204 | if pdf.doc.documentAttributes?.isEmpty == false { 205 | pdf.doc.documentAttributes = [:] 206 | isChanged = true 207 | } 208 | // validation check 209 | guard pdf.doc.documentAttributes == nil 210 | || pdf.doc.documentAttributes?.isEmpty == true 211 | else { 212 | throw PDFProcessorError.runtimeError( 213 | "Failed to remove attributes for \(pdf)." 214 | ) 215 | } 216 | 217 | return isChanged ? .changed : .noChange(reason: "No attributes were found.") 218 | } 219 | } 220 | 221 | /// Set an attribute's value for one or more files. 222 | func performSetFileAttribute( 223 | files: PDFFilesDescriptor, 224 | attribute: PDFDocumentAttribute, 225 | value: String? 226 | ) throws -> PDFOperationResult { 227 | try performTransform(files: files) { pdf, _ in 228 | var isChanged = false 229 | 230 | if pdf.doc.documentAttributes == nil { 231 | pdf.doc.documentAttributes = [:] 232 | } 233 | 234 | func assignValue() { 235 | pdf.doc.documentAttributes?[attribute] = value 236 | isChanged = true 237 | } 238 | 239 | if let existingValue = pdf.doc.documentAttributes?[attribute] as? String { 240 | if existingValue != value { 241 | assignValue() 242 | } 243 | } else { 244 | assignValue() 245 | } 246 | 247 | return isChanged ? 
.changed : .noChange(reason: "Value(s) are identical.") 248 | } 249 | } 250 | 251 | /// Filter page(s). 252 | func performFilterPages( 253 | file: PDFFileDescriptor, 254 | pages: PDFPagesFilter 255 | ) throws -> PDFOperationResult { 256 | let pdf = try expectOneFile(file) 257 | 258 | let diff = try pdf.doc.pageIndexes(filter: pages) 259 | 260 | guard !diff.isIdentical else { 261 | return .noChange(reason: "Filtered page numbers are identical to input.") 262 | } 263 | 264 | try pdf.doc.removePages(at: diff.excluded) 265 | 266 | return .changed 267 | } 268 | 269 | /// Insert page(s) with a copy of other page(s) either within the same file or between two files. 270 | func performInsertPages( 271 | from sourceFile: PDFFileDescriptor, 272 | fromPages: PDFPagesFilter, 273 | to destFile: PDFFileDescriptor?, 274 | toPageIndex: Int?, 275 | behavior: PDFOperation.InterchangeBehavior 276 | ) throws -> PDFOperationResult { 277 | let (pdfA, pdfB) = try expectSourceAndDestinationFiles(sourceFile, destFile ?? sourceFile) 278 | 279 | let pdfAIndexes = try pdfA.doc.pageIndexes(filter: fromPages) 280 | 281 | guard pdfAIndexes.isInclusive else { 282 | throw PDFProcessorError.runtimeError( 283 | "Page number descriptors are invalid or out of range." 284 | ) 285 | } 286 | 287 | // append to end of file if index is nil 288 | let targetPageIndex = toPageIndex ?? pdfB.doc.pageCount 289 | 290 | let pdfAPages = try pdfA.doc.pages(at: pdfAIndexes.included, copy: pdfA != pdfB) 291 | try pdfB.doc.insert(pdfAPages, at: targetPageIndex) 292 | 293 | if behavior == .move { 294 | try pdfA.doc.removePages(at: pdfAIndexes.included) 295 | } 296 | 297 | return .changed 298 | } 299 | 300 | /// Replace page(s) with a copy of other page(s) either within the same file or between two files. 
301 | func performReplacePages( 302 | from sourceFile: PDFFileDescriptor, 303 | fromPages: PDFPagesFilter, 304 | to destFile: PDFFileDescriptor?, 305 | toPages: PDFPagesFilter, 306 | behavior: PDFOperation.InterchangeBehavior 307 | ) throws -> PDFOperationResult { 308 | let (pdfA, pdfB) = try expectSourceAndDestinationFiles(sourceFile, destFile ?? sourceFile) 309 | 310 | let pdfAIndexes = try pdfA.doc.pageIndexes(filter: fromPages) 311 | let pdfBIndexes = try pdfB.doc.pageIndexes(filter: toPages) 312 | 313 | // TODO: could have an exception for when toFilter is .all to always allow it 314 | 315 | guard pdfAIndexes.isInclusive, pdfBIndexes.isInclusive else { 316 | throw PDFProcessorError.runtimeError( 317 | "Page number descriptors are invalid or out of range." 318 | ) 319 | } 320 | 321 | guard pdfAIndexes.included.count == pdfBIndexes.included.count else { 322 | let a = pdfAIndexes.included.count 323 | let b = pdfBIndexes.included.count 324 | throw PDFProcessorError.runtimeError( 325 | "Selected page counts for replacement do not match: \(a) pages from file A to \(b) pages in file B." 326 | ) 327 | } 328 | 329 | let pdfAPages = try pdfA.doc.pages(at: pdfAIndexes.included) 330 | 331 | try zip(pdfAPages, zip(pdfAIndexes.included, pdfBIndexes.included)) 332 | .forEach { pdfAPage, indexes in 333 | if pdfA == pdfB { 334 | // behavior has no effect for same-file operations 335 | pdfB.doc.exchangePage(at: indexes.1, withPageAt: indexes.0) 336 | } else { 337 | try pdfB.doc.exchangePage(at: indexes.1, withPage: pdfAPage, copy: true) 338 | } 339 | } 340 | 341 | if behavior == .move { 342 | try pdfA.doc.removePages(at: pdfAIndexes.included) 343 | } 344 | 345 | return .changed 346 | } 347 | 348 | /// Reverse the pages in a file. 
///
/// Only the selected page indexes take part: the first selected index is exchanged with the
/// last, the second with the second-to-last, and so on.
///
/// - Parameters:
///   - file: Descriptor of the single file to operate on.
///   - pages: Filter selecting the pages whose order is reversed.
/// - Returns: `.changed`, or `.noChange` when fewer than two pages are selected.
/// - Throws: `PDFProcessorError.runtimeError` when the file is missing or the page selection
///   is not inclusive.
func performReversePageOrder(
    file: PDFFileDescriptor,
    pages: PDFPagesFilter
) throws -> PDFOperationResult {
    let pdf = try expectOneFile(file)

    let pageIndexes = try pdf.doc.pageIndexes(filter: pages)

    guard pageIndexes.isInclusive else {
        throw PDFProcessorError.runtimeError(
            "Page number descriptors are invalid or out of range."
        )
    }

    let indexesToReverse = pageIndexes.included

    guard indexesToReverse.count > 1 else {
        // reads as "only 1 page is selected" / "only 0 pages are selected"
        let plural = "page\(indexesToReverse.count == 1 ? " is" : "s are")"
        return .noChange(
            reason: "Reversing pages has no effect because only \(indexesToReverse.count) \(plural) selected for reversal."
        )
    }

    // pair each index with its mirror; only the first half is swapped so that the
    // second half of the pairs does not undo the exchanges
    let pairs = zip(indexesToReverse, indexesToReverse.reversed())
        .prefix(indexesToReverse.count / 2)

    for (srcIndex, destIndex) in pairs {
        pdf.doc.exchangePage(at: srcIndex, withPageAt: destIndex)
    }

    return .changed
}

/// Sets the rotation angle for the page in degrees.
///
/// - Parameters:
///   - files: Descriptor of the files to operate on.
///   - pages: Filter selecting the pages to rotate in each file.
///   - rotation: Rotation to apply; the new value is derived from it and the page's current
///     angle via `degrees(offsetting:)`.
/// - Returns: The aggregated result produced by `performTransform(files:pages:transform:)`.
/// - Throws: Rethrows failures from `performTransform(files:pages:transform:)`.
func performRotatePages(
    files: PDFFilesDescriptor,
    pages: PDFPagesFilter,
    rotation: PDFPageRotation
) throws -> PDFOperationResult {
    try performTransform(files: files, pages: pages) { page, _ in
        let originalPageRotation = page.rotation

        // a current rotation that `Angle` cannot be built from is treated as 0 degrees
        let sourceAngle = PDFPageRotation.Angle(degrees: page.rotation) ?? ._0degrees
        let newPageRotation = rotation.degrees(offsetting: sourceAngle)
        page.rotation = newPageRotation

        return originalPageRotation != newPageRotation
            ? .changed
            : .noChange(reason: nil)
    }
}

/// Sets the crop box of the selected page(s).
///
/// - Parameters:
///   - files: Descriptor of the files to operate on.
///   - pages: Filter selecting the pages to crop in each file.
///   - area: Area description that is resolved into a rectangle for each page.
///   - changeBehavior: `.absolute` resolves `area` against the page's media box,
///     `.relative` resolves it against the page's current crop box.
/// - Returns: The aggregated result produced by `performTransform(files:pages:transform:)`.
/// - Throws: Rethrows failures from `performTransform(files:pages:transform:)`.
func performCropPages(
    files: PDFFilesDescriptor,
    pages: PDFPagesFilter,
    area: PDFPageArea,
    apply changeBehavior: PDFOperation.ChangeBehavior
) throws -> PDFOperationResult {
    try performTransform(files: files, pages: pages) { page, _ in
        let originalCropBox = page.bounds(for: .cropBox)

        let bounds = switch changeBehavior {
        case .absolute: page.bounds(for: .mediaBox)
        case .relative: page.bounds(for: .cropBox)
        }
        // same fallback as rotation: an unrepresentable angle is treated as 0 degrees
        let rotationAngle = PDFPageRotation.Angle(degrees: page.rotation) ?? ._0degrees
        let newCropBox = area.rect(for: bounds, rotation: rotationAngle)
        page.setBounds(newCropBox, for: .cropBox)

        return originalCropBox != newCropBox
            ? .changed
            : .noChange(reason: nil)
    }
}

/// Filter annotations by type.
///
/// Every annotation on the selected pages that the filter does not contain is removed.
///
/// - Parameters:
///   - files: Descriptor of the files to operate on.
///   - pages: Filter selecting the pages to process in each file.
///   - annotations: Filter describing which annotations are kept.
/// - Returns: The aggregated result produced by `performTransform(files:pages:transform:)`.
/// - Throws: `PDFProcessorError.runtimeError` when the annotation count after removal does
///   not match the expected count; rethrows failures from the transform wrapper.
func performFilterAnnotations(
    files: PDFFilesDescriptor,
    pages: PDFPagesFilter,
    annotations: PDFAnnotationFilter
) throws -> PDFOperationResult {
    try performTransform(files: files, pages: pages) { page, pageDescription in
        let preCount = page.annotations.count
        var filteredCount = preCount
        for annotation in page.annotations {
            if !annotations.contains(annotation) {
                filteredCount -= 1
                page.removeAnnotation(annotation)
            }
        }
        let postCount = page.annotations.count

        // verify that every removal request actually took effect on the page
        guard postCount == filteredCount else {
            throw PDFProcessorError.runtimeError(
                "Could not remove \(annotations) annotations for \(pageDescription)."
            )
        }

        return preCount != postCount
            ? .changed
            : .noChange(reason: nil)
    }
}

/// Sets the burn-in-annotations write option on the file(s).
///
/// Only the file's write options are modified here; page content is not touched.
///
/// - Parameter files: Descriptor of the files to operate on.
/// - Returns: The aggregated result produced by `performTransform(files:transform:)`;
///   a file that already has the option key reports `.noChange`.
/// - Throws: Rethrows failures from `performTransform(files:transform:)`.
@available(macOS 13.0, iOS 16.0, tvOS 16.0, *)
@available(watchOS, unavailable)
func performBurnInAnnotations(
    files: PDFFilesDescriptor
) throws -> PDFOperationResult {
    try performTransform(files: files) { pdf, _ in
        if !pdf.writeOptions.keys.contains(.burnInAnnotationsOption) {
            pdf.writeOptions[.burnInAnnotationsOption] = true
            return .changed
        } else {
            return .noChange(reason: "Option already set.")
        }
    }
}

/// Extracts plain text from the selected page(s) and delivers it to a destination.
///
/// The text of each page is joined using `pageBreak.rawValue` as the separator.
///
/// - Parameters:
///   - file: Descriptor of the single file to read.
///   - pages: Filter selecting the pages to read.
///   - destination: Where the text goes: the pasteboard, a file URL, or a named variable.
///   - pageBreak: Separator inserted between the text of consecutive pages.
/// - Returns: Always `.noChange`, since the file itself is only read.
/// - Throws: `PDFProcessorError.runtimeError` when the pasteboard copy fails or is unavailable
///   on the platform; rethrows failures from page access and from writing the text file.
func performExtractPlainText(
    file: PDFFileDescriptor,
    pages: PDFPagesFilter,
    to destination: PDFTextDestination,
    pageBreak: PDFTextPageBreak
) throws -> PDFOperationResult {
    let noChangeReason = "Reading plain text."

    var pageTexts: [String] = []

    // discard result since this is a read-only operation
    _ = try performTransform(file: file, pages: pages) { page, _ in
        // pages that yield no string contribute nothing to the output
        if let pageText = page.string {
            pageTexts.append(pageText)
        }
        return .noChange(reason: noChangeReason)
    }

    let fullText = pageTexts.joined(separator: pageBreak.rawValue)

    switch destination {
    case .pasteboard:
        #if !os(tvOS) && !os(watchOS)
        if !fullText.copyToClipboard() {
            throw PDFProcessorError.runtimeError(
                "Error while attempting to copy text to pasteboard."
            )
        }
        #else
        throw PDFProcessorError.runtimeError(
            "Copy text to pasteboard operation is unavailable on the current platform."
        )
        #endif

    case let .file(url):
        try fullText.write(to: url, atomically: false, encoding: .utf8)

    case let .variable(named: variableName):
        variables[variableName] = .string(fullText)
    }

    return .noChange(reason: noChangeReason)
}

/// Replaces each file's document with an unprotected copy.
///
/// - Parameter files: Descriptor of the files to operate on.
/// - Returns: `.noChange` when no files match, otherwise `.changed`.
/// - Throws: Rethrows failures from resolving the files or creating the unprotected copy.
func performRemoveProtections(
    files: PDFFilesDescriptor
) throws -> PDFOperationResult {
    let files = try expectZeroOrMoreFiles(files)

    guard !files.isEmpty else {
        return .noChange()
    }

    for file in files {
        // TODO: add checks to see if file has permissions set first, and skip removing protections if unnecessary and return `.noChange`

        // capture the export filename before the document is swapped out
        let originalFilenameForExport = file.filenameForExport(withExtension: false)
        let unprotectedFile = try file.doc.unprotectedCopy()
        file.doc = unprotectedFile

        // new PDFDocument does not inherit `documentURL` so we will set its custom filename
        // since `documentURL` is a read-only property
        if !file.hasCustomExportFilename {
            file.set(filenameForExportWithoutExtension: originalFilenameForExport)
        }
    }

    return .changed
}
}

// MARK: - Helpers

extension PDFProcessor {
/// Resolves a descriptor to exactly one loaded file.
///
/// - Parameters:
///   - descriptor: Descriptor identifying the file among the loaded PDFs.
///   - error: Optional custom error message; a default message is used when `nil`.
/// - Returns: The first loaded file matching the descriptor.
/// - Throws: `PDFProcessorError.runtimeError` when no loaded file matches.
func expectOneFile(
    _ descriptor: PDFFileDescriptor,
    error: String? = nil
) throws -> PDFFile {
    guard let file = descriptor.first(in: pdfs) else {
        throw PDFProcessorError.runtimeError(
            error ?? "Missing input PDF file: \(descriptor.verboseDescription)."
        )
    }

    return file
}

/// Resolves a source descriptor and a destination descriptor to loaded files.
///
/// - Parameters:
///   - descriptorA: Descriptor identifying the source file.
///   - descriptorB: Descriptor identifying the destination file.
///   - error: Optional custom error message; a default message is used when `nil`.
/// - Returns: The matched files, labelled `pdfA` (source) and `pdfB` (destination).
/// - Throws: `PDFProcessorError.runtimeError` when either descriptor matches no loaded file.
func expectSourceAndDestinationFiles(
    _ descriptorA: PDFFileDescriptor,
    _ descriptorB: PDFFileDescriptor,
    error: String? = nil
) throws -> (pdfA: PDFFile, pdfB: PDFFile) {
    // the default messages use `verboseDescription`, matching the other `expect…` helpers
    guard let fileA = descriptorA.first(in: pdfs) else {
        throw PDFProcessorError.runtimeError(
            error ?? "Missing input PDF file: \(descriptorA.verboseDescription)."
        )
    }
    guard let fileB = descriptorB.first(in: pdfs) else {
        throw PDFProcessorError.runtimeError(
            error ?? "Missing input PDF file: \(descriptorB.verboseDescription)."
        )
    }

    return (pdfA: fileA, pdfB: fileB)
}

/// Resolves a descriptor to the loaded files it matches.
///
/// - Parameters:
///   - descriptor: Descriptor identifying files among the loaded PDFs.
///   - error: Optional custom error message; a default message is used when `nil`.
/// - Returns: The matched files; callers in this file treat an empty array as "no files".
/// - Throws: `PDFProcessorError.runtimeError` when the descriptor's filtering returns `nil`.
func expectZeroOrMoreFiles(
    _ descriptor: PDFFilesDescriptor,
    error: String? = nil
) throws -> [PDFFile] {
    guard let files = descriptor.filtering(pdfs) else {
        throw PDFProcessorError.runtimeError(
            error ?? "Missing input PDF files: \(descriptor.verboseDescription)."
        )
    }

    return files
}

/// Generic wrapper for transforming file(s).
///
/// Calls `transform` once for every matched file, passing the file and a textual
/// description of it.
///
/// - Parameters:
///   - files: Descriptor of the files to operate on.
///   - transform: Closure applied to each file.
/// - Returns: `.noChange` when no files match; `.changed` as soon as any call has returned
///   `.changed`; otherwise the result of the last call.
/// - Throws: Rethrows failures from resolving the files or from `transform`.
func performTransform(
    files: PDFFilesDescriptor,
    transform: (_ file: PDFFile, _ pageDescription: String) throws -> PDFOperationResult
) throws -> PDFOperationResult {
    let pdfs = try expectZeroOrMoreFiles(files)

    guard !pdfs.isEmpty else {
        return .noChange(reason: "No files specified.")
    }

    var returnResult: PDFOperationResult = .noChange(reason: nil)

    for pdf in pdfs {
        let result = try transform(pdf, "file \(pdf)")
        // once a change has been recorded it is never downgraded by a later result
        if returnResult != .changed { returnResult = result }
    }

    return returnResult
}

/// Generic wrapper for transforming page(s).
///
/// Runs the per-file page transform for every file matched by `files`.
///
/// - Parameters:
///   - files: Descriptor of the files to operate on.
///   - pages: Filter selecting the pages to transform in each file.
///   - transform: Closure applied to each selected page.
/// - Returns: `.noChange` when no files match; `.changed` as soon as any file has reported
///   `.changed`; otherwise the result reported for the last file.
/// - Throws: Rethrows failures from resolving the files or from the per-file transform.
func performTransform(
    files: PDFFilesDescriptor,
    pages: PDFPagesFilter,
    transform: (_ page: PDFPage, _ pageDescription: String) throws -> PDFOperationResult
) throws -> PDFOperationResult {
    let matchedFiles = try expectZeroOrMoreFiles(files)

    if matchedFiles.isEmpty {
        return .noChange(reason: "No files specified.")
    }

    var aggregate: PDFOperationResult = .noChange(reason: nil)

    for matchedFile in matchedFiles {
        let outcome = try performTransform(file: matchedFile, pages: pages, transform: transform)

        // every file is still processed, but a recorded change is never overwritten
        guard aggregate != .changed else { continue }
        aggregate = outcome
    }

    return aggregate
}

/// Generic wrapper for transforming page(s).
///
/// Resolves `file` to a single loaded file and forwards to the `PDFFile`-based overload.
///
/// - Parameters:
///   - file: Descriptor of the single file to operate on.
///   - pages: Filter selecting the pages to transform.
///   - transform: Closure applied to each selected page.
/// - Returns: The result of the `PDFFile`-based overload.
/// - Throws: Rethrows failures from resolving the file or from the forwarded call.
func performTransform(
    file: PDFFileDescriptor,
    pages: PDFPagesFilter,
    transform: (_ page: PDFPage, _ pageDescription: String) throws -> PDFOperationResult
) throws -> PDFOperationResult {
    try performTransform(file: expectOneFile(file), pages: pages, transform: transform)
}

/// Generic wrapper for transforming page(s).
///
/// Applies `transform` to each selected page of an already-resolved file.
func performTransform(
    file pdf: PDFFile,
    pages: PDFPagesFilter,
    transform: (_ page: PDFPage, _ pageDescription: String) throws -> PDFOperationResult
) throws -> PDFOperationResult {
    let pdfIndexes = try pdf.doc.pageIndexes(filter: pages)

    guard pdfIndexes.isInclusive else {
        throw PDFProcessorError.runtimeError(
            "Page number descriptor is invalid or out of range."
        )
    }

    var returnResult: PDFOperationResult = .noChange(reason: nil)

    for index in pdfIndexes.included {
        guard let page = pdf.doc.page(at: index) else {
            throw PDFProcessorError.runtimeError(
                "Page number \(index + 1) of \(pdf) could not be read."
662 | ) 663 | } 664 | let result = try transform(page, "page number \(index + 1) of \(pdf)") 665 | if returnResult != .changed { returnResult = result } 666 | } 667 | 668 | return returnResult 669 | } 670 | } 671 | 672 | #endif 673 | -------------------------------------------------------------------------------- /Tests/PDFProcessorTests/PDFProcessor Operations Tests.swift: -------------------------------------------------------------------------------- 1 | // 2 | // PDFProcessor Operations Tests.swift 3 | // swift-pdf-processor • https://github.com/orchetect/swift-pdf-processor 4 | // © 2023-2024 Steffan Andrews • Licensed under MIT License 5 | // 6 | 7 | #if canImport(PDFKit) 8 | 9 | @testable import PDFProcessor 10 | internal import SwiftExtensions 11 | import PDFKit 12 | import Testing 13 | import TestingExtensions 14 | 15 | /// These are integration tests to test the actual operations, 16 | /// not the specific syntax or underlying semantics. 17 | @Suite struct PDFProcessorOperationsTests { 18 | @Test func newFile() throws { 19 | let tool = PDFProcessor() 20 | 21 | try tool.perform(operations: [ 22 | .newFile 23 | ]) 24 | 25 | #expect(tool.pdfs.count == 1) 26 | #expect(tool.pdfs[0].doc.pageCount == 0) 27 | 28 | try tool.perform(operations: [ 29 | .newFile 30 | ]) 31 | 32 | #expect(tool.pdfs.count == 2) 33 | #expect(tool.pdfs[1].doc.pageCount == 0) 34 | } 35 | 36 | @Test func cloneFile() throws { 37 | let tool = PDFProcessor() 38 | 39 | try tool.load(pdfs: [TestResource.pdf1page.url()]) 40 | 41 | try tool.perform(operations: [ 42 | .cloneFile(file: .first) 43 | ]) 44 | 45 | #expect(tool.pdfs.count == 2) 46 | try expectFilesAreEqual(tool.pdfs[0], tool.pdfs[1]) 47 | 48 | try expect(page: tool.pdfs[0].doc.page(at: 0), isTagged: "1") 49 | try expect(page: tool.pdfs[1].doc.page(at: 0), isTagged: "1") 50 | } 51 | 52 | @Test func filterFiles() throws { 53 | let tool = PDFProcessor() 54 | 55 | try tool.load(pdfs: [ 56 | testPDF1Page(), 57 | testPDF2Pages(), 58 | 
testPDF5Pages() 59 | ]) 60 | 61 | // all 62 | 63 | try tool.perform(operations: [ 64 | .filterFiles(.all) 65 | ]) 66 | 67 | #expect(tool.pdfs.count == 3) 68 | 69 | // index range 70 | 71 | try tool.perform(operations: [ 72 | .filterFiles(.indexRange(1 ... 2)) 73 | ]) 74 | 75 | #expect(tool.pdfs.count == 2) 76 | try expectDocumentIsEqual(tool.pdfs[0].doc, testPDF2Pages()) 77 | try expectDocumentIsEqual(tool.pdfs[1].doc, testPDF5Pages()) 78 | 79 | // index 80 | 81 | try tool.perform(operations: [ 82 | .filterFiles(.index(1)) 83 | ]) 84 | 85 | #expect(tool.pdfs.count == 1) 86 | try expectDocumentIsEqual(tool.pdfs[0].doc, testPDF5Pages()) 87 | } 88 | 89 | @Test func mergeFilesA() throws { 90 | let tool = PDFProcessor() 91 | 92 | try tool.load(pdfs: [ 93 | testPDF1Page(), 94 | testPDF2Pages(), 95 | testPDF5Pages() 96 | ]) 97 | 98 | try tool.perform(operations: [ 99 | .mergeFiles() 100 | ]) 101 | 102 | #expect(tool.pdfs.count == 1) 103 | #expect(tool.pdfs[0].doc.pageCount == 8) 104 | try expectPagesAreEqual( 105 | tool.pdfs[0].doc.pages(for: .all), 106 | testPDF1Page().pages() + testPDF2Pages().pages() + testPDF5Pages().pages() 107 | ) 108 | } 109 | 110 | @Test func mergeFilesB() throws { 111 | let tool = PDFProcessor() 112 | 113 | try tool.load(pdfs: [ 114 | testPDF1Page(), 115 | testPDF2Pages(), 116 | testPDF5Pages() 117 | ]) 118 | 119 | try tool.perform(operations: [ 120 | .mergeFiles(.second, appendingTo: .last) 121 | ]) 122 | 123 | #expect(tool.pdfs.count == 2) 124 | 125 | try expectDocumentIsEqual(tool.pdfs[0].doc, testPDF1Page()) 126 | 127 | #expect(tool.pdfs[1].doc.pageCount == 7) 128 | try expectPagesAreEqual( 129 | tool.pdfs[1].doc.pages(for: .all), 130 | testPDF5Pages().pages() + testPDF2Pages().pages() 131 | ) 132 | } 133 | 134 | @Test func splitFile() throws { 135 | let tool = PDFProcessor() 136 | 137 | try tool.load(pdfs: [ 138 | testPDF5Pages() 139 | ]) 140 | 141 | try tool.perform(operations: [ 142 | .splitFile(file: .first, discardUnused: false, 
.every(pageCount: 2)) 143 | ]) 144 | 145 | #expect(tool.pdfs.count == 3) 146 | 147 | #expect(tool.pdfs[0].doc.pageCount == 2) 148 | try expect(page: tool.pdfs[0].doc.page(at: 0), isTagged: "1") 149 | try expect(page: tool.pdfs[0].doc.page(at: 1), isTagged: "2") 150 | 151 | #expect(tool.pdfs[1].doc.pageCount == 2) 152 | try expect(page: tool.pdfs[1].doc.page(at: 0), isTagged: "3") 153 | try expect(page: tool.pdfs[1].doc.page(at: 1), isTagged: "4") 154 | 155 | #expect(tool.pdfs[2].doc.pageCount == 1) 156 | try expect(page: tool.pdfs[2].doc.page(at: 0), isTagged: "5") 157 | } 158 | 159 | @Test func setFilename() throws { 160 | let tool = PDFProcessor() 161 | 162 | try tool.load(pdfs: [ 163 | testPDF1Page(), 164 | testPDF2Pages(), 165 | testPDF5Pages() 166 | ]) 167 | 168 | // just check default filename - not important for this test but we'll do it any way 169 | #expect(tool.pdfs[1].filenameForExport(withExtension: false) == TestResource.pdf2pages.name + "-processed") 170 | 171 | try tool.perform(operations: [ 172 | .setFilename(file: .index(1), filename: "NewFileName") 173 | ]) 174 | 175 | #expect(tool.pdfs[1].filenameForExport(withExtension: false) == "NewFileName") 176 | } 177 | 178 | @Test func setFilenames() throws { 179 | let tool = PDFProcessor() 180 | 181 | try tool.load(pdfs: [ 182 | testPDF1Page(), 183 | testPDF2Pages(), 184 | testPDF5Pages() 185 | ]) 186 | 187 | // check default filenames first 188 | #expect(tool.pdfs[0].filenameForExport(withExtension: false) == TestResource.pdf1page.name + "-processed") 189 | #expect(tool.pdfs[1].filenameForExport(withExtension: false) == TestResource.pdf2pages.name + "-processed") 190 | #expect(tool.pdfs[2].filenameForExport(withExtension: false) == TestResource.pdf5pages.name + "-processed") 191 | 192 | try tool.perform(operations: [ 193 | .setFilenames(files: .all, filenames: ["Renamed1", "Renamed2", "Renamed3"]) 194 | ]) 195 | 196 | // check renamed files 197 | #expect(tool.pdfs[0].filenameForExport(withExtension: 
false) == "Renamed1") 198 | #expect(tool.pdfs[1].filenameForExport(withExtension: false) == "Renamed2") 199 | #expect(tool.pdfs[2].filenameForExport(withExtension: false) == "Renamed3") 200 | } 201 | 202 | @Test func removeFileAttributes() throws { 203 | let tool = PDFProcessor() 204 | 205 | try tool.load(pdfs: [ 206 | testPDF1Page_withAttrAnno() 207 | ]) 208 | 209 | try tool.perform(operations: [ 210 | .removeFileAttributes(files: .all) 211 | ]) 212 | 213 | #expect(tool.pdfs.count == 1) 214 | try expectPageIsEqual( 215 | tool.pdfs[0].doc.page(at: 0)!, 216 | testPDF1Page_withAttrAnno().page(at: 0)!, 217 | ignoreOpenState: true 218 | ) 219 | #expect(tool.pdfs[0].doc.documentAttributes?.count ?? 0 == 0) 220 | } 221 | 222 | @Test func setFileAttribute() throws { 223 | let tool = PDFProcessor() 224 | 225 | try tool.load(pdfs: [ 226 | testPDF1Page_withAttrAnno() 227 | ]) 228 | 229 | // set new value 230 | 231 | try tool.perform(operations: [ 232 | .setFileAttribute(files: .all, .titleAttribute, value: "New Title") 233 | ]) 234 | 235 | #expect(tool.pdfs[0].doc.documentAttributes?.count == 7) 236 | #expect( 237 | tool.pdfs[0].doc.documentAttributes?[PDFDocumentAttribute.titleAttribute] as? 
String == 238 | "New Title" 239 | ) 240 | 241 | // clear value 242 | 243 | try tool.perform(operations: [ 244 | .setFileAttribute(files: .all, .titleAttribute, value: nil) 245 | ]) 246 | 247 | #expect(tool.pdfs[0].doc.documentAttributes?.count == 6) 248 | #expect( 249 | tool.pdfs[0].doc.documentAttributes?.keys 250 | .contains(PDFDocumentAttribute.titleAttribute) == false 251 | ) 252 | } 253 | 254 | @Test func filterPages() throws { 255 | let tool = PDFProcessor() 256 | 257 | try tool.load(pdfs: [ 258 | testPDF1Page(), 259 | testPDF2Pages(), 260 | testPDF5Pages() 261 | ]) 262 | 263 | try tool.perform(operations: [ 264 | .filterPages(file: .index(2), pages: .include([.oddNumbers])) 265 | ]) 266 | 267 | #expect(tool.pdfs.count == 3) 268 | 269 | try expectDocumentIsEqual(tool.pdfs[0].doc, testPDF1Page()) 270 | try expectDocumentIsEqual(tool.pdfs[1].doc, testPDF2Pages()) 271 | try expectPagesAreEqual(tool.pdfs[2].doc.pages(), testPDF5Pages().pages(at: [0, 2, 4])) 272 | } 273 | 274 | @Test func copyPages() throws { 275 | let tool = PDFProcessor() 276 | 277 | try tool.load(pdfs: [ 278 | testPDF1Page(), 279 | testPDF2Pages(), 280 | testPDF5Pages() 281 | ]) 282 | 283 | try tool.perform(operations: [ 284 | .copyPages( 285 | fromFile: .index(2), 286 | fromPages: .include([.evenNumbers]), 287 | toFile: .index(1), 288 | toPageIndex: 1 289 | ) 290 | ]) 291 | 292 | #expect(tool.pdfs.count == 3) 293 | 294 | try expectDocumentIsEqual(tool.pdfs[0].doc, testPDF1Page()) 295 | 296 | let fileIdx1Pages = try tool.pdfs[1].doc.pages() 297 | #expect(fileIdx1Pages.count == 2 + 2) 298 | try expect(page: fileIdx1Pages[0], isTagged: "1") // testPDF2Pages page 1 299 | try expect(page: fileIdx1Pages[1], isTagged: "2") // testPDF5Pages page 2 300 | try expect(page: fileIdx1Pages[2], isTagged: "4") // testPDF5Pages page 4 301 | try expect(page: fileIdx1Pages[3], isTagged: "2") // testPDF2Pages page 2 302 | 303 | try expectDocumentIsEqual(tool.pdfs[2].doc, testPDF5Pages()) 304 | } 305 | 306 | @Test 
func movePages() throws { 307 | let tool = PDFProcessor() 308 | 309 | try tool.load(pdfs: [ 310 | testPDF1Page(), 311 | testPDF2Pages(), 312 | testPDF5Pages() 313 | ]) 314 | 315 | try tool.perform(operations: [ 316 | .movePages( 317 | fromFile: .index(2), 318 | fromPages: .include([.evenNumbers]), 319 | toFile: .index(1), 320 | toPageIndex: 1 321 | ) 322 | ]) 323 | 324 | #expect(tool.pdfs.count == 3) 325 | 326 | try expectDocumentIsEqual(tool.pdfs[0].doc, testPDF1Page()) 327 | 328 | let fileIdx1Pages = try tool.pdfs[1].doc.pages() 329 | #expect(fileIdx1Pages.count == 2 + 2) 330 | try expect(page: fileIdx1Pages[0], isTagged: "1") // testPDF2Pages page 1 331 | try expect(page: fileIdx1Pages[1], isTagged: "2") // testPDF5Pages page 2 332 | try expect(page: fileIdx1Pages[2], isTagged: "4") // testPDF5Pages page 4 333 | try expect(page: fileIdx1Pages[3], isTagged: "2") // testPDF2Pages page 2 334 | 335 | let fileIdx2Pages = try tool.pdfs[2].doc.pages() 336 | #expect(fileIdx2Pages.count == 5 - 2) 337 | try expect(page: fileIdx2Pages[0], isTagged: "1") 338 | try expect(page: fileIdx2Pages[1], isTagged: "3") 339 | try expect(page: fileIdx2Pages[2], isTagged: "5") 340 | } 341 | 342 | /// Replace pages by copying. 343 | @Test func replacePagesCopy() throws { 344 | let tool = PDFProcessor() 345 | 346 | try tool.load(pdfs: [ 347 | testPDF1Page(), 348 | testPDF2Pages(), 349 | testPDF5Pages() 350 | ]) 351 | 352 | try tool.perform(operations: [ 353 | .replacePages( 354 | fromFile: .second, 355 | fromPages: .all, 356 | toFile: .last, 357 | toPages: .include([.range(indexes: 3 ... 
4)]), 358 | behavior: .copy 359 | ) 360 | ]) 361 | 362 | #expect(tool.pdfs.count == 3) 363 | 364 | try expectDocumentIsEqual(tool.pdfs[0].doc, testPDF1Page()) 365 | try expectDocumentIsEqual(tool.pdfs[1].doc, testPDF2Pages()) 366 | 367 | let fileIdx2Pages = try tool.pdfs[2].doc.pages() 368 | #expect(fileIdx2Pages.count == 5) 369 | try expect(page: fileIdx2Pages[0], isTagged: "1") // testPDF5Pages page 1 370 | try expect(page: fileIdx2Pages[1], isTagged: "2") // testPDF5Pages page 2 371 | try expect(page: fileIdx2Pages[2], isTagged: "3") // testPDF5Pages page 3 372 | try expect(page: fileIdx2Pages[3], isTagged: "1") // testPDF2Pages page 1 373 | try expect(page: fileIdx2Pages[4], isTagged: "2") // testPDF2Pages page 2 374 | } 375 | 376 | /// Replace pages by moving. 377 | @Test func replacePagesMove() throws { 378 | let tool = PDFProcessor() 379 | 380 | try tool.load(pdfs: [ 381 | testPDF1Page(), 382 | testPDF2Pages(), 383 | testPDF5Pages() 384 | ]) 385 | 386 | try tool.perform(operations: [ 387 | .replacePages( 388 | fromFile: .second, 389 | fromPages: .all, 390 | toFile: .last, 391 | toPages: .include([.range(indexes: 3 ... 4)]), 392 | behavior: .move 393 | ) 394 | ]) 395 | 396 | #expect(tool.pdfs.count == 3) 397 | 398 | try expectDocumentIsEqual(tool.pdfs[0].doc, testPDF1Page()) 399 | 400 | #expect(tool.pdfs[1].doc.pageCount == 0) 401 | 402 | let fileIdx2Pages = try tool.pdfs[2].doc.pages() 403 | #expect(fileIdx2Pages.count == 5) 404 | try expect(page: fileIdx2Pages[0], isTagged: "1") // testPDF5Pages page 1 405 | try expect(page: fileIdx2Pages[1], isTagged: "2") // testPDF5Pages page 2 406 | try expect(page: fileIdx2Pages[2], isTagged: "3") // testPDF5Pages page 3 407 | try expect(page: fileIdx2Pages[3], isTagged: "1") // testPDF2Pages page 1 408 | try expect(page: fileIdx2Pages[4], isTagged: "2") // testPDF2Pages page 2 409 | } 410 | 411 | /// Reverse page order of all pages of a file. 
412 | @Test func reversePageOrderA() throws { 413 | let tool = PDFProcessor() 414 | 415 | try tool.load(pdfs: [ 416 | testPDF5Pages() 417 | ]) 418 | 419 | try tool.perform(operations: [ 420 | .reversePageOrder(file: .first, pages: .all) 421 | ]) 422 | 423 | #expect(tool.pdfs.count == 1) 424 | 425 | let filePages = try tool.pdfs[0].doc.pages() 426 | #expect(filePages.count == 5) 427 | try expect(page: filePages[0], isTagged: "5") 428 | try expect(page: filePages[1], isTagged: "4") 429 | try expect(page: filePages[2], isTagged: "3") 430 | try expect(page: filePages[3], isTagged: "2") 431 | try expect(page: filePages[4], isTagged: "1") 432 | } 433 | 434 | /// Reverse page order of some pages of a file. 435 | @Test func reversePageOrderB() throws { 436 | let tool = PDFProcessor() 437 | 438 | try tool.load(pdfs: [ 439 | testPDF5Pages() 440 | ]) 441 | 442 | try tool.perform(operations: [ 443 | .reversePageOrder(file: .first, pages: .include([.range(indexes: 1 ... 3)])) 444 | ]) 445 | 446 | #expect(tool.pdfs.count == 1) 447 | 448 | let filePages = try tool.pdfs[0].doc.pages() 449 | #expect(filePages.count == 5) 450 | try expect(page: filePages[0], isTagged: "1") 451 | try expect(page: filePages[1], isTagged: "4") 452 | try expect(page: filePages[2], isTagged: "3") 453 | try expect(page: filePages[3], isTagged: "2") 454 | try expect(page: filePages[4], isTagged: "5") 455 | } 456 | 457 | @Test func rotatePages() throws { 458 | let tool = PDFProcessor() 459 | 460 | try tool.load(pdfs: [ 461 | testPDF5Pages() 462 | ]) 463 | 464 | // establish baseline 465 | #expect(tool.pdfs[0].doc.page(at: 0)?.rotation == 0) 466 | #expect(tool.pdfs[0].doc.page(at: 1)?.rotation == 0) 467 | #expect(tool.pdfs[0].doc.page(at: 2)?.rotation == 0) 468 | #expect(tool.pdfs[0].doc.page(at: 3)?.rotation == 0) 469 | #expect(tool.pdfs[0].doc.page(at: 4)?.rotation == 0) 470 | 471 | // absolute rotation 472 | try tool.perform(operations: [ 473 | .rotatePages( 474 | files: .first, 475 | pages: 
.include([.pages(indexes: [2])]), 476 | rotation: .init(angle: ._180degrees, apply: .absolute) 477 | ) 478 | ]) 479 | 480 | #expect(tool.pdfs[0].doc.page(at: 0)?.rotation == 0) 481 | #expect(tool.pdfs[0].doc.page(at: 1)?.rotation == 0) 482 | #expect(tool.pdfs[0].doc.page(at: 2)?.rotation == 180) 483 | #expect(tool.pdfs[0].doc.page(at: 3)?.rotation == 0) 484 | #expect(tool.pdfs[0].doc.page(at: 4)?.rotation == 0) 485 | 486 | // relative rotation 487 | try tool.perform(operations: [ 488 | .rotatePages( 489 | files: .first, 490 | pages: .include([.pages(indexes: [2])]), 491 | rotation: .init(angle: ._90degrees, apply: .relative) 492 | ) 493 | ]) 494 | 495 | #expect(tool.pdfs[0].doc.page(at: 0)?.rotation == 0) 496 | #expect(tool.pdfs[0].doc.page(at: 1)?.rotation == 0) 497 | #expect(tool.pdfs[0].doc.page(at: 2)?.rotation == 270) 498 | #expect(tool.pdfs[0].doc.page(at: 3)?.rotation == 0) 499 | #expect(tool.pdfs[0].doc.page(at: 4)?.rotation == 0) 500 | } 501 | 502 | // TODO: add unit test for 'crop' operation 503 | 504 | @Test func filterAnnotations() throws { 505 | let tool = PDFProcessor() 506 | 507 | try tool.load(pdfs: [ 508 | testPDF1Page_withAttrAnno() 509 | ]) 510 | 511 | // initial conditions 512 | 513 | try #require(tool.pdfs.count == 1) 514 | 515 | #expect(tool.pdfs[0].doc.page(at: 0)?.annotations.count == 8) 516 | 517 | // all 518 | 519 | try tool.perform(operations: [ 520 | .filterAnnotations(files: .first, pages: .all, annotations: .all) 521 | ]) 522 | 523 | try #require(tool.pdfs.count == 1) 524 | 525 | #expect(tool.pdfs[0].doc.pageCount == 1) 526 | #expect(tool.pdfs[0].doc.page(at: 0)?.annotations.count == 8) 527 | 528 | // specific subtypes 529 | 530 | try tool.perform(operations: [ 531 | .filterAnnotations(files: .first, pages: .all, annotations: .exclude([.circle, .square])) 532 | ]) 533 | 534 | try #require(tool.pdfs.count == 1) 535 | 536 | #expect(tool.pdfs[0].doc.pageCount == 1) 537 | #expect(tool.pdfs[0].doc.page(at: 0)?.annotations.count == 6) 538 | 
539 | // none 540 | 541 | try tool.perform(operations: [ 542 | .filterAnnotations(files: .first, pages: .all, annotations: .none) 543 | ]) 544 | 545 | try #require(tool.pdfs.count == 1) 546 | 547 | #expect(tool.pdfs[0].doc.pageCount == 1) 548 | #expect(tool.pdfs[0].doc.page(at: 0)?.annotations.count == 0) 549 | } 550 | 551 | @available(watchOS, unavailable) 552 | @Test func burnInAnnotations() throws { 553 | let tool = PDFProcessor() 554 | 555 | try tool.load(pdfs: [ 556 | testPDF1Page_withAttrAnno() 557 | ]) 558 | 559 | // initial conditions 560 | 561 | try #require(tool.pdfs.count == 1) 562 | 563 | #expect(tool.pdfs[0].doc.page(at: 0)?.annotations.count == 8) 564 | 565 | // set option 566 | 567 | try tool.perform(operations: [ 568 | .burnInAnnotations(files: .all) // only takes effect on file write to disk 569 | ]) 570 | 571 | try #require(tool.pdfs.count == 1) 572 | 573 | #expect(tool.pdfs[0].doc.pageCount == 1) 574 | #expect(tool.pdfs[0].doc.page(at: 0)?.annotations.count == 8) 575 | 576 | // write file and read it back 577 | // (must write to disk, as `dataRepresentation(options:)` does not take write options) 578 | 579 | let tempDir = FileManager.default.temporaryDirectoryCompat 580 | .appendingPathComponent(UUID().uuidString) 581 | try FileManager.default.createDirectory(at: tempDir, withIntermediateDirectories: false) 582 | // defer cleanup 583 | defer { try? 
FileManager.default.removeItem(at: tempDir) } 584 | 585 | try tool.savePDFs(outputDir: tempDir) 586 | let filename = tool.pdfs[0].filenameForExport(withExtension: true) 587 | let savedPDF = tempDir.appendingPathComponent(filename) 588 | let newDoc = try #require(PDFDocument(url: savedPDF)) 589 | 590 | #expect(newDoc.page(at: 0)?.annotations.isEmpty == true) 591 | } 592 | 593 | @Test func extractPlainText() throws { 594 | let tool = PDFProcessor() 595 | 596 | try tool.load(pdfs: [ 597 | try #require(PDFDocument(url: TestResource.loremIpsum.url())) 598 | ]) 599 | 600 | let textPage1 = "TEXTPAGE1" 601 | 602 | try tool.perform(operations: [ 603 | .extractPlainText( 604 | file: .first, 605 | pages: .include([.first(count: 1)]), 606 | to: .variable(named: textPage1), 607 | pageBreak: .none 608 | ) 609 | ]) 610 | 611 | let extractedPage1TextCase = try #require(tool.variables[textPage1]) 612 | guard case let .string(extractedPage1Text) = extractedPage1TextCase 613 | else { #fail(); return } 614 | 615 | let expectedPage1Text = """ 616 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Duis ultrices 617 | vel mi vitae pharetra. Pellentesque venenatis massa et dui viverra 618 | efficitur. Aliquam mollis ex sit amet nibh tincidunt, nec posuere orci 619 | tempor. Nullam eleifend, sem sed ornare laoreet, justo dolor ultrices 620 | tellus, eu viverra odio risus et ante. Curabitur vel tempus est. Fusce in 621 | ante aliquam, iaculis risus eget, ultricies magna. Morbi molestie sem 622 | auctor tristique luctus. Vivamus nisi augue, elementum at nibh vel, 623 | volutpat vestibulum justo. Nulla eu libero dui. Nulla non pharetra magna. 624 | Aliquam ut finibus dui, sit amet consequat lectus. Donec massa turpis, 625 | faucibus nec nisl posuere, cursus vestibulum tellus. Mauris dignissim 626 | orci a rutrum tristique. Quisque fermentum metus ut bibendum accumsan. 627 | Proin hendrerit vulputate nisi. Pellentesque suscipit lectus quam, sit 628 | amet fermentum quam accumsan at. 
Sed ac justo nisl. Duis leo dolor, 629 | suscipit elementum ligula a, consequat lacinia magna. Pellentesque at 630 | accumsan nisi. Interdum et malesuada fames ac ante ipsum primis in 631 | faucibus. Mauris efficitur metus eget massa malesuada placerat. 632 | Sed sed magna consectetur, facilisis magna vitae, consequat quam. Fusce 633 | semper libero risus, quis sagittis arcu ornare a. Morbi varius lacus eget 634 | magna sodales, eu auctor tortor porttitor. Aenean gravida justo ipsum, 635 | efficitur tempus velit iaculis sit amet. Duis ut suscipit ipsum, non 636 | vestibulum urna. Etiam viverra sit amet sapien ut viverra. Ut suscipit 637 | feugiat risus a lacinia. In hac habitasse platea dictumst. Duis fringilla 638 | tellus sed luctus consequat. Nam placerat venenatis ligula pharetra 639 | laoreet. Donec quis purus non tortor blandit facilisis. Proin iaculis 640 | augue eu dignissim sagittis. Proin elementum dui iaculis diam blandit 641 | aliquam. Duis in nunc leo. Quisque mattis risus quis lacinia interdum. 642 | Nullam nec pulvinar massa. 643 | Sed molestie nisi ligula, id semper ante ullamcorper eget. Proin sed nisl 644 | aliquet, porta nisl ut, aliquet magna. Etiam volutpat congue est, eget 645 | pretium ligula feugiat quis. In consectetur tellus leo, nec malesuada 646 | mauris gravida mattis. Mauris viverra ultricies nibh at tempor. Donec 647 | blandit sem non rutrum mollis. Etiam metus erat, fermentum vel congue ut, 648 | bibendum rhoncus risus. Nulla tincidunt vehicula eleifend. 649 | Curabitur volutpat lorem et mauris efficitur, at dictum odio mollis. Nunc 650 | euismod euismod placerat. Morbi eleifend volutpat vehicula. Curabitur eu 651 | mauris vel purus commodo dignissim in a velit. Donec auctor tempus neque, 652 | vitae venenatis velit fringilla eu. Aliquam erat volutpat. Morbi iaculis, 653 | nisl vitae consectetur consectetur, tortor odio imperdiet nisi, quis 654 | suscipit libero urna non dui. Fusce tempor rhoncus commodo. 
Proin 655 | molestie porta nisi. Proin id felis ante. Nulla vulputate nunc nulla, sit 656 | amet consequat felis ornare non. Morbi tristique vitae nunc ut pretium. 657 | Pellentesque ac orci tincidunt, tempus nisi eget, volutpat sapien. Fusce 658 | """ 659 | 660 | // TODO: This could be a flakey test if PDFKit changes how it extracts text from PDFs. 661 | // oddly enough, PDFKit has slightly different behaviors on different platforms (and has 662 | // changed over time). 663 | // sometimes it pads extracted text with whitespace and/or trailing line-break, sometimes it 664 | // doesn't. 665 | // for our tests we choose to ignore these differences when comparing. 666 | #expect( 667 | extractedPage1Text.trimmingCharacters(in: .whitespacesAndNewlines) == 668 | expectedPage1Text 669 | ) 670 | } 671 | /// Verifies that the `.removeProtections` operation reports a change, leaves every checked permission flag enabled, clears the encrypted state, and keeps the document attribute count. 672 | @Test func removeProtections() throws { 673 | // note: PDF password is "1234" 674 | 675 | let tool = PDFProcessor() 676 | 677 | try tool.load(pdfs: [ 678 | try #require(PDFDocument(url: TestResource.permissions.url())) 679 | ]) 680 | 681 | // check initial permission status 682 | #expect(tool.pdfs[0].doc.allowsContentAccessibility) 683 | #expect(!tool.pdfs[0].doc.allowsCommenting) 684 | #expect(tool.pdfs[0].doc.allowsCopying) 685 | #expect(tool.pdfs[0].doc.allowsPrinting) 686 | #expect(!tool.pdfs[0].doc.allowsDocumentAssembly) 687 | #expect(!tool.pdfs[0].doc.allowsDocumentChanges) 688 | #expect(!tool.pdfs[0].doc.allowsFormFieldEntry) 689 | // check initial encryption status 690 | #expect(tool.pdfs[0].doc.isEncrypted) 691 | #expect(!tool.pdfs[0].doc.isLocked) 692 | // capture document attributes so their count can be compared after the operation 693 | let originalDocumentAttributes = tool.pdfs[0].doc.documentAttributes 694 | 695 | // remove protections 696 | let result = try tool.perform(operation: .removeProtections(files: .all)) 697 | #expect(result == .changed) 698 | 699 | // check permission status 700 | #expect(tool.pdfs[0].doc.allowsContentAccessibility) 701 | #expect(tool.pdfs[0].doc.allowsCommenting) 702 | 
#expect(tool.pdfs[0].doc.allowsCopying) 703 | #expect(tool.pdfs[0].doc.allowsPrinting) 704 | #expect(tool.pdfs[0].doc.allowsDocumentAssembly) 705 | #expect(tool.pdfs[0].doc.allowsDocumentChanges) 706 | #expect(tool.pdfs[0].doc.allowsFormFieldEntry) 707 | // check encryption status 708 | #expect(!tool.pdfs[0].doc.isEncrypted) 709 | #expect(!tool.pdfs[0].doc.isLocked) 710 | // check document attributes are retained (only the attribute count is compared) 711 | #expect( 712 | tool.pdfs[0].doc.documentAttributes?.count == 713 | originalDocumentAttributes?.count 714 | ) 715 | } 716 | } 717 | 718 | // MARK: - Utils 719 | 720 | extension PDFProcessorOperationsTests { 721 | // MARK: Test Resource Conveniences 722 | /// Loads the `pdf1page` test resource as a `PDFDocument`; throws if the document cannot be created. 723 | func testPDF1Page() throws -> PDFDocument { 724 | try #require(PDFDocument(url: TestResource.pdf1page.url())) 725 | } 726 | /// Loads the `pdf2pages` test resource as a `PDFDocument`; throws if the document cannot be created. 727 | func testPDF2Pages() throws -> PDFDocument { 728 | try #require(PDFDocument(url: TestResource.pdf2pages.url())) 729 | } 730 | /// Loads the `pdf5pages` test resource as a `PDFDocument`; throws if the document cannot be created. 731 | func testPDF5Pages() throws -> PDFDocument { 732 | try #require(PDFDocument(url: TestResource.pdf5pages.url())) 733 | } 734 | /// Loads the `pdf1page_withAttributes_withAnnotations` test resource as a `PDFDocument`; throws if the document cannot be created. 735 | func testPDF1Page_withAttrAnno() throws -> PDFDocument { 736 | try #require(PDFDocument(url: TestResource.pdf1page_withAttributes_withAnnotations.url())) 737 | } 738 | 739 | // MARK: Expectations 740 | 741 | /// Checks that the files are generally the same by comparing their underlying documents with `expectDocumentIsEqual`. 742 | /// Not an exhaustive check but enough for unit testing. 743 | func expectFileIsEqual(_ lhs: PDFFile, _ rhs: PDFFile) throws { 744 | try expectDocumentIsEqual(lhs.doc, rhs.doc) 745 | } 746 | 747 | /// Checks that the documents are generally the same: document attributes first, then page content. 748 | /// Not an exhaustive check but enough for unit testing. 
func expectDocumentIsEqual(_ lhs: PDFDocument, _ rhs: PDFDocument) throws {
    // Attribute presence has to match in both directions. Previously only the
    // "lhs has attributes, rhs does not" case was reported; the mirrored case
    // slipped through without any failure.
    switch (lhs.documentAttributes, rhs.documentAttributes) {
    case (.none, .none):
        // neither document carries attributes, so there is nothing to compare
        break

    case let (lhsAttribs?, rhsAttribs?):
        // both docs have attributes, so we can compare them
        #expect(lhsAttribs.count == rhsAttribs.count)

        let comparedAttributes: [PDFDocumentAttribute] = [
            .authorAttribute,
            .creationDateAttribute,
            .creatorAttribute,
            .keywordsAttribute,
            .modificationDateAttribute,
            .producerAttribute,
            .subjectAttribute,
            .titleAttribute
        ]

        for attr in comparedAttributes {
            // Compare as NSObject (isEqual semantics) rather than casting to String:
            // the date and keywords attributes are not strings, so a String cast
            // turned both sides into nil and made the comparison vacuous.
            #expect(
                lhsAttribs[attr] as? NSObject == rhsAttribs[attr] as? NSObject,
                "Document attribute \(attr.rawValue) differs."
            )
        }

    default:
        // exactly one of the documents has attributes
        #fail("Attributes are not equal."); return
    }

    try expectDocumentsAreEqual(lhs, rhs)
}

/// Checks that pages are equal between two PDF files, by checking page text and annotations.
/// Document attributes are not compared.
/// Not an exhaustive check but enough for unit testing.
func expectFilesAreEqual(_ lhs: PDFFile, _ rhs: PDFFile) throws {
    try expectDocumentsAreEqual(lhs.doc, rhs.doc)
}

/// Checks that pages are equal between two PDF documents, by checking page text and annotations.
/// Not an exhaustive check but enough for unit testing.
///
/// - Parameters:
///   - lhs: First document.
///   - rhs: Second document.
///   - ignoreOpenState: Forwarded to the annotation comparison; when `true`, the `/Open`
///     annotation key is excluded from the annotation key count.
func expectDocumentsAreEqual(
    _ lhs: PDFDocument,
    _ rhs: PDFDocument,
    ignoreOpenState: Bool = false
) throws {
    try expectPagesAreEqual(
        lhs.pages(for: .all),
        rhs.pages(for: .all),
        ignoreOpenState: ignoreOpenState
    )
}

/// Checks that pages are equal between two PDF files, by checking page text and annotations.
/// Not an exhaustive check but enough for unit testing.
func expectPagesAreEqual(
    _ lhs: [PDFPage],
    _ rhs: [PDFPage],
    ignoreOpenState: Bool = false
) throws {
    // a count mismatch is reported first; the pairwise walk below then covers
    // only the pages both sides have in common
    #expect(lhs.count == rhs.count)

    try zip(lhs, rhs).forEach { pagePair in
        try expectPageIsEqual(pagePair.0, pagePair.1, ignoreOpenState: ignoreOpenState)
    }
}

/// Compares a single pair of pages by their extracted text and their annotations.
/// Not an exhaustive check but enough for unit testing.
///
/// - Parameters:
///   - lhs: First page.
///   - rhs: Second page.
///   - ignoreOpenState: Forwarded to the per-annotation comparison.
///   - ignoreSurroundingTextWhitespace: When `true`, leading/trailing whitespace and
///     newlines are stripped from each page's text before comparing.
func expectPageIsEqual(
    _ lhs: PDFPage,
    _ rhs: PDFPage,
    ignoreOpenState: Bool = false,
    ignoreSurroundingTextWhitespace: Bool = true
) throws {
    // oddly enough, PDFKit has slightly different behaviors on different platforms (and has
    // changed over time).
    // sometimes it pads extracted text with whitespace and/or trailing line-break, sometimes it
    // doesn't.
    // for our tests we choose to ignore these differences when comparing.
    func comparableText(of page: PDFPage) -> String? {
        guard ignoreSurroundingTextWhitespace else { return page.string }
        return page.string?.trimmingCharacters(in: .whitespacesAndNewlines)
    }

    #expect(comparableText(of: lhs) == comparableText(of: rhs))

    let lhsAnnotations = lhs.annotations
    let rhsAnnotations = rhs.annotations
    #expect(lhsAnnotations.count == rhsAnnotations.count)

    for annotationPair in zip(lhsAnnotations, rhsAnnotations) {
        try expectAnnotationIsEqual(
            annotationPair.0,
            annotationPair.1,
            ignoreOpenState: ignoreOpenState
        )
    }
}

/// Checks page text. Convenience to identify a page for unit testing purposes.
/// Records a failure if `page` is `nil`.
func expect(page: PDFPage?, isTagged: String) throws {
    if let page {
        #expect(page.string?.trimmed == isTagged)
    } else {
        #fail("Page is nil.")
    }
}

/// Checks if two annotations have equal content.
/// Not an exhaustive check but enough for unit testing.
func expectAnnotationIsEqual(
    _ lhs: PDFAnnotation,
    _ rhs: PDFAnnotation,
    ignoreOpenState: Bool
) throws {
    #expect(lhs.type == rhs.type)
    #expect(lhs.bounds == rhs.bounds)
    #expect(lhs.contents == rhs.contents)

    // Number of annotation key/value entries that take part in the comparison.
    //
    // it seems at some point PDFKit gained the behavior of removing the Open
    // annotation property during the course of loading/manipulating PDF documents
    // so it may be desirable to exempt the property from comparison.
    let comparedKeyCount: (PDFAnnotation) -> Int = { annotation in
        guard ignoreOpenState else { return annotation.annotationKeyValues.count }
        return annotation.annotationKeyValues
            .filter { $0.key.base as? String != "/Open" }
            .count
    }

    #expect(comparedKeyCount(lhs) == comparedKeyCount(rhs))
}
}

#endif

--------------------------------------------------------------------------------