├── Images
│   ├── DCT.png
│   ├── Hash.png
│   ├── LowPass.png
│   ├── Original.png
│   ├── Downsampled.png
│   └── DCT_Equation.png
├── .gitignore
├── .swiftpm
│   └── xcode
│       └── package.xcworkspace
│           └── xcshareddata
│               └── IDEWorkspaceChecks.plist
├── Package.swift
├── Sources
│   └── SwiftPerceptualHash
│       ├── Metal shaders
│       │   └── Grayscale.metal
│       ├── Utilities
│       │   └── ConcurrencyLimiter.swift
│       ├── PerceptualHashError.swift
│       ├── PerceptualHash.swift
│       └── PerceptualHashGenerator.swift
├── LICENSE
│   └── LICENSE.txt
└── README.md

/Images/DCT.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Androp0v/SwiftPerceptualHash/HEAD/Images/DCT.png
--------------------------------------------------------------------------------
/Images/Hash.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Androp0v/SwiftPerceptualHash/HEAD/Images/Hash.png
--------------------------------------------------------------------------------
/Images/LowPass.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Androp0v/SwiftPerceptualHash/HEAD/Images/LowPass.png
--------------------------------------------------------------------------------
/Images/Original.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Androp0v/SwiftPerceptualHash/HEAD/Images/Original.png
--------------------------------------------------------------------------------
/Images/Downsampled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Androp0v/SwiftPerceptualHash/HEAD/Images/Downsampled.png
--------------------------------------------------------------------------------
/Images/DCT_Equation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Androp0v/SwiftPerceptualHash/HEAD/Images/DCT_Equation.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.DS_Store
/.build
/Packages
/*.xcodeproj
xcuserdata/
DerivedData/
.swiftpm/config/registries.json
.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata
.netrc
--------------------------------------------------------------------------------
/.swiftpm/xcode/package.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
	<key>IDEDidComputeMac32BitWarning</key>
	<true/>
</dict>
</plist>
--------------------------------------------------------------------------------
/Package.swift:
--------------------------------------------------------------------------------
// swift-tools-version: 5.8
// The swift-tools-version declares the minimum version of Swift required to build this package.

import PackageDescription

let package = Package(
    name: "SwiftPerceptualHash",
    platforms: [
        .macOS(.v12), .iOS(.v15),
    ],
    products: [
        // Products define the executables and libraries a package produces, and make them visible to other packages.
        .library(
            name: "SwiftPerceptualHash",
            targets: ["SwiftPerceptualHash"]),
    ],
    targets: [
        .target(
            name: "SwiftPerceptualHash",
            dependencies: []),
    ]
)
--------------------------------------------------------------------------------
/Sources/SwiftPerceptualHash/Metal shaders/Grayscale.metal:
--------------------------------------------------------------------------------
//
// Grayscale.metal
// SwiftPerceptualHashApp
//
// Created by Raúl Montón Pinillos on 12/4/23.
//

#include <metal_stdlib>
using namespace metal;

// Rec 709 LUMA values for grayscale image conversion
constant half3 kRec709Luma = half3(0.2126, 0.7152, 0.0722);

kernel void grayscale_kernel(texture2d<half, access::read> source_texture [[texture(0)]],
                             texture2d<half, access::write> output_texture [[texture(1)]],
                             uint2 gid [[thread_position_in_grid]]) {
    half4 inColor = source_texture.read(gid);
    half gray = dot(inColor.rgb, kRec709Luma);
    output_texture.write(gray, gid);
}
--------------------------------------------------------------------------------
/LICENSE/LICENSE.txt:
--------------------------------------------------------------------------------
Copyright © 2023 Raúl Montón.

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

--------------------------------------------------------------------------------
/Sources/SwiftPerceptualHash/Utilities/ConcurrencyLimiter.swift:
--------------------------------------------------------------------------------
//
// File.swift
//
//
// Created by Raúl Montón Pinillos on 15/4/23.
//

import Foundation

/// Limits the number of concurrent tasks using the command buffer.
internal actor ConcurrencyLimiter {
    /// Number of actively running tasks using the command buffer.
    var parallelTaskCount: Int = 0

    /// Signals that a new task is about to use the command buffer. If
    /// `maxCommandBufferCount` tasks are already running, the call is
    /// suspended until the resource is available.
    /// - Parameter maxCommandBufferCount: The maximum number of
    /// tasks that can be run in parallel.
    func newRunningTask(maxCommandBufferCount: Int) async {
        while parallelTaskCount >= maxCommandBufferCount {
            await Task.yield()
        }
        parallelTaskCount += 1
    }
    /// Signals that a task using the command buffer has finished, so other
    /// threads can use the resource.
    func endRunningTask() {
        parallelTaskCount -= 1
    }
}
--------------------------------------------------------------------------------
/Sources/SwiftPerceptualHash/PerceptualHashError.swift:
--------------------------------------------------------------------------------
//
// File.swift
//
//
// Created by Raúl Montón Pinillos on 13/4/23.
//

import Foundation
import Metal

public enum PerceptualHashError: Error, LocalizedError {
    case wrongDCTSize
    case negativeOrZeroResizedSize
    case resizedSizeTooSmallForDCTSize
    case metalDeviceCreationFailed
    case makeDefaultLibraryFailed
    case makeGrayscaleKernelFailed
    case makeGrayscalePSOFailed
    case createResizedTextureFailed(Int)
    case createGrayscaleResizedTextureFailed(Int)
    case makeCommandQueueFailed
    case makeCommandBufferFailed
    case makeComputeCommandEncoderFailed
    case unsupportedSourceImagePixelFormat(MTLPixelFormat)
    case numberOfBitsMismatch

    public var errorDescription: String? {
        switch self {
        case .metalDeviceCreationFailed:
            return "Metal device creation failed!"
        case .makeDefaultLibraryFailed:
            return "Unable to create default library!"
        case .makeGrayscaleKernelFailed:
            return "Failed to create grayscale kernel!"
        case .makeGrayscalePSOFailed:
            return "Failed to create grayscale pipeline state object!"
        case .createResizedTextureFailed(let size):
            return "Failed to create \(size)x\(size) resized texture."
        case .createGrayscaleResizedTextureFailed(let size):
            return "Failed to create \(size)x\(size) grayscale resized texture."
        case .makeCommandQueueFailed:
            return "Failed to create command queue!"
        case .wrongDCTSize:
            return "Discrete Cosine Transform (DCT) matrix can't be smaller than 2x2."
        case .negativeOrZeroResizedSize:
            return "Intermediate resized image matrix can't have negative or zero size."
        case .resizedSizeTooSmallForDCTSize:
            return "Intermediate resized image matrix can't be smaller than the DCT matrix."
        case .makeCommandBufferFailed:
            return "Failed to create command buffer!"
        case .makeComputeCommandEncoderFailed:
            return "Failed to create compute command encoder!"
        case .unsupportedSourceImagePixelFormat(let pixelFormat):
            return "Unsupported source image MTLPixelFormat: \(pixelFormat)"
        case .numberOfBitsMismatch:
            return "Number of bits of the two hashes does not match. Hashes with a different number of bits can't be compared."
        }
    }
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# SwiftPerceptualHash

Swift package to create a *Perceptual Image Hash* from a source image. Perceptual Image Hashes output similar hashes for similar images, which allows for easy duplicate image detection that is robust to different compression algorithms or image sizes.

## How to use

```swift
// Create generator only once, reuse throughout the app
let hashGenerator = try PerceptualHashGenerator()

// There are many ways to get a Data type representing an image. For example:
let imageData = UIImage(named: "SampleImage")!.pngData()!

// Once you have a reference to the Data of an image, creating the hash is easy and fast:
let hash = try await hashGenerator.perceptualHash(imageData: imageData)

// You can get different String representations from the hash. For example:
print(hash.stringValue) // 2879bv9r58qsv
```

Visually similar images will have similar or identical hashes, which we can use to check whether two images are duplicates. You can compare hashes directly using the equality operator (`hashA == hashB`), or you can use the `similarity(lhs:rhs:)` function to get a floating-point value (ranging from 0 to 1) that expresses how similar two hashes are (0 for completely different hashes, 1 for identical hashes).
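
For instance, a duplicate check could look like the sketch below (`dataA` and `dataB` are placeholder `Data` values for the two images being compared, and the `0.9` threshold is an arbitrary choice, not something the library prescribes):

```swift
let hashA = try await hashGenerator.perceptualHash(imageData: dataA)
let hashB = try await hashGenerator.perceptualHash(imageData: dataB)

if hashA == hashB {
    // Identical hashes: the images are duplicates as far as the hash can tell.
    print("Duplicate found")
} else if try similarity(lhs: hashA, rhs: hashB) > 0.9 {
    // Very close hashes: likely the same image after resizing, recompression or small edits.
    print("Possible near-duplicate")
}
```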

## Project structure

All the fun bits are in `Sources/SwiftPerceptualHash/PerceptualHashGenerator.swift`.

## Algorithm overview

### Original images
First, the image is imported into an `MTLTexture` at its full size. Here you can see four different images, which are (from left to right):
- An example image.
- The previous image, saved with the maximum JPG compression (size went from 11.2MB to 501KB).
- The first image, slightly cropped and with a slight exposure and saturation boost.
- Another example image.

![OriginalImage](Images/Original.png)

### Low-pass filter
To avoid aliasing when the image is downsampled, a Gaussian kernel (acting as a low-pass filter) first removes the high-frequency information that would otherwise alias in the downsampled image. The σ of the Gaussian kernel is computed as `σ = 1 / (2 * maxScaleFactor)`, where `maxScaleFactor` is the scale factor required to transform the full-size image to the 32x32 size used in the next step (downsampling). If the image is not square, we conservatively apply the blur with the minimum radius (the one given by the maximum scale factor).

![LowPassImage](Images/LowPass.png)

### Downsampling
The image is then downsampled on the GPU using a bilinear filter to a 32x32 pixel texture, and the color information is removed. Grayscale conversion uses the Rec. 709 luma coefficients for no particular reason (we had to pick a grayscale conversion, and it doesn't matter much which one).

![Downsampled](Images/Downsampled.png)

### Discrete Cosine Transform (DCT)
A Discrete Cosine Transform (DCT) is then applied to the 32x32 image. You're probably better off reading about how a DCT works elsewhere than having me explain it here. The key things to know are that the upper-left corner contains the low-frequency information and the bottom-right corner contains the high-frequency information. The "perceptual" part of the image hashing is based on the low-frequency part, so although the DCT uses the full 32x32 texture to compute each coefficient, only the upper-left 8x8 coefficients of the DCT matrix are computed.
The basic formula (with the notation used in the code) is:

![DCT_Equation](Images/DCT_Equation.png)

Where `u`, `v` index the 8x8 DCT matrix (range: `[0, 8)`), `i`, `j` iterate over the 32x32 image (range: `[0, 32)`), and `pixel(i,j)` is the grayscale value of the pixel at the i-th row and j-th column of the 32x32 image.
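
In code, that double sum looks roughly like the sketch below (illustrative only: the names `dct8x8`, `pixel`, `imageSize` and `dctSize` are placeholders, and the DCT normalization constants are left out, since the hash only compares coefficients against their mean):

```swift
import Foundation

/// Computes the upper-left `dctSize` x `dctSize` DCT coefficients of a square
/// grayscale image stored row-major in `pixel`.
func dct8x8(pixel: [Double], imageSize: Int = 32, dctSize: Int = 8) -> [Double] {
    var dct = [Double](repeating: 0, count: dctSize * dctSize)
    for u in 0..<dctSize {
        for v in 0..<dctSize {
            var sum = 0.0
            for i in 0..<imageSize {
                for j in 0..<imageSize {
                    // Weight each pixel by the (u, v) cosine basis functions.
                    sum += pixel[i * imageSize + j] *
                        cos(Double.pi * Double(u) * (2.0 * Double(i) + 1.0) / (2.0 * Double(imageSize))) *
                        cos(Double.pi * Double(v) * (2.0 * Double(j) + 1.0) / (2.0 * Double(imageSize)))
                }
            }
            dct[u * dctSize + v] = sum
        }
    }
    return dct
}
```

The real implementation first reads the 32x32 grayscale pixels back from the GPU texture and then runs an equivalent set of loops on the CPU.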

![DCT](Images/DCT.png)

### Hash
To compute the hash from the 8x8 DCT, the `[0,0]` value is set to `0.0`, essentially removing any constant component of the image. Then, the mean of the 8x8 matrix is computed, and for each value in the matrix a "1" is written if the DCT coefficient at that position is greater than the mean, and a "0" is written otherwise.

![Hash](Images/Hash.png)

Similar images have similar hashes. The first sample image and its heavily compressed version share the same hash, but the slightly cropped and color-adjusted image has a slightly different hash. The last image, which is completely different, has a completely different hash.

All that's left is to compute a string value from the 8x8 hash matrix. To get a binary representation, we start with an empty string, iterate over the 8x8 matrix, and append a "1" or a "0" for each position. We end up with something like `"1001001001111111011011011111011000111111111101110111101010111101"`.
We could stop there, but it's not optimal to store 64 bits of information in a 64-character string. Instead, we encode that "binary string" using a base-36 encoding, ending up with something like `"2879bvhn9r2kd"`, which is the value that can be accessed through the `.stringValue` property of the `PerceptualHash` result. This value can now be computed for several images to check for duplicates by comparing the strings.
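
Putting those last two steps together, a rough sketch (not the library's actual code; `dct` here is a placeholder flattened 8x8 array of DCT coefficients, and hashes longer than 64 bits would need to be split into several `UInt64` blocks, which is what the `PerceptualHash` type does):

```swift
var dct = [Float](repeating: 0, count: 64) // stand-in for the real 8x8 DCT coefficients
dct[0] = 0.0 // Remove the constant (DC) component.
let mean = dct.reduce(0, +) / Float(dct.count)

// Threshold every coefficient against the mean to build the binary string.
var binaryString = ""
for value in dct {
    binaryString += value > mean ? "1" : "0"
}

// Re-encode the 64-bit binary string as a compact base-36 string.
let compactString = String(UInt64(binaryString, radix: 2)!, radix: 36)
```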

## Further reading
- [pHash.org](http://www.phash.org)
- [Implementation and Benchmarking of Perceptual Image Hash Functions (Christoph Zauner)](http://www.phash.org/docs/pubs/thesis_zauner.pdf)
- [Discrete Cosine Transform for 8x8 Blocks with CUDA (NVIDIA)](https://developer.download.nvidia.com/assets/cuda/files/dct8x8.pdf)
- [DCT Implementation on GPU (Serpil Tokdemir, Georgia State University)](https://core.ac.uk/download/71421787.pdf)
--------------------------------------------------------------------------------
/Sources/SwiftPerceptualHash/PerceptualHash.swift:
--------------------------------------------------------------------------------
//
// File.swift
//
//
// Created by Raúl Montón Pinillos on 13/4/23.
//

import Foundation


/// A `PerceptualHash` is a hash used to compare how similar two images are. Two images are
/// identical if their hashes are the same. Similar images will have similar hashes, and completely
/// different images will have completely different hashes.
///
/// As with any hashing algorithm, collisions can happen, where two completely different images end
/// up with the same hash. This happens because, while there's an infinite number of images, a
/// `PerceptualHash` has a fixed number of bits to represent all of them.
///
/// Two hashes can only be compared if they have been generated using the same
/// `PerceptualHashGenerator` configuration.
public struct PerceptualHash: Equatable {

    public let numberOfBits: Int
    public let blocks: [UInt64]

    // MARK: - Strings

    /// A string representation of the perceptual hash, in its most compact form (i.e. "40ixng9r").
    public var stringValue: String {
        var finalString = ""
        for block in blocks {
            finalString.append(String(block, radix: 36, uppercase: false))
        }
        return finalString
    }
    /// A binary representation of the perceptual hash (i.e. "100100100111111101101101111101100011111").
    public var binaryString: String {
        var finalString = ""
        for block in blocks {
            finalString.append(String(block, radix: 2))
        }
        return finalString
    }
    /// A hexadecimal representation of the perceptual hash (i.e. "493FB6FB1F").
    public var hexString: String {
        var finalString = ""
        for block in blocks {
            finalString.append(String(block, radix: 16, uppercase: true))
        }
        return finalString
    }

    // MARK: - Init

    /// You typically don't call this initializer directly. Instead, you use a method from the `PerceptualHashGenerator`
    /// class to get an object of this type.
    /// - Parameter binaryString: A binary representation of the perceptual hash (i.e. "1001001").
    public init(binaryString: String) {

        var blocks = [UInt64]()

        // Get the number of characters in the string. Since each character
        // represents a bit, that means that this is also the number of bits.
        let numberOfBits = binaryString.count

        // Divide the string in blocks of 64 characters (bits).

        // Number of blocks with full 64-bit numbers
        let fullBlockCount = numberOfBits / 64

        // The string size might not be a multiple of 64
        let remainder = numberOfBits % 64

        // Convert the most significant bits (64 or fewer) to a UInt64
        let remainderIndex = binaryString.index(binaryString.startIndex, offsetBy: remainder)
        if remainder != 0 {
            let remainderString = binaryString[..<remainderIndex]
            // … [elided: conversion of `remainderString` and of the `fullBlockCount`
            // full 64-character substrings into `UInt64` values appended to `blocks`,
            // plus assignment of the `numberOfBits` and `blocks` stored properties] …
        }
    }
}

public func similarity(lhs: PerceptualHash, rhs: PerceptualHash) throws -> Double {

    // Make sure that the left hand side and right hand side have the
    // same number of bits.
    guard lhs.numberOfBits == rhs.numberOfBits else {
        throw PerceptualHashError.numberOfBitsMismatch
    }

    // Set a counter for the number of bits that are different.
    var differingBitCounter = 0

    // Iterate over all the stored blocks
    for i in 0..<lhs.blocks.count {
        // XOR leaves a 1 in every bit position where the two blocks differ.
        var differentBits = lhs.blocks[i] ^ rhs.blocks[i]
        while differentBits > 0 {
            let maskedBits = differentBits & 1
            if maskedBits != 0 {
                differingBitCounter += 1
            }
            // Right-shift bits to test next bit.
            differentBits = differentBits >> 1
        }
    }
    return 1.0 - Double(differingBitCounter) / Double(lhs.numberOfBits)
}
--------------------------------------------------------------------------------
/Sources/SwiftPerceptualHash/PerceptualHashGenerator.swift:
--------------------------------------------------------------------------------
//
// PerceptualHash.swift
// SwiftPerceptualHash
//
// Created by Raúl Montón Pinillos on 12/4/23.
//

import Foundation
import Metal
import MetalKit
import MetalPerformanceShaders

/// Class used to generate a `PerceptualHash`. A `PerceptualHash` is a hash used to
/// compare how similar two images are. Two images are identical if their hashes are the same.
/// Similar images will have similar hashes, and completely different images will have completely
/// different hashes.
///
/// Create a `PerceptualHashGenerator` once, and reuse it throughout the app. Initializing
/// this class creates a bunch of Metal objects that are expensive to create but can be reused to
/// compute hashes for different images.
public class PerceptualHashGenerator {

    // MARK: - Properties

    /// The size of the DCT matrix used to generate the hash. A hash will require `pow(dctSize,2)`
    /// bits to be stored. Defaults to an 8x8 matrix, to generate 64-bit hashes.
    public let dctSize: Int
    /// The size the image will be resized to before the DCT is computed. Defaults to a 32x32 image.
    public let resizedSize: Int

    /// The maximum number of command buffers that can be used simultaneously. Each call to
    /// `perceptualHash(imageData: Data)` creates a new command buffer.
    private let maxCommandBufferCount: Int = 128
    /// The system's default Metal device.
    private let device: MTLDevice
    /// The command queue.
    private let commandQueue: MTLCommandQueue
    /// The Pipeline State Object of a grayscale kernel.
    private let grayscalePSO: MTLComputePipelineState
    /// Used to load a `MTLTexture` from image data.
    private let textureLoader: MTKTextureLoader
    /// An actor to limit the number of concurrent tasks using the command buffer.
    private let concurrencyLimiter = ConcurrencyLimiter()

    internal class IntermediateTextures {
        /// Unique identifier for the group of textures.
        internal let id = UUID()
        /// Whether the current texture group is being used in a command buffer.
        internal var inUse: Bool = false
        /// The intermediate, resized image texture used to compute the DCT.
        internal let color: MTLTexture
        /// The intermediate, resized image texture used to compute the DCT, in grayscale.
        internal let grayscale: MTLTexture

        init(color: MTLTexture, grayscale: MTLTexture) {
            self.color = color
            self.grayscale = grayscale
        }
    }

    // MARK: - Initialization

    /// Initializes a `PerceptualHashGenerator` with a specific configuration.
    /// - Parameters:
    ///   - resizedSize: The size the image will be resized to before the DCT is computed.
    ///   Defaults to a 32x32 image. Bigger sizes can allow for more precise image comparisons,
    ///   as more high-frequency data is preserved.
    ///   - dctSize: The size of the DCT matrix used to generate the hash. Bigger sizes can
    ///   allow for more precise image comparisons, as more high-frequency data is preserved.
    public init(resizedSize: Int = 32, dctSize: Int = 8) throws {

        // Check against wrong parameter configurations
        guard resizedSize > 0 else {
            throw PerceptualHashError.negativeOrZeroResizedSize
        }
        guard dctSize > 1 else {
            throw PerceptualHashError.wrongDCTSize
        }
        guard resizedSize >= dctSize else {
            throw PerceptualHashError.resizedSizeTooSmallForDCTSize
        }
        self.resizedSize = resizedSize
        self.dctSize = dctSize

        // Get Metal device
        guard let device = MTLCreateSystemDefaultDevice() else {
            throw PerceptualHashError.metalDeviceCreationFailed
        }
        self.device = device

        // Get the default library
        guard let defaultLibrary = try? device.makeDefaultLibrary(bundle: .module) else {
            throw PerceptualHashError.makeDefaultLibraryFailed
        }

        // Create the grayscale kernel function
        guard let grayscaleKernel = defaultLibrary.makeFunction(name: "grayscale_kernel") else {
            throw PerceptualHashError.makeGrayscaleKernelFailed
        }

        // Create the grayscale Pipeline State Object
        guard let grayscalePSO = try? device.makeComputePipelineState(function: grayscaleKernel) else {
            throw PerceptualHashError.makeGrayscalePSOFailed
        }
        self.grayscalePSO = grayscalePSO

        // Create a texture loader
        self.textureLoader = MTKTextureLoader(device: device)

        // Create command queue
        guard let commandQueue = device.makeCommandQueue(
            maxCommandBufferCount: maxCommandBufferCount
        ) else {
            throw PerceptualHashError.makeCommandQueueFailed
        }
        self.commandQueue = commandQueue
    }

    // MARK: - IntermediateTextures

    /// Creates a new set of `IntermediateTextures` with the given configuration options.
    internal func createIntermediateTextures(
        device: MTLDevice,
        resizedSize: Int,
        pixelFormat: MTLPixelFormat
    ) throws -> IntermediateTextures {
        // Create small intermediate texture
        let resizedTextureDescriptor = MTLTextureDescriptor.texture2DDescriptor(
            pixelFormat: pixelFormat,
            width: resizedSize,
            height: resizedSize,
            mipmapped: false
        )
        resizedTextureDescriptor.usage = [.shaderRead, .shaderWrite]
        guard let resizedTexture = device.makeTexture(descriptor: resizedTextureDescriptor) else {
            throw PerceptualHashError.createResizedTextureFailed(resizedSize)
        }

        // Create small intermediate texture (grayscale)
        let grayscaleTextureDescriptor = MTLTextureDescriptor.texture2DDescriptor(
            pixelFormat: .r32Float,
            width: resizedSize,
            height: resizedSize,
            mipmapped: false
        )
        grayscaleTextureDescriptor.usage = [.shaderRead, .shaderWrite]
        guard let grayscaleTexture = device.makeTexture(descriptor: grayscaleTextureDescriptor) else {
            throw PerceptualHashError.createGrayscaleResizedTextureFailed(resizedSize)
        }
        return IntermediateTextures(color: resizedTexture, grayscale: grayscaleTexture)
    }

    // MARK: - Hashing

    /// Creates a `PerceptualHash` for an image using its raw data.
    /// - Parameter imageData: The raw data for the image. Make sure that the image orientation is `.up`.
    /// - Returns: A `PerceptualHash` object, used to check how similar two images are.
    public func perceptualHash(imageData: Data) async throws -> PerceptualHash {

        // Before calling makeCommandBuffer, we have to ensure that no more than
        // the maximum number of tasks are running, or makeCommandBuffer will
        // block, potentially deadlocking our Swift Concurrency code.
        await concurrencyLimiter.newRunningTask(maxCommandBufferCount: maxCommandBufferCount)
        defer {
            Task {
                await concurrencyLimiter.endRunningTask()
            }
        }

        // Create command buffer
        guard let commandBuffer = commandQueue.makeCommandBuffer() else {
            throw PerceptualHashError.makeCommandBufferFailed
        }

        // Create the source image texture
        var sourceImageTexture = try await self.textureLoader.newTexture(
            data: imageData,
            options: [
                MTKTextureLoader.Option.textureUsage: MTLTextureUsage.unknown.rawValue,
                MTKTextureLoader.Option.textureStorageMode: MTLStorageMode.shared.rawValue
            ]
        )
        guard sourceImageTexture.pixelFormat == .bgra8Unorm_srgb
                || sourceImageTexture.pixelFormat == .bgra8Unorm
                || sourceImageTexture.pixelFormat == .rgba16Unorm
                || sourceImageTexture.pixelFormat == .rgba8Unorm_srgb
                || sourceImageTexture.pixelFormat == .rgba8Unorm
                || sourceImageTexture.pixelFormat == .rgba8Sint
                || sourceImageTexture.pixelFormat == .rgba8Snorm
        else {
            throw PerceptualHashError.unsupportedSourceImagePixelFormat(sourceImageTexture.pixelFormat)
        }

        // Compute the x and y axis scales required to resize the image
        let scaleX = Double(resizedSize) / Double(sourceImageTexture.width)
        let scaleY = Double(resizedSize) / Double(sourceImageTexture.height)

        // MARK: - Gaussian blur

        // Blur the image to get rid of all the high-frequency features that could
        // result in aliasing in the downsampled image
        let blur = MPSImageGaussianBlur(device: device, sigma: Float(1 / (2 * max(scaleX, scaleY))))
        withUnsafeMutablePointer(to: &sourceImageTexture) { texturePointer in
            _ = blur.encode(commandBuffer: commandBuffer, inPlaceTexture: texturePointer)
        }

        // MARK: - Resize

        // Create a new set of intermediate textures for this hash computation
        let intermediateTextures = try createIntermediateTextures(
            device: device,
            resizedSize: resizedSize,
            pixelFormat: sourceImageTexture.pixelFormat
        )

        // Resize the image to the target resolution (32x32 by default)
        let resize = MPSImageBilinearScale(device: device)
        var transform = MPSScaleTransform(
            scaleX: scaleX,
            scaleY: scaleY,
            translateX: 0.0,
            translateY: 0.0
        )
        withUnsafePointer(to: &transform) { (transformPtr: UnsafePointer<MPSScaleTransform>) -> () in
            resize.scaleTransform = transformPtr
        }
        resize.encode(
            commandBuffer: commandBuffer,
            sourceTexture: sourceImageTexture,
            destinationTexture: intermediateTextures.color
        )

        // MARK: - Grayscale

        // Create compute command encoder
        guard let computeEncoder = commandBuffer.makeComputeCommandEncoder() else {
            throw PerceptualHashError.makeComputeCommandEncoderFailed
        }

        // Set the PSO to perform a grayscale conversion
        computeEncoder.setComputePipelineState(grayscalePSO)

        // Set the source texture
        computeEncoder.setTexture(intermediateTextures.color, index: 0)

        // Set the output texture
        computeEncoder.setTexture(intermediateTextures.grayscale, index: 1)

        // Dispatch the threads
        let threadgroupSize = MTLSizeMake(16, 16, 1)
        var threadgroupCount = MTLSize()
        threadgroupCount.width = (intermediateTextures.color.width + threadgroupSize.width - 1) / threadgroupSize.width
        threadgroupCount.height = (intermediateTextures.color.height + threadgroupSize.height - 1) / threadgroupSize.height
        // The image data is 2D, so set depth to 1
        threadgroupCount.depth = 1
        computeEncoder.dispatchThreadgroups(threadgroupCount, threadsPerThreadgroup: threadgroupSize)

        // Finish encoding
        computeEncoder.endEncoding()

        // MARK: - Finish

        let binaryStringHash = await withCheckedContinuation { continuation in
            commandBuffer.addCompletedHandler { [intermediateTextures] _ in
                let binaryStringHash = self.computeDCT(
                    grayscaleTexture: intermediateTextures.grayscale
                )
                continuation.resume(returning: binaryStringHash)
            }
            // Submit work to the GPU
            commandBuffer.commit()
        }
        return PerceptualHash(binaryString: binaryStringHash)
    }

    // MARK: - Compute DCT

    private func computeDCT(grayscaleTexture: MTLTexture) -> String {
        let rowBytes = resizedSize * 4
        let length = rowBytes * resizedSize
        let region = MTLRegionMake2D(0, 0, resizedSize, resizedSize)
        var grayBytes = [Float32](repeating: 0, count: length)
        var dctArray = [Float](repeating: 0, count: dctSize * dctSize)

        var binaryHash: String = ""

        // Fill with the texture data
        grayBytes.withUnsafeMutableBytes { r32BytesPointer in
            guard let baseAddress = r32BytesPointer.baseAddress else {
                return
            }
            // Fill the array with data from the grayscale texture
            grayscaleTexture.getBytes(
                baseAddress,
                bytesPerRow: rowBytes,
                from: region,
                mipmapLevel: 0
            )
        }
        // Compute each one of the elements of the discrete cosine transform
        for u in 0..<dctSize {
            // … [elided: the loops over v, i and j that accumulate the
            // cosine-weighted sums of `grayBytes` into `dctArray`, the zeroing
            // of the DC term, the computation of `meanDCT`, and the start of
            // the loop over `dctArray` that emits one bit per coefficient] …
            if dctValue > Float32(meanDCT) {
                binaryHash += "1"
            } else {
                binaryHash += "0"
            }
        }
        return binaryHash
    }
}

--------------------------------------------------------------------------------