├── Images
│   ├── DCT.png
│   ├── Hash.png
│   ├── LowPass.png
│   ├── Original.png
│   ├── Downsampled.png
│   └── DCT_Equation.png
├── .gitignore
├── .swiftpm
│   └── xcode
│       └── package.xcworkspace
│           └── xcshareddata
│               └── IDEWorkspaceChecks.plist
├── Package.swift
├── Sources
│   └── SwiftPerceptualHash
│       ├── Metal shaders
│       │   └── Grayscale.metal
│       ├── Utilities
│       │   └── ConcurrencyLimiter.swift
│       ├── PerceptualHashError.swift
│       ├── PerceptualHash.swift
│       └── PerceptualHashGenerator.swift
├── LICENSE
│   └── LICENSE.txt
└── README.md
/Images/DCT.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Androp0v/SwiftPerceptualHash/HEAD/Images/DCT.png
--------------------------------------------------------------------------------
/Images/Hash.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Androp0v/SwiftPerceptualHash/HEAD/Images/Hash.png
--------------------------------------------------------------------------------
/Images/LowPass.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Androp0v/SwiftPerceptualHash/HEAD/Images/LowPass.png
--------------------------------------------------------------------------------
/Images/Original.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Androp0v/SwiftPerceptualHash/HEAD/Images/Original.png
--------------------------------------------------------------------------------
/Images/Downsampled.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Androp0v/SwiftPerceptualHash/HEAD/Images/Downsampled.png
--------------------------------------------------------------------------------
/Images/DCT_Equation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Androp0v/SwiftPerceptualHash/HEAD/Images/DCT_Equation.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | /.build
3 | /Packages
4 | /*.xcodeproj
5 | xcuserdata/
6 | DerivedData/
7 | .swiftpm/config/registries.json
8 | .swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata
9 | .netrc
10 |
--------------------------------------------------------------------------------
/.swiftpm/xcode/package.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3 | <plist version="1.0">
4 | <dict>
5 | 	<key>IDEDidComputeMac32BitWarning</key>
6 | 	<true/>
7 | </dict>
8 | </plist>
9 | 
--------------------------------------------------------------------------------
/Package.swift:
--------------------------------------------------------------------------------
1 | // swift-tools-version: 5.8
2 | // The swift-tools-version declares the minimum version of Swift required to build this package.
3 |
4 | import PackageDescription
5 |
6 | let package = Package(
7 | name: "SwiftPerceptualHash",
8 | platforms: [
9 | .macOS(.v12), .iOS(.v15),
10 | ],
11 | products: [
12 | // Products define the executables and libraries a package produces, and make them visible to other packages.
13 | .library(
14 | name: "SwiftPerceptualHash",
15 | targets: ["SwiftPerceptualHash"]),
16 | ],
17 | targets: [
18 | .target(
19 | name: "SwiftPerceptualHash",
20 | dependencies: []),
21 | ]
22 | )
23 |
--------------------------------------------------------------------------------
/Sources/SwiftPerceptualHash/Metal shaders/Grayscale.metal:
--------------------------------------------------------------------------------
1 | //
2 | // Grayscale.metal
3 | // SwiftPerceptualHashApp
4 | //
5 | // Created by Raúl Montón Pinillos on 12/4/23.
6 | //
7 |
8 | #include <metal_stdlib>
9 | using namespace metal;
10 |
11 | // Rec 709 LUMA values for grayscale image conversion
12 | constant half3 kRec709Luma = half3(0.2126, 0.7152, 0.0722);
13 |
14 | kernel void grayscale_kernel(texture2d<half, access::read> source_texture [[texture(0)]],
15 |                              texture2d<half, access::write> output_texture [[texture(1)]],
16 | uint2 gid [[thread_position_in_grid]]) {
17 | half4 inColor = source_texture.read(gid);
18 | half gray = dot(inColor.rgb, kRec709Luma);
19 | output_texture.write(gray, gid);
20 | }
21 |
--------------------------------------------------------------------------------
/LICENSE/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Copyright © 2023 Raúl Montón.
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4 |
5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6 |
7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
8 |
9 |
--------------------------------------------------------------------------------
/Sources/SwiftPerceptualHash/Utilities/ConcurrencyLimiter.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by Raúl Montón Pinillos on 15/4/23.
6 | //
7 |
8 | import Foundation
9 |
10 | /// Limits the number of concurrent tasks using the command buffer.
11 | internal actor ConcurrencyLimiter {
12 | /// Number of actively running tasks using the command buffer.
13 | var parallelTaskCount: Int = 0
14 |
15 | /// Signals that a new task is about to use the command buffer. If
16 | /// `maxCommandBufferCount` tasks are already running, the call is
17 | /// suspended until the resource is available.
18 | /// - Parameter maxCommandBufferCount: The maximum number of
19 |     /// tasks that can run in parallel.
20 | func newRunningTask(maxCommandBufferCount: Int) async {
21 | while parallelTaskCount >= maxCommandBufferCount {
22 | await Task.yield()
23 | }
24 | parallelTaskCount += 1
25 | }
26 | /// Signals that a task using the command buffer has finished, so other
27 | /// threads can use the resource.
28 | func endRunningTask() {
29 | parallelTaskCount -= 1
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/Sources/SwiftPerceptualHash/PerceptualHashError.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by Raúl Montón Pinillos on 13/4/23.
6 | //
7 |
8 | import Foundation
9 | import Metal
10 |
11 | public enum PerceptualHashError: Error, LocalizedError {
12 | case wrongDCTSize
13 | case negativeOrZeroResizedSize
14 | case resizedSizeTooSmallForDCTSize
15 | case metalDeviceCreationFailed
16 | case makeDefaultLibraryFailed
17 | case makeGrayscaleKernelFailed
18 | case makeGrayscalePSOFailed
19 | case createResizedTextureFailed(Int)
20 | case createGrayscaleResizedTextureFailed(Int)
21 | case makeCommandQueueFailed
22 | case makeCommandBufferFailed
23 | case makeComputeCommandEncoderFailed
24 | case unsupportedSourceImagePixelFormat(MTLPixelFormat)
25 | case numberOfBitsMismatch
26 |
27 | public var errorDescription: String? {
28 | switch self {
29 | case .metalDeviceCreationFailed:
30 | return "Metal device creation failed!"
31 | case .makeDefaultLibraryFailed:
32 | return "Unable to create default library!"
33 | case .makeGrayscaleKernelFailed:
34 | return "Failed to create grayscale kernel!"
35 | case .makeGrayscalePSOFailed:
36 | return "Failed to create grayscale pipeline state object!"
37 | case .createResizedTextureFailed(let size):
38 | return "Failed to create \(size)x\(size) resized texture."
39 | case .createGrayscaleResizedTextureFailed(let size):
40 | return "Failed to create \(size)x\(size) grayscale resized texture."
41 | case .makeCommandQueueFailed:
42 | return "Failed to create command queue!"
43 | case .wrongDCTSize:
44 | return "Discrete Cosine Transform (DCT) matrix can't be smaller than 2x2."
45 | case .negativeOrZeroResizedSize:
46 | return "Intermediate resized image matrix can't have negative or zero size."
47 | case .resizedSizeTooSmallForDCTSize:
48 | return "Intermediate resized image matrix can't be smaller than the DCT matrix."
49 | case .makeCommandBufferFailed:
50 | return "Failed to create command buffer!"
51 | case .makeComputeCommandEncoderFailed:
52 | return "Failed to create compute command encoder!"
53 | case .unsupportedSourceImagePixelFormat(let pixelFormat):
54 | return "Unsupported source image MTLPixelFormat: \(pixelFormat)"
55 | case .numberOfBitsMismatch:
56 | return "Number of bits of the two hashes does not match. Hashes with different number of bits can't be compared."
57 | }
58 | }
59 | }
60 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SwiftPerceptualHash
2 |
3 | Swift package to create a *Perceptual Image Hash* from a source image. Perceptual Image Hashes output similar hashes for similar images, which allows for easy duplicate image detection that is robust to different compression algorithms or image sizes.
4 |
5 | ## How to use
6 |
7 | ```swift
8 | // Create generator only once, reuse throughout the app
9 | let hashGenerator = try PerceptualHashGenerator()
10 | 
11 | // There are many ways to get a Data type representing an image. For example:
12 | let imageData = UIImage(named: "SampleImage")!.pngData()!
13 | 
14 | // Once you have a reference to the Data of an image, creating the hash is easy and fast:
15 | let hash = try await hashGenerator.perceptualHash(imageData: imageData)
16 |
17 | // You can get different String representations from the hash. For example:
18 | print(hash.stringValue) // 2879bv9r58qsv
19 | ```
20 |
21 | Visually similar images will have similar or identical hashes, which makes it easy to check whether two images are duplicates. You can compare hashes directly using the equality operator (`hashA == hashB`), or use the (throwing) `similarity(hashA, hashB)` function to get a floating point value between 0 and 1 that expresses how similar two hashes are (0 for completely different hashes, 1 for identical hashes).
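For example, a quick duplicate check could look like this (`imageDataA`, `imageDataB` and the `0.9` threshold are just illustrative values, not something provided by the package):

```swift
// Hashes must come from the same PerceptualHashGenerator configuration.
let hashA = try await hashGenerator.perceptualHash(imageData: imageDataA)
let hashB = try await hashGenerator.perceptualHash(imageData: imageDataB)

if hashA == hashB {
    print("Identical hashes: almost certainly duplicates.")
} else if try similarity(hashA, hashB) > 0.9 { // illustrative threshold
    print("Very similar hashes: likely duplicates.")
}
```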
22 |
23 | ## Project structure
24 |
25 | All the fun bits are in `Sources/SwiftPerceptualHash/PerceptualHashGenerator.swift`.
26 |
27 | ## Algorithm overview
28 |
29 | ### Original images
30 | At the beginning, the image is imported to a `MTLTexture` at its full size. Here you can see four different images, which are (from left to right):
31 | - An example image.
32 | - The previous image, saved with the maximum JPG compression (size went from 11.2MB to 501KB).
33 | - The first image, slightly cropped and with a slight exposure and saturation boost.
34 | - Another example image.
35 |
36 | 
37 |
38 | ### Low-pass filter
39 | To avoid aliasing when the image is downsampled, a gaussian kernel (acting as a low-pass filter) first removes the high-frequency information from the image. The σ of the gaussian kernel is computed as `σ = 1 / maxScaleFactor`, where `maxScaleFactor` is the scale factor required to transform the full-size image to the 32x32 size used in the next step (downsampling). If the image is not square, we conservatively apply the blur with the minimum radius (the one derived from the larger of the two axis scale factors).
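Roughly, the computation looks like this (the image dimensions below are placeholder values):

```swift
// Hypothetical full-size image dimensions, for illustration only.
let imageWidth = 3024.0
let imageHeight = 4032.0

// Scale factors that map the full-size image onto the 32x32 target.
let scaleX = 32.0 / imageWidth
let scaleY = 32.0 / imageHeight

// The larger of the two scale factors yields the smaller, more conservative blur radius.
let maxScaleFactor = max(scaleX, scaleY)
let sigma = 1.0 / maxScaleFactor
```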
40 |
41 | 
42 |
43 | ### Downsampling
44 | The image is then downsampled on the GPU using a bilinear filter to a 32x32 pixel texture, and the color information is removed. Grayscale conversion uses the Rec. 709 luma coefficients for no particular reason (we had to pick a grayscale conversion, and it doesn't matter much which one).
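The grayscale conversion is just a weighted sum of the RGB channels; in Swift terms it amounts to the following (the real conversion runs in the `Grayscale.metal` compute kernel):

```swift
// Rec. 709 luma coefficients, matching the Metal grayscale kernel.
func rec709Luma(r: Float, g: Float, b: Float) -> Float {
    return 0.2126 * r + 0.7152 * g + 0.0722 * b
}
```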
45 |
46 | 
47 |
48 | ### Discrete Cosine Transform (DCT)
49 | A Discrete Cosine Transform (DCT) is then applied to the 32x32 image. You're probably better off learning how a DCT works elsewhere than from a short explanation here. The key things to know are that the upper-left corner of the DCT matrix contains the low-frequency information and the bottom-right corner contains the high-frequency information. The "perceptual" part of the image hashing is based on the low-frequency part, so although the DCT uses the full 32x32 texture to compute the coefficients, only the upper-left 8x8 coefficients of the DCT matrix are computed.
50 | The basic formula (with the notation used in the code) is:
51 |
52 | 
53 |
54 | Where `u`, `v` index the 8x8 DCT matrix (range: `[0, 8)`), `i`, `j` iterate over the 32x32 image (range: `[0, 32)`), and `pixel(i,j)` is the grayscale value of the pixel in the 32x32 image at the i-th row and j-th column.
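A direct, unoptimized reading of that sum looks roughly like this (`pixels` holds the 32x32 grayscale values row-major; any normalization constants are omitted here, since the hash only compares coefficients against their mean):

```swift
import Foundation

/// Computes the upper-left dctSize x dctSize DCT coefficients of a
/// resizedSize x resizedSize grayscale image stored row-major in `pixels`.
func dctCoefficients(pixels: [Float], resizedSize: Int = 32, dctSize: Int = 8) -> [Float] {
    var dct = [Float](repeating: 0, count: dctSize * dctSize)
    for u in 0..<dctSize {
        for v in 0..<dctSize {
            var sum: Float = 0
            for i in 0..<resizedSize {
                for j in 0..<resizedSize {
                    sum += pixels[i * resizedSize + j]
                        * cos((2 * Float(i) + 1) * Float(u) * Float.pi / (2 * Float(resizedSize)))
                        * cos((2 * Float(j) + 1) * Float(v) * Float.pi / (2 * Float(resizedSize)))
                }
            }
            dct[u * dctSize + v] = sum
        }
    }
    return dct
}
```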
55 |
56 | 
57 |
58 | ### Hash
59 | To compute the hash from the 8x8 DCT, the `[0,0]` value is set to `0.0`, essentially removing any constant component throughout the image. Then the mean of the 8x8 matrix is computed and, for each value in the matrix, a "1" is written if the DCT at that position is greater than the mean, and a "0" is written otherwise.
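Reusing the `dctCoefficients` sketch from above, the hashing step looks roughly like this (the input pixels here are placeholder values):

```swift
// 32x32 grayscale pixel values (hypothetical input), row-major.
let pixels = [Float](repeating: 0.5, count: 32 * 32)

var dct = dctCoefficients(pixels: pixels)
dct[0] = 0.0                                   // drop the constant (DC) component
let mean = dct.reduce(0, +) / Float(dct.count)

var binaryHash = ""
for value in dct {
    binaryHash += value > mean ? "1" : "0"     // threshold each coefficient against the mean
}
```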
60 |
61 | 
62 |
63 | Similar images have similar hashes. The first sample image and its heavily compressed version share the same hash, but the slightly cropped + color adjusted image has a slightly different hash. The last image, which is completely different, has a completely different hash.
64 |
65 | All that's left is to compute a string value from the 8x8 hash matrix. To get a binary representation, we start with an empty string, iterate over the 8x8 matrix, and append a "1" or "0" for each value. We end up with something like `"1001001001111111011011011111011000111111111101110111101010111101"`.
66 | We could stop there, but it's not optimal to store 64 bits of information in a 64-character string. Instead, we encode that "binary string" using a base-36 encoding, ending up with something like `"2879bvhn9r2kd"`, which is the value exposed by the `.stringValue` property of the `PerceptualHash` result. This value can be computed for several images and used to check for duplicates by comparing the strings.
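For a 64-bit hash, that conversion amounts to packing the binary string into a `UInt64` and rendering it in radix 36 (longer hashes are split into several 64-bit blocks, as done in `PerceptualHash.swift`):

```swift
let binaryHash = "1001001001111111011011011111011000111111111101110111101010111101"

// Pack the 64 bits into a UInt64, then render it in base 36.
if let bits = UInt64(binaryHash, radix: 2) {
    print(String(bits, radix: 36, uppercase: false)) // compact base-36 representation
}
```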
67 |
68 | ## Further reading
69 | - [pHash.org](http://www.phash.org)
70 | - [Implementation and Benchmarking of Perceptual Image Hash Functions (Christoph Zauner)](http://www.phash.org/docs/pubs/thesis_zauner.pdf)
71 | - [Discrete Cosine Transform for 8x8 Blocks with CUDA (NVIDIA)](https://developer.download.nvidia.com/assets/cuda/files/dct8x8.pdf)
72 | - [DCT Implementation on GPU (Serpil Tokdemir, Georgia State University)](https://core.ac.uk/download/71421787.pdf)
73 |
--------------------------------------------------------------------------------
/Sources/SwiftPerceptualHash/PerceptualHash.swift:
--------------------------------------------------------------------------------
1 | //
2 | // File.swift
3 | //
4 | //
5 | // Created by Raúl Montón Pinillos on 13/4/23.
6 | //
7 |
8 | import Foundation
9 |
10 |
11 | /// A `PerceptualHash` is a hash used to compare how similar two images are. Two images are
12 | /// identical if their hashes are the same. Similar images will have similar hashes, and completely
13 | /// different images will have completely different hashes.
14 | ///
15 | /// As with any hashing algorithm, collisions can happen, where two completely different images end
16 | /// up with the same hash. This happens because, while there's an infinite number of images, a
17 | /// `PerceptualHash` has a fixed number of bits to represent all of them.
18 | ///
19 | /// Two hashes can only be compared if they have been generated using the same
20 | /// `PerceptualHashGenerator` configuration.
21 | public struct PerceptualHash: Equatable {
22 |
23 | public let numberOfBits: Int
24 | public let blocks: [UInt64]
25 |
26 | // MARK: - Strings
27 |
28 | /// A string representation of the perceptual hash, in its most compact form (i.e. "40ixng9r").
29 | public var stringValue: String {
30 | var finalString = ""
31 | for block in blocks {
32 | finalString.append(String(block, radix: 36, uppercase: false))
33 | }
34 | return finalString
35 | }
36 | /// A binary representation of the perceptual hash (i.e. "100100100111111101101101111101100011111").
37 | public var binaryString: String {
38 | var finalString = ""
39 | for block in blocks {
40 | finalString.append(String(block, radix: 2))
41 | }
42 | return finalString
43 | }
44 | /// A hexadecimal representation of the perceptual hash (i.e. "493FB6FB1F").
45 | public var hexString: String {
46 | var finalString = ""
47 | for block in blocks {
48 | finalString.append(String(block, radix: 16, uppercase: true))
49 | }
50 | return finalString
51 | }
52 |
53 | // MARK: - Init
54 |
55 | /// You typically don't call this initializer directly. Instead, you use a method from the `PerceptualHashGenerator`
56 | /// class to get a object of this type.
57 | /// - Parameter binaryString: A binary representation of the perceptual hash (i.e. "1001001").
58 | public init(binaryString: String) {
59 |
60 | var blocks = [UInt64]()
61 |
62 | // Get the number of characters in the string. Since each character
63 | // represents a bit, that means that this is also the number of bits.
64 | let numberOfBits = binaryString.count
65 |
66 | // Divide the string in blocks of 64 characters (bits).
67 |
68 | // Number of blocks with full 64-bit numbers
69 | let fullBlockCount = numberOfBits / 64
70 |
71 | // The string size might not be a multiple of 64
72 | let remainder = numberOfBits % 64
73 |
74 | // Convert the most significant bits (less than or 64) to a UInt64
75 | let remainderIndex = binaryString.index(binaryString.startIndex, offsetBy: remainder)
76 | if remainder != 0 {
77 |             let remainderString = binaryString[..<remainderIndex]
78 |             if let remainderBlock = UInt64(remainderString, radix: 2) {
79 |                 blocks.append(remainderBlock)
80 |             }
81 |         }
82 | 
83 |         // Convert each group of 64 characters (bits) to a UInt64, starting
84 |         // after the remainder bits (if any).
85 |         for blockIndex in 0..<fullBlockCount {
86 |             let startIndex = binaryString.index(remainderIndex, offsetBy: blockIndex * 64)
87 |             let endIndex = binaryString.index(startIndex, offsetBy: 64)
88 |             let blockString = binaryString[startIndex..<endIndex]
89 |             if let block = UInt64(blockString, radix: 2) {
90 |                 blocks.append(block)
91 |             }
92 |         }
93 | 
94 |         self.blocks = blocks
95 |         self.numberOfBits = numberOfBits
96 |     }
97 | }
98 | 
99 | // MARK: - Similarity
100 | 
101 | /// Computes how similar two hashes are, from 0 (completely different hashes) to 1
102 | /// (identical hashes).
103 | ///
104 | /// Two hashes can only be compared if they have been generated using the same
105 | /// `PerceptualHashGenerator` configuration.
106 | /// - Parameters:
107 | ///   - lhs: The first `PerceptualHash` to compare.
108 | ///   - rhs: The second `PerceptualHash` to compare.
109 | /// - Throws: `PerceptualHashError.numberOfBitsMismatch` if the two hashes have a
110 | /// different number of bits.
111 | public func similarity(_ lhs: PerceptualHash, _ rhs: PerceptualHash) throws -> Double {
112 |
113 | // Make sure that the left hand side and right hand side have the
114 | // same number of bits.
115 | guard lhs.numberOfBits == rhs.numberOfBits else {
116 | throw PerceptualHashError.numberOfBitsMismatch
117 | }
118 |
119 | // Set a counter for the number of bits that are different.
120 | var differingBitCounter = 0
121 |
122 | // Iterate over all the stored blocks
123 |     for i in 0..<lhs.blocks.count {
124 |         var differentBits = lhs.blocks[i] ^ rhs.blocks[i]
125 |         while differentBits > 0 {
126 | let maskedBits = differentBits & 1
127 | if maskedBits != 0 {
128 | differingBitCounter += 1
129 | }
130 | // Right-shift bits to test next bit.
131 | differentBits = differentBits >> 1
132 | }
133 | }
134 | return 1.0 - Double(differingBitCounter) / Double(lhs.numberOfBits)
135 | }
136 |
--------------------------------------------------------------------------------
/Sources/SwiftPerceptualHash/PerceptualHashGenerator.swift:
--------------------------------------------------------------------------------
1 | //
2 | // PerceptualHash.swift
3 | // SwiftPerceptualHash
4 | //
5 | // Created by Raúl Montón Pinillos on 12/4/23.
6 | //
7 |
8 | import Foundation
9 | import Metal
10 | import MetalKit
11 | import MetalPerformanceShaders
12 |
13 | /// Class used to generate a `PerceptualHash`. A `PerceptualHash` is a hash used to
14 | /// compare how similar two images are. Two images are identical if their hashes are the same.
15 | /// Similar images will have similar hashes, and completely different images will have completely
16 | /// different hashes.
17 | ///
18 | /// Create a `PerceptualHashGenerator` once, and reuse it throughout the app. Initializing
19 | /// this class creates a bunch of Metal objects that are expensive to create but can be reused to
20 | /// compute hashes for different images.
21 | public class PerceptualHashGenerator {
22 |
23 | // MARK: - Properties
24 |
25 | /// The size of the DCT matrix used to generate the hash. A hash will require `pow(dctSize,2)`
26 | /// bits to be stored. Defaults to a 8x8 matrix, to generate 64-bit hashes.
27 | public let dctSize: Int
28 | /// The size the image will be resized to before the DCT is computed. Defaults to a 32x32 image.
29 | public let resizedSize: Int
30 |
31 | /// The maximum number of command buffers that can be used simultaneously. Each call to
32 | /// `perceptualHash(imageData: Data)` creates a new command buffer.
33 | private let maxCommandBufferCount: Int = 128
34 | /// The system's default Metal device.
35 | private let device: MTLDevice
36 | /// The command queue.
37 | private let commandQueue: MTLCommandQueue
38 | /// The Pipeline State Object of a grayscale kernel.
39 | private let grayscalePSO: MTLComputePipelineState
40 | /// Used to load a `MTLTexture` from image data.
41 | private let textureLoader: MTKTextureLoader
42 | /// An actor to limit the number of concurrent tasks using the command buffer.
43 | private let concurrencyLimiter = ConcurrencyLimiter()
44 |
45 | internal class IntermediateTextures {
46 | /// Unique identifier for the group of textures.
47 | internal let id = UUID()
48 | /// Whether the current texture group is being used in a command buffer.
49 | internal var inUse: Bool = false
50 | /// The intermediate, resized image texture used to compute the DCT.
51 | internal let color: MTLTexture
52 | /// The intermediate, resized image texture used to compute the DCT, in grayscale.
53 | internal let grayscale: MTLTexture
54 |
55 | init(color: MTLTexture, grayscale: MTLTexture) {
56 | self.color = color
57 | self.grayscale = grayscale
58 | }
59 | }
60 |
61 | // MARK: - Initialization
62 |
63 | /// Initializes a `PerceptualHashGenerator` with a specific configuration.
64 | /// - Parameters:
65 | /// - resizedSize: The size the image will be resized to before the DCT is computed.
66 | /// Defaults to a 32x32 image. Bigger sizes can allow for more precise image comparisons,
67 | /// as more high-frequency data is preserved.
68 | /// - dctSize: The size of the DCT matrix used to generate the hash. Bigger sizes can
69 | /// allow for more precise image comparisons, as more high-frequency data is preserved.
70 | public init(resizedSize: Int = 32, dctSize: Int = 8) throws {
71 |
72 | // Check against wrong parameter configurations
73 | guard resizedSize > 0 else {
74 | throw PerceptualHashError.negativeOrZeroResizedSize
75 | }
76 | guard dctSize > 1 else {
77 | throw PerceptualHashError.wrongDCTSize
78 | }
79 | guard resizedSize >= dctSize else {
80 | throw PerceptualHashError.resizedSizeTooSmallForDCTSize
81 | }
82 | self.resizedSize = resizedSize
83 | self.dctSize = dctSize
84 |
85 | // Get Metal device
86 | guard let device = MTLCreateSystemDefaultDevice() else {
87 | throw PerceptualHashError.metalDeviceCreationFailed
88 | }
89 | self.device = device
90 |
91 | // Get the default library
92 | guard let defaultLibrary = try? device.makeDefaultLibrary(bundle: .module) else {
93 | throw PerceptualHashError.makeDefaultLibraryFailed
94 | }
95 |
96 | // Create the grayscale kernel function
97 | guard let grayscaleKernel = defaultLibrary.makeFunction(name: "grayscale_kernel") else {
98 | throw PerceptualHashError.makeGrayscaleKernelFailed
99 | }
100 |
101 | // Create the grayscale Pipeline State Object
102 | guard let grayscalePSO = try? device.makeComputePipelineState(function: grayscaleKernel) else {
103 | throw PerceptualHashError.makeGrayscalePSOFailed
104 | }
105 | self.grayscalePSO = grayscalePSO
106 |
107 | // Create a texture loader
108 | self.textureLoader = MTKTextureLoader(device: device)
109 |
110 | // Create command queue
111 | guard let commandQueue = device.makeCommandQueue(
112 | maxCommandBufferCount: maxCommandBufferCount
113 | ) else {
114 | throw PerceptualHashError.makeCommandQueueFailed
115 | }
116 | self.commandQueue = commandQueue
117 | }
118 |
119 | // MARK: - IntermediateTextures
120 |
121 | /// Creates a new set of `IntermediateTextures` with the given configuration options.
122 | internal func createIntermediateTextures(
123 | device: MTLDevice,
124 | resizedSize: Int,
125 | pixelFormat: MTLPixelFormat
126 | ) throws -> IntermediateTextures {
127 | // Create small intermediate texture
128 | let resizedTextureDescriptor = MTLTextureDescriptor.texture2DDescriptor(
129 | pixelFormat: pixelFormat,
130 | width: resizedSize,
131 | height: resizedSize,
132 | mipmapped: false
133 | )
134 | resizedTextureDescriptor.usage = [.shaderRead, .shaderWrite]
135 | guard let resizedTexture = device.makeTexture(descriptor: resizedTextureDescriptor) else {
136 | throw PerceptualHashError.createResizedTextureFailed(resizedSize)
137 | }
138 |
139 | // Create small intermediate texture (grayscale)
140 | let grayscaleTextureDescriptor = MTLTextureDescriptor.texture2DDescriptor(
141 | pixelFormat: .r32Float,
142 | width: resizedSize,
143 | height: resizedSize,
144 | mipmapped: false
145 | )
146 | grayscaleTextureDescriptor.usage = [.shaderRead, .shaderWrite]
147 | guard let grayscaleTexture = device.makeTexture(descriptor: grayscaleTextureDescriptor) else {
148 | throw PerceptualHashError.createGrayscaleResizedTextureFailed(resizedSize)
149 | }
150 | return IntermediateTextures(color: resizedTexture, grayscale: grayscaleTexture)
151 | }
152 |
153 | // MARK: - Hashing
154 |
155 | /// Creates a `PerceptualHash` for an image using its raw data.
156 | /// - Parameter imageData: The raw data for the image. Make sure that the image orientation is `.up`.
157 | /// - Returns: A `PerceptualHash` object, used to check how similar two images are.
158 | public func perceptualHash(imageData: Data) async throws -> PerceptualHash {
159 |
160 | // Before calling makeCommandBuffer, we have to ensure that no more than
161 | // the maximum number of tasks are running, or makeCommandBuffer will
162 | // block, potentially deadlocking our Swift Concurrency code.
163 | await concurrencyLimiter.newRunningTask(maxCommandBufferCount: maxCommandBufferCount)
164 | defer {
165 | Task {
166 | await concurrencyLimiter.endRunningTask()
167 | }
168 | }
169 |
170 | // Create command buffer
171 | guard let commandBuffer = commandQueue.makeCommandBuffer() else {
172 | throw PerceptualHashError.makeCommandBufferFailed
173 | }
174 |
175 | // Create the source image texture
176 | var sourceImageTexture = try await self.textureLoader.newTexture(
177 | data: imageData,
178 | options: [
179 | MTKTextureLoader.Option.textureUsage: MTLTextureUsage.unknown.rawValue,
180 | MTKTextureLoader.Option.textureStorageMode: MTLStorageMode.shared.rawValue
181 | ]
182 | )
183 | guard sourceImageTexture.pixelFormat == .bgra8Unorm_srgb
184 | || sourceImageTexture.pixelFormat == .bgra8Unorm
185 | || sourceImageTexture.pixelFormat == .rgba16Unorm
186 | || sourceImageTexture.pixelFormat == .rgba8Unorm_srgb
187 | || sourceImageTexture.pixelFormat == .rgba8Unorm
188 | || sourceImageTexture.pixelFormat == .rgba8Sint
189 | || sourceImageTexture.pixelFormat == .rgba8Snorm
190 | else {
191 | throw PerceptualHashError.unsupportedSourceImagePixelFormat(sourceImageTexture.pixelFormat)
192 | }
193 |
194 | // Compute the x and y axis scales required to resize the image
195 | let scaleX = Double(resizedSize) / Double(sourceImageTexture.width)
196 | let scaleY = Double(resizedSize) / Double(sourceImageTexture.height)
197 |
198 | // MARK: - Gaussian blur
199 |
200 | // Blur the image to get rid of all the high-frequency features that could
201 | // result in aliasing in the downsampled image
202 | let blur = MPSImageGaussianBlur(device: device, sigma: Float(1 / (2 * max(scaleX, scaleY))))
203 | withUnsafeMutablePointer(to: &sourceImageTexture) { texturePointer in
204 | _ = blur.encode(commandBuffer: commandBuffer, inPlaceTexture: texturePointer)
205 | }
206 |
207 | // MARK: - Resize
208 |
209 | // Get a current set of intermediate textures or create a new one
210 | let intermediateTextures = try createIntermediateTextures(
211 | device: device,
212 | resizedSize: resizedSize,
213 | pixelFormat: sourceImageTexture.pixelFormat
214 | )
215 |
216 | // Resize the image to target 32x32 resolution
217 | let resize = MPSImageBilinearScale(device: device)
218 | var transform = MPSScaleTransform(
219 | scaleX: scaleX,
220 | scaleY: scaleY,
221 | translateX: 0.0,
222 | translateY: 0.0
223 | )
224 |         withUnsafePointer(to: &transform) { (transformPtr: UnsafePointer<MPSScaleTransform>) -> () in
225 | resize.scaleTransform = transformPtr
226 | }
227 | resize.encode(
228 | commandBuffer: commandBuffer,
229 | sourceTexture: sourceImageTexture,
230 | destinationTexture: intermediateTextures.color
231 | )
232 |
233 | // MARK: - Grayscale
234 |
235 | // Create compute command encoder
236 | guard let computeEncoder = commandBuffer.makeComputeCommandEncoder() else {
237 | throw PerceptualHashError.makeComputeCommandEncoderFailed
238 | }
239 |
240 | // Set the PSO to perform a grayscale conversion
241 | computeEncoder.setComputePipelineState(grayscalePSO)
242 |
243 | // Set the source texture
244 | computeEncoder.setTexture(intermediateTextures.color, index: 0)
245 |
246 | // Set the output texture
247 | computeEncoder.setTexture(intermediateTextures.grayscale, index: 1)
248 |
249 | // Dispatch the threads
250 | let threadgroupSize = MTLSizeMake(16, 16, 1)
251 | var threadgroupCount = MTLSize()
252 | threadgroupCount.width = (intermediateTextures.color.width + threadgroupSize.width - 1) / threadgroupSize.width
253 | threadgroupCount.height = (intermediateTextures.color.height + threadgroupSize.height - 1) / threadgroupSize.height
254 | // The image data is 2D, so set depth to 1
255 | threadgroupCount.depth = 1
256 | computeEncoder.dispatchThreadgroups(threadgroupCount, threadsPerThreadgroup: threadgroupSize)
257 |
258 | // Finish encoding
259 | computeEncoder.endEncoding()
260 |
261 | // MARK: - Finish
262 |
263 | let binaryStringHash = await withCheckedContinuation { continuation in
264 | commandBuffer.addCompletedHandler { [intermediateTextures] _ in
265 | let binaryStringHash = self.computeDCT(
266 | grayscaleTexture: intermediateTextures.grayscale
267 | )
268 | continuation.resume(returning: binaryStringHash)
269 | }
270 | // Submit work to the GPU
271 | commandBuffer.commit()
272 | }
273 | return PerceptualHash(binaryString: binaryStringHash)
274 | }
275 |
276 | // MARK: - Compute DCT
277 |
278 | private func computeDCT(grayscaleTexture: MTLTexture) -> String {
279 | let rowBytes = resizedSize * 4
280 | let length = rowBytes * resizedSize
281 | let region = MTLRegionMake2D(0, 0, resizedSize, resizedSize)
282 | var grayBytes = [Float32](repeating: 0, count: length)
283 | var dctArray = [Float](repeating: 0, count: dctSize * dctSize)
284 |
285 | var binaryHash: String = ""
286 |
287 | // Fill with the texture data
288 | grayBytes.withUnsafeMutableBytes { r32BytesPointer in
289 | guard let baseAddress = r32BytesPointer.baseAddress else {
290 | return
291 | }
292 | // Fill the array with data from the grayscale texture
293 | grayscaleTexture.getBytes(
294 | baseAddress,
295 | bytesPerRow: rowBytes,
296 | from: region,
297 | mipmapLevel: 0
298 | )
299 | }
300 | // Compute each one of the elements of the discrete cosine transform
301 |         // Only the upper dctSize x dctSize coefficients of the DCT are computed,
302 |         // since the hash only uses the low-frequency part of the transform. Each
303 |         // (u,v) coefficient is the sum over every pixel (i,j) of the resized image of
304 |         // pixel(i,j) * cos((2i+1)uπ/(2N)) * cos((2j+1)vπ/(2N)), with N = resizedSize.
305 | 
306 |         let dctLength = 2 * Float(resizedSize)
307 | 
308 |         for u in 0..<dctSize {
309 |             for v in 0..<dctSize {
310 |                 // Accumulate the (u,v) DCT coefficient.
311 |                 var coefficient: Float = 0
312 |                 for i in 0..<resizedSize {
313 |                     for j in 0..<resizedSize {
314 |                         // Grayscale value of the pixel at the i-th row and j-th
315 |                         // column of the resized image.
316 |                         let pixel = grayBytes[i * resizedSize + j]
317 |                         // Cosine terms of the DCT for the current output coefficient
318 |                         // (u,v) and input pixel (i,j).
319 |                         let cosI = cos((2 * Float(i) + 1) * Float(u) * Float.pi / dctLength)
320 |                         let cosJ = cos((2 * Float(j) + 1) * Float(v) * Float.pi / dctLength)
321 |                         coefficient += pixel * cosI * cosJ
322 |                     }
323 |                 }
324 |                 dctArray[u * dctSize + v] = coefficient
325 |             }
326 |         }
327 | 
328 |         // MARK: - Compute hash
329 | 
330 |         // Remove any constant component throughout the image by zeroing the
331 |         // [0,0] value of the DCT.
332 |         dctArray[0] = 0.0
333 | 
334 |         // Compute the mean of the DCT values, used as the threshold to
335 |         // binarize the matrix.
336 |         var meanDCT: Double = 0
337 |         for dctValue in dctArray {
338 |             meanDCT += Double(dctValue)
339 |         }
340 |         meanDCT /= Double(dctSize * dctSize)
341 | 
342 |         // For each value in the DCT matrix, write a "1" if the DCT of that
343 |         // position is greater than the mean, and a "0" otherwise.
344 |         for dctValue in dctArray {
345 |             if dctValue > Float32(meanDCT) {
346 | binaryHash += "1"
347 | } else {
348 | binaryHash += "0"
349 | }
350 | }
351 | return binaryHash
352 | }
353 | }
354 |
355 |
--------------------------------------------------------------------------------