├── .gitignore ├── ImageHashingTest ├── Program.cs ├── Properties │ └── AssemblyInfo.cs └── ImageHashingTest.csproj ├── ImageHashing ├── Properties │ └── AssemblyInfo.cs ├── ImageHashing.csproj └── ImageHashing.cs ├── README.md └── ImageHashing.sln /.gitignore: -------------------------------------------------------------------------------- 1 | # Build Folders (you can keep bin if you'd like, to store dlls and pdbs) 2 | bin 3 | obj 4 | 5 | # mstest test results 6 | TestResults 7 | 8 | # Stupid files that should never be included 9 | *.suo 10 | *.vcxproj.user 11 | -------------------------------------------------------------------------------- /ImageHashingTest/Program.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Linq; 4 | using System.Text; 5 | 6 | using ImageHashing; 7 | 8 | namespace ImageHashingTest 9 | { 10 | class Program 11 | { 12 | public static string diff_1 = "diff1.jpg"; 13 | public static string same_1 = "same1.jpg"; // My original test has same_1 and same_2 as the same image 14 | public static string same_2 = "same2.jpg"; // at different resolutions, so similarity = 100% 15 | 16 | static void Main(string[] args) 17 | { 18 | Console.WriteLine(String.Format("Similarity, diff-same: {0}", 19 | ImageHashing.ImageHashing.Similarity(diff_1, same_1))); 20 | Console.WriteLine(String.Format("Similarity, same-same: {0}", 21 | ImageHashing.ImageHashing.Similarity(same_1, same_2))); 22 | return; 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /ImageHashing/Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | using System.Runtime.CompilerServices; 3 | using System.Runtime.InteropServices; 4 | 5 | // General Information about an assembly is controlled through the following 6 | // set of attributes. 
Change these attribute values to modify the information 7 | // associated with an assembly. 8 | [assembly: AssemblyTitle("ImageHashing")] 9 | [assembly: AssemblyDescription("")] 10 | [assembly: AssemblyConfiguration("")] 11 | [assembly: AssemblyCompany("")] 12 | [assembly: AssemblyProduct("ImageHashing")] 13 | [assembly: AssemblyCopyright("Copyright © 2012")] 14 | [assembly: AssemblyTrademark("")] 15 | [assembly: AssemblyCulture("")] 16 | 17 | // Setting ComVisible to false makes the types in this assembly not visible 18 | // to COM components. If you need to access a type in this assembly from 19 | // COM, set the ComVisible attribute to true on that type. 20 | [assembly: ComVisible(false)] 21 | 22 | // The following GUID is for the ID of the typelib if this project is exposed to COM 23 | [assembly: Guid("e7e3047e-8e5e-4931-8cdb-fddd7b50d018")] 24 | 25 | // Version information for an assembly consists of the following four values: 26 | // 27 | // Major Version 28 | // Minor Version 29 | // Build Number 30 | // Revision 31 | // 32 | // You can specify all the values or you can default the Build and Revision Numbers 33 | // by using the '*' as shown below: 34 | // [assembly: AssemblyVersion("1.0.*")] 35 | [assembly: AssemblyVersion("1.0.0.0")] 36 | [assembly: AssemblyFileVersion("1.0.0.0")] 37 | -------------------------------------------------------------------------------- /ImageHashingTest/Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | using System.Runtime.CompilerServices; 3 | using System.Runtime.InteropServices; 4 | 5 | // General Information about an assembly is controlled through the following 6 | // set of attributes. Change these attribute values to modify the information 7 | // associated with an assembly. 
8 | [assembly: AssemblyTitle("ImageHashingTest")] 9 | [assembly: AssemblyDescription("")] 10 | [assembly: AssemblyConfiguration("")] 11 | [assembly: AssemblyCompany("")] 12 | [assembly: AssemblyProduct("ImageHashingTest")] 13 | [assembly: AssemblyCopyright("Copyright © 2012")] 14 | [assembly: AssemblyTrademark("")] 15 | [assembly: AssemblyCulture("")] 16 | 17 | // Setting ComVisible to false makes the types in this assembly not visible 18 | // to COM components. If you need to access a type in this assembly from 19 | // COM, set the ComVisible attribute to true on that type. 20 | [assembly: ComVisible(false)] 21 | 22 | // The following GUID is for the ID of the typelib if this project is exposed to COM 23 | [assembly: Guid("7e6efebb-d57c-453d-80fa-9729c34f73c8")] 24 | 25 | // Version information for an assembly consists of the following four values: 26 | // 27 | // Major Version 28 | // Minor Version 29 | // Build Number 30 | // Revision 31 | // 32 | // You can specify all the values or you can default the Build and Revision Numbers 33 | // by using the '*' as shown below: 34 | // [assembly: AssemblyVersion("1.0.*")] 35 | [assembly: AssemblyVersion("1.0.0.0")] 36 | [assembly: AssemblyFileVersion("1.0.0.0")] 37 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ImageHashing 2 | ============ 3 | 4 | Overview 5 | -------- 6 | 7 | This is a very simple C# library used to generate average perceptual hashes of images and files containing image content. A similarity function is also included to compare hashes to one another and obtain a percentage on just how alike those hashes are. Various convenience methods are also provided to reduce the number of calls made by the user. 8 | 9 | This library is free to use and based heavily on: 10 | 11 | * Dr. 
Neal Krawetz's average hashing algorithm, outlined at http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html 12 | * David Oftedal's C# implementation of Dr. Krawetz's algorithm, available at http://folk.uio.no/davidjo/computing.php 13 | 14 | Credit goes to both of them for the base logic and an initial implementation. My edits to Oftedal's code are mostly about readability and style, plus a few additional convenience functions. 15 | 16 | Perceptual Hashing 17 | ------------------ 18 | 19 | For those not familiar with perceptual hashing, pHash.org gives a good definition: 20 | 21 | > A perceptual hash is a fingerprint of a multimedia file derived from various features from its content. Unlike cryptographic hash functions which rely on the avalanche effect of small changes in input leading to drastic changes in output, perceptual hashes are "close" to one another if the features are similar. 22 | 23 | So how is this useful? Well, let's say you're a forensic scientist with a lead on a case. You have an image that you are looking for in some other collection of photos. If the manager of that collection changes even one pixel of the image, then the MD5/SHA-1/whatever hash will end up being *completely* different. Herein lies a problem: traditional cryptographic hashes give only a yes-or-no answer. While this is great for verifying very content-sensitive files (like a downloaded software package), it doesn't work well with multimedia recognition. Changing one pixel in a picture won't change the content according to the human eye; we will still recognize it as the same image. 24 | 25 | Enter perceptual hashing. Perceptual hashes are based on the multimedia content of a file, not just the bytes contained in that file. This allows us to run comparisons on image files and determine similarity both easily and efficiently.
This technology is widely used: both Google and TinEye use it for image searches by crawling the web for images, computing hashes, storing them in some database, then running your given image against those known hashes for matches. -------------------------------------------------------------------------------- /ImageHashing/ImageHashing.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Debug 5 | AnyCPU 6 | 8.0.30703 7 | 2.0 8 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09} 9 | Library 10 | Properties 11 | ImageHashing 12 | ImageHashing 13 | v4.0 14 | 512 15 | 16 | 17 | true 18 | full 19 | false 20 | bin\Debug\ 21 | DEBUG;TRACE 22 | prompt 23 | 4 24 | 25 | 26 | pdbonly 27 | true 28 | bin\Release\ 29 | TRACE 30 | prompt 31 | 4 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 55 | -------------------------------------------------------------------------------- /ImageHashing.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 11.00 3 | # Visual Studio 2010 4 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ImageHashing", "ImageHashing\ImageHashing.csproj", "{5B189394-0B91-4BFF-B4FB-5CEA51174C09}" 5 | EndProject 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ImageHashingTest", "ImageHashingTest\ImageHashingTest.csproj", "{DB397672-EAF5-4199-A90B-B28FD0431A1A}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Any CPU = Debug|Any CPU 11 | Debug|Mixed Platforms = Debug|Mixed Platforms 12 | Debug|x86 = Debug|x86 13 | Release|Any CPU = Release|Any CPU 14 | Release|Mixed Platforms = Release|Mixed Platforms 15 | Release|x86 = Release|x86 16 | EndGlobalSection 17 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 18 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 19 | 
{5B189394-0B91-4BFF-B4FB-5CEA51174C09}.Debug|Any CPU.Build.0 = Debug|Any CPU 20 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU 21 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU 22 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09}.Debug|x86.ActiveCfg = Debug|Any CPU 23 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09}.Release|Any CPU.ActiveCfg = Release|Any CPU 24 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09}.Release|Any CPU.Build.0 = Release|Any CPU 25 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU 26 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09}.Release|Mixed Platforms.Build.0 = Release|Any CPU 27 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09}.Release|x86.ActiveCfg = Release|Any CPU 28 | {DB397672-EAF5-4199-A90B-B28FD0431A1A}.Debug|Any CPU.ActiveCfg = Debug|x86 29 | {DB397672-EAF5-4199-A90B-B28FD0431A1A}.Debug|Mixed Platforms.ActiveCfg = Debug|x86 30 | {DB397672-EAF5-4199-A90B-B28FD0431A1A}.Debug|Mixed Platforms.Build.0 = Debug|x86 31 | {DB397672-EAF5-4199-A90B-B28FD0431A1A}.Debug|x86.ActiveCfg = Debug|x86 32 | {DB397672-EAF5-4199-A90B-B28FD0431A1A}.Debug|x86.Build.0 = Debug|x86 33 | {DB397672-EAF5-4199-A90B-B28FD0431A1A}.Release|Any CPU.ActiveCfg = Release|x86 34 | {DB397672-EAF5-4199-A90B-B28FD0431A1A}.Release|Mixed Platforms.ActiveCfg = Release|x86 35 | {DB397672-EAF5-4199-A90B-B28FD0431A1A}.Release|Mixed Platforms.Build.0 = Release|x86 36 | {DB397672-EAF5-4199-A90B-B28FD0431A1A}.Release|x86.ActiveCfg = Release|x86 37 | {DB397672-EAF5-4199-A90B-B28FD0431A1A}.Release|x86.Build.0 = Release|x86 38 | EndGlobalSection 39 | GlobalSection(SolutionProperties) = preSolution 40 | HideSolutionNode = FALSE 41 | EndGlobalSection 42 | EndGlobal 43 | -------------------------------------------------------------------------------- /ImageHashingTest/ImageHashingTest.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Debug 5 | x86 6 
| 8.0.30703 7 | 2.0 8 | {DB397672-EAF5-4199-A90B-B28FD0431A1A} 9 | Exe 10 | Properties 11 | ImageHashingTest 12 | ImageHashingTest 13 | v4.0 14 | Client 15 | 512 16 | 17 | 18 | x86 19 | true 20 | full 21 | false 22 | bin\Debug\ 23 | DEBUG;TRACE 24 | prompt 25 | 4 26 | 27 | 28 | x86 29 | pdbonly 30 | true 31 | bin\Release\ 32 | TRACE 33 | prompt 34 | 4 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09} 53 | ImageHashing 54 | 55 | 56 | 57 | 64 | -------------------------------------------------------------------------------- /ImageHashing/ImageHashing.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Drawing; 3 | using System.Drawing.Drawing2D; 4 | using System.Drawing.Imaging; 5 | 6 | namespace ImageHashing 7 | { 8 | /// 9 | /// Contains a variety of methods useful in generating image hashes for image comparison 10 | /// and recognition. 11 | /// 12 | /// Credit for the AverageHash implementation to David Oftedal of the University of Oslo. 13 | /// 14 | public class ImageHashing 15 | { 16 | #region Private constants and utility methods 17 | /// 18 | /// Bitcounts array used for BitCount method (used in Similarity comparisons). 19 | /// Don't try to read this or understand it, I certainly don't. Credit goes to 20 | /// David Oftedal of the University of Oslo, Norway for this. 
/// http://folk.uio.no/davidjo/computing.php
///
private static byte[] bitCounts = {
    0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,1,2,2,3,2,3,3,4,
    2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
    2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,
    4,5,5,6,5,6,6,7,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
    2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,2,3,3,4,3,4,4,5,
    3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
    4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
};

/// <summary>
/// Counts the bits that are set in a 64-bit value. Utility function for similarity.
/// The value is consumed one byte at a time, lowest byte first; each byte is used
/// as an index into the 256-entry <c>bitCounts</c> table and the looked-up counts
/// are summed.
/// </summary>
/// <param name="num">The hash we are counting.</param>
/// <returns>The total bit count.</returns>
private static uint BitCount(ulong num)
{
    uint total = 0;
    while (num != 0)
    {
        // Low byte selects the table entry; then shift the next byte into place.
        total += bitCounts[num & 0xff];
        num >>= 8;
    }
    return total;
}
#endregion

#region Public interface methods
/// <summary>
/// Computes the average hash of an image according to the algorithm given by Dr. Neal Krawetz
/// on his blog: http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html.
/// </summary>
/// <param name="image">The image to hash.</param>
/// <returns>The hash of the image.</returns>
/// <exception cref="System.ArgumentNullException">The image is null.</exception>
public static ulong AverageHash(Image image)
{
    // Fail fast, before any GDI+ object is allocated. Graphics.DrawImage would
    // raise the same exception type for a null image, only later.
    if (image == null)
    {
        throw new ArgumentNullException("image");
    }

    byte[] grayscale = new byte[64];
    uint averageValue = 0;

    // Squeeze the image into an 8x8 canvas. Bitmap and Graphics wrap GDI+ handles,
    // so both are disposed deterministically instead of waiting for finalization.
    using (Bitmap squeezed = new Bitmap(8, 8, PixelFormat.Format32bppRgb))
    {
        using (Graphics canvas = Graphics.FromImage(squeezed))
        {
            canvas.CompositingQuality = CompositingQuality.HighQuality;
            canvas.InterpolationMode = InterpolationMode.HighQualityBilinear;
            canvas.SmoothingMode = SmoothingMode.HighQuality;
            canvas.DrawImage(image, 0, 0, 8, 8);
        }

        // Reduce colors to 6-bit grayscale and accumulate the sum for the average.
        for (int y = 0; y < 8; y++)
        {
            for (int x = 0; x < 8; x++)
            {
                uint pixel = (uint)squeezed.GetPixel(x, y).ToArgb();
                uint gray = (pixel & 0x00ff0000) >> 16;   // red
                gray += (pixel & 0x0000ff00) >> 8;        // green
                gray += (pixel & 0x000000ff);             // blue
                gray /= 12;                               // R+G+B is 0..765, so this yields 0..63

                grayscale[x + (y * 8)] = (byte)gray;
                averageValue += gray;
            }
        }
    }
    averageValue /= 64;

    // Compute the hash: each bit is a pixel, in row-major order, with pixel i stored in bit 63 - i.
    // 1 = at or above the average, 0 = below the average
    ulong hash = 0;
    for (int i = 0; i < 64; i++)
    {
        if (grayscale[i] >= averageValue)
        {
            hash |= (1UL << (63 - i));
        }
    }

    return hash;
}

/// <summary>
/// Computes the average hash of the image content in the given file.
/// </summary>
/// <param name="path">Path to the input file.</param>
/// <returns>The hash of the input file's image content.</returns>
public static ulong AverageHash(String path)
{
    // A Bitmap created from a file keeps that file locked until it is disposed,
    // so release it as soon as the hash has been computed.
    using (Bitmap bmp = new Bitmap(path))
    {
        return AverageHash(bmp);
    }
}

/// <summary>
/// Returns a percentage-based similarity value between the two given hashes. The higher
/// the percentage, the closer the hashes are to being identical.
/// </summary>
/// <param name="hash1">The first hash.</param>
/// <param name="hash2">The second hash.</param>
/// <returns>The similarity percentage.</returns>
public static double Similarity(ulong hash1, ulong hash2)
{
    // XOR leaves a 1 in every position where the two hashes disagree.
    uint differingBits = BitCount(hash1 ^ hash2);
    uint matchingBits = 64 - differingBits;
    return (matchingBits * 100) / 64.0;
}

/// <summary>
/// Hashes both images with AverageHash and returns the percentage-based similarity
/// of the two hashes. The higher the percentage, the closer the images are to being
/// identical.
/// </summary>
/// <param name="image1">The first image.</param>
/// <param name="image2">The second image.</param>
/// <returns>The similarity percentage.</returns>
public static double Similarity(Image image1, Image image2)
{
    return Similarity(AverageHash(image1), AverageHash(image2));
}

/// <summary>
/// Hashes the image content of both files with AverageHash and returns the
/// percentage-based similarity of the two hashes. The higher the percentage, the
/// closer the image contents are to being identical.
/// </summary>
/// <param name="path1">The first image file.</param>
/// <param name="path2">The second image file.</param>
/// <returns>The similarity percentage.</returns>
public static double Similarity(String path1, String path2)
{
    return Similarity(AverageHash(path1), AverageHash(path2));
}
#endregion
} // class ImageHashing
} // namespace ImageHashing