├── .gitignore
├── ImageHashingTest
├── Program.cs
├── Properties
│ └── AssemblyInfo.cs
└── ImageHashingTest.csproj
├── ImageHashing
├── Properties
│ └── AssemblyInfo.cs
├── ImageHashing.csproj
└── ImageHashing.cs
├── README.md
└── ImageHashing.sln
/.gitignore:
--------------------------------------------------------------------------------
1 | # Build Folders (you can keep bin if you'd like, to store dlls and pdbs)
2 | bin
3 | obj
4 |
5 | # mstest test results
6 | TestResults
7 |
8 | # Stupid files that should never be included
9 | *.suo
10 | *.vcxproj.user
11 |
--------------------------------------------------------------------------------
/ImageHashingTest/Program.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 |
6 | using ImageHashing;
7 |
8 | namespace ImageHashingTest
9 | {
/// <summary>
/// Console harness that prints the similarity score reported by
/// <c>ImageHashing.ImageHashing.Similarity</c> for two pairs of image files.
/// </summary>
class Program
{
    // File names handed as-is to ImageHashing.Similarity(String, String).
    public static string diff_1 = "diff1.jpg";
    // The author's original test used the same picture at two different
    // resolutions for same_1 and same_2, so that pair scored 100%.
    public static string same_1 = "same1.jpg";
    public static string same_2 = "same2.jpg";

    /// <summary>
    /// Computes the similarity of two image files and writes it to the console.
    /// </summary>
    /// <param name="format">Composite format string with a single {0} placeholder for the score.</param>
    /// <param name="firstPath">Path of the first image file.</param>
    /// <param name="secondPath">Path of the second image file.</param>
    private static void Report(string format, string firstPath, string secondPath)
    {
        double score = ImageHashing.ImageHashing.Similarity(firstPath, secondPath);
        string line = String.Format(format, score);
        Console.WriteLine(line);
    }

    /// <summary>
    /// Entry point: reports the "different vs. same" pair first, then the "same vs. same" pair.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        Report("Similarity, diff-same: {0}", diff_1, same_1);
        Report("Similarity, same-same: {0}", same_1, same_2);
    }
}
25 | }
26 |
--------------------------------------------------------------------------------
/ImageHashing/Properties/AssemblyInfo.cs:
--------------------------------------------------------------------------------
1 | using System.Reflection;
2 | using System.Runtime.CompilerServices;
3 | using System.Runtime.InteropServices;
4 |
5 | // General Information about an assembly is controlled through the following
6 | // set of attributes. Change these attribute values to modify the information
7 | // associated with an assembly.
8 | [assembly: AssemblyTitle("ImageHashing")]
9 | [assembly: AssemblyDescription("")]
10 | [assembly: AssemblyConfiguration("")]
11 | [assembly: AssemblyCompany("")]
12 | [assembly: AssemblyProduct("ImageHashing")]
13 | [assembly: AssemblyCopyright("Copyright © 2012")]
14 | [assembly: AssemblyTrademark("")]
15 | [assembly: AssemblyCulture("")]
16 |
17 | // Setting ComVisible to false makes the types in this assembly not visible
18 | // to COM components. If you need to access a type in this assembly from
19 | // COM, set the ComVisible attribute to true on that type.
20 | [assembly: ComVisible(false)]
21 |
22 | // The following GUID is for the ID of the typelib if this project is exposed to COM
23 | [assembly: Guid("e7e3047e-8e5e-4931-8cdb-fddd7b50d018")]
24 |
25 | // Version information for an assembly consists of the following four values:
26 | //
27 | // Major Version
28 | // Minor Version
29 | // Build Number
30 | // Revision
31 | //
32 | // You can specify all the values or you can default the Build and Revision Numbers
33 | // by using the '*' as shown below:
34 | // [assembly: AssemblyVersion("1.0.*")]
35 | [assembly: AssemblyVersion("1.0.0.0")]
36 | [assembly: AssemblyFileVersion("1.0.0.0")]
37 |
--------------------------------------------------------------------------------
/ImageHashingTest/Properties/AssemblyInfo.cs:
--------------------------------------------------------------------------------
1 | using System.Reflection;
2 | using System.Runtime.CompilerServices;
3 | using System.Runtime.InteropServices;
4 |
5 | // General Information about an assembly is controlled through the following
6 | // set of attributes. Change these attribute values to modify the information
7 | // associated with an assembly.
8 | [assembly: AssemblyTitle("ImageHashingTest")]
9 | [assembly: AssemblyDescription("")]
10 | [assembly: AssemblyConfiguration("")]
11 | [assembly: AssemblyCompany("")]
12 | [assembly: AssemblyProduct("ImageHashingTest")]
13 | [assembly: AssemblyCopyright("Copyright © 2012")]
14 | [assembly: AssemblyTrademark("")]
15 | [assembly: AssemblyCulture("")]
16 |
17 | // Setting ComVisible to false makes the types in this assembly not visible
18 | // to COM components. If you need to access a type in this assembly from
19 | // COM, set the ComVisible attribute to true on that type.
20 | [assembly: ComVisible(false)]
21 |
22 | // The following GUID is for the ID of the typelib if this project is exposed to COM
23 | [assembly: Guid("7e6efebb-d57c-453d-80fa-9729c34f73c8")]
24 |
25 | // Version information for an assembly consists of the following four values:
26 | //
27 | // Major Version
28 | // Minor Version
29 | // Build Number
30 | // Revision
31 | //
32 | // You can specify all the values or you can default the Build and Revision Numbers
33 | // by using the '*' as shown below:
34 | // [assembly: AssemblyVersion("1.0.*")]
35 | [assembly: AssemblyVersion("1.0.0.0")]
36 | [assembly: AssemblyFileVersion("1.0.0.0")]
37 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ImageHashing
2 | ============
3 |
4 | Overview
5 | --------
6 |
7 | This is a very simple C# library used to generate average perceptual hashes of images and files containing image content. A similarity function is also included to compare hashes to one another and obtain a percentage on just how alike those hashes are. Various convenience methods are also provided to reduce the number of calls made by the user.
8 |
9 | This library is free to use and based heavily on:
10 |
11 | * Dr. Neal Krawetz's average hashing algorithm, outlined at http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html
12 | * David Oftedal's C# implementation of Dr. Krawetz's algorithm, available at http://folk.uio.no/davidjo/computing.php
13 |
14 | Credit goes to both of them for the base logic and the initial implementation. My edits to Oftedal's code are mostly for readability and style, plus a few additional convenience functions.
15 |
16 | Perceptual Hashing
17 | ------------------
18 |
19 | For those not familiar with perceptual hashing, pHash.org gives a good definition:
20 |
21 | > A perceptual hash is a fingerprint of a multimedia file derived from various features from its content. Unlike cryptographic hash functions which rely on the avalanche effect of small changes in input leading to drastic changes in output, perceptual hashes are "close" to one another if the features are similar.
22 |
23 | So how is this useful? Well, let's say you're a forensic scientist with a lead on a case. You have an image that you are looking for in some other collection of photos. If the manager of that collection changes even one pixel of the image, then the MD5/SHA-1/whatever hash will end up being *completely* different. Herein lies a problem: traditional cryptographic hashes are either a yes or a no. While this is great for verifying very content-sensitive files (like a downloaded software package), it doesn't work well with multimedia recognition. Changing one pixel in a picture won't change the content according to the human eye; we will still recognize it as the same image.
24 |
25 | Enter perceptual hashing. Perceptual hashes are based on the multimedia content of a file, not just the bytes contained in that file. This allows us to run comparisons on image files and determine similarity both easily and efficiently. This technology is widely used: both Google and TinEye use it for image searches by crawling the web for images, computing hashes, storing them in some database, then running your given image against those known hashes for matches.
--------------------------------------------------------------------------------
/ImageHashing/ImageHashing.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Debug
5 | AnyCPU
6 | 8.0.30703
7 | 2.0
8 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09}
9 | Library
10 | Properties
11 | ImageHashing
12 | ImageHashing
13 | v4.0
14 | 512
15 |
16 |
17 | true
18 | full
19 | false
20 | bin\Debug\
21 | DEBUG;TRACE
22 | prompt
23 | 4
24 |
25 |
26 | pdbonly
27 | true
28 | bin\Release\
29 | TRACE
30 | prompt
31 | 4
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
55 |
--------------------------------------------------------------------------------
/ImageHashing.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 11.00
3 | # Visual Studio 2010
4 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ImageHashing", "ImageHashing\ImageHashing.csproj", "{5B189394-0B91-4BFF-B4FB-5CEA51174C09}"
5 | EndProject
6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ImageHashingTest", "ImageHashingTest\ImageHashingTest.csproj", "{DB397672-EAF5-4199-A90B-B28FD0431A1A}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|Any CPU = Debug|Any CPU
11 | Debug|Mixed Platforms = Debug|Mixed Platforms
12 | Debug|x86 = Debug|x86
13 | Release|Any CPU = Release|Any CPU
14 | Release|Mixed Platforms = Release|Mixed Platforms
15 | Release|x86 = Release|x86
16 | EndGlobalSection
17 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
18 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
19 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09}.Debug|Any CPU.Build.0 = Debug|Any CPU
20 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
21 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
22 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09}.Debug|x86.ActiveCfg = Debug|Any CPU
23 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09}.Release|Any CPU.ActiveCfg = Release|Any CPU
24 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09}.Release|Any CPU.Build.0 = Release|Any CPU
25 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
26 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09}.Release|Mixed Platforms.Build.0 = Release|Any CPU
27 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09}.Release|x86.ActiveCfg = Release|Any CPU
28 | {DB397672-EAF5-4199-A90B-B28FD0431A1A}.Debug|Any CPU.ActiveCfg = Debug|x86
29 | {DB397672-EAF5-4199-A90B-B28FD0431A1A}.Debug|Mixed Platforms.ActiveCfg = Debug|x86
30 | {DB397672-EAF5-4199-A90B-B28FD0431A1A}.Debug|Mixed Platforms.Build.0 = Debug|x86
31 | {DB397672-EAF5-4199-A90B-B28FD0431A1A}.Debug|x86.ActiveCfg = Debug|x86
32 | {DB397672-EAF5-4199-A90B-B28FD0431A1A}.Debug|x86.Build.0 = Debug|x86
33 | {DB397672-EAF5-4199-A90B-B28FD0431A1A}.Release|Any CPU.ActiveCfg = Release|x86
34 | {DB397672-EAF5-4199-A90B-B28FD0431A1A}.Release|Mixed Platforms.ActiveCfg = Release|x86
35 | {DB397672-EAF5-4199-A90B-B28FD0431A1A}.Release|Mixed Platforms.Build.0 = Release|x86
36 | {DB397672-EAF5-4199-A90B-B28FD0431A1A}.Release|x86.ActiveCfg = Release|x86
37 | {DB397672-EAF5-4199-A90B-B28FD0431A1A}.Release|x86.Build.0 = Release|x86
38 | EndGlobalSection
39 | GlobalSection(SolutionProperties) = preSolution
40 | HideSolutionNode = FALSE
41 | EndGlobalSection
42 | EndGlobal
43 |
--------------------------------------------------------------------------------
/ImageHashingTest/ImageHashingTest.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Debug
5 | x86
6 | 8.0.30703
7 | 2.0
8 | {DB397672-EAF5-4199-A90B-B28FD0431A1A}
9 | Exe
10 | Properties
11 | ImageHashingTest
12 | ImageHashingTest
13 | v4.0
14 | Client
15 | 512
16 |
17 |
18 | x86
19 | true
20 | full
21 | false
22 | bin\Debug\
23 | DEBUG;TRACE
24 | prompt
25 | 4
26 |
27 |
28 | x86
29 | pdbonly
30 | true
31 | bin\Release\
32 | TRACE
33 | prompt
34 | 4
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 | {5B189394-0B91-4BFF-B4FB-5CEA51174C09}
53 | ImageHashing
54 |
55 |
56 |
57 |
64 |
--------------------------------------------------------------------------------
/ImageHashing/ImageHashing.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Drawing;
3 | using System.Drawing.Drawing2D;
4 | using System.Drawing.Imaging;
5 |
6 | namespace ImageHashing
7 | {
/// <summary>
/// Contains a variety of methods useful in generating image hashes for image comparison
/// and recognition.
///
/// Credit for the AverageHash implementation to David Oftedal of the University of Oslo.
/// </summary>
public class ImageHashing
{
    #region Private constants and utility methods
    /// <summary>
    /// Side length, in pixels, of the square thumbnail the image is squeezed into.
    /// </summary>
    private const int ThumbnailSide = 8;

    /// <summary>
    /// Number of pixels in the thumbnail, which is also the number of bits in the hash.
    /// </summary>
    private const int HashBits = ThumbnailSide * ThumbnailSide;

    /// <summary>
    /// Lookup table used by <see cref="BitCount"/>: entry <c>b</c> holds the number of
    /// set bits in the byte value <c>b</c> (0-255). Credit goes to David Oftedal of the
    /// University of Oslo, Norway for this.
    /// http://folk.uio.no/davidjo/computing.php
    /// </summary>
    private static readonly byte[] bitCounts = {
        0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,1,2,2,3,2,3,3,4,
        2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
        2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,
        4,5,5,6,5,6,6,7,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
        2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,2,3,3,4,3,4,4,5,
        3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
        4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
    };

    /// <summary>
    /// Counts the set bits in a 64-bit value. Utility function for similarity.
    /// The value is consumed one byte at a time, summing the per-byte counts
    /// from <see cref="bitCounts"/>.
    /// </summary>
    /// <param name="num">The hash we are counting.</param>
    /// <returns>The total bit count.</returns>
    private static uint BitCount(ulong num)
    {
        uint count = 0;
        // Stops as soon as the remaining high bytes are all zero.
        for (; num > 0; num >>= 8)
        {
            count += bitCounts[(num & 0xff)];
        }
        return count;
    }
    #endregion

    #region Public interface methods
    /// <summary>
    /// Computes the average hash of an image according to the algorithm given by Dr. Neal Krawetz
    /// on his blog: http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html.
    /// </summary>
    /// <param name="image">The image to hash. It is not disposed by this method.</param>
    /// <returns>The hash of the image.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="image"/> is null.</exception>
    public static ulong AverageHash(Image image)
    {
        if (image == null)
        {
            throw new ArgumentNullException("image");
        }

        byte[] grayscale = new byte[HashBits];
        uint averageValue = 0;

        // Both the scratch bitmap and its Graphics wrap native GDI+ handles,
        // so they are released deterministically instead of waiting for finalization.
        using (Bitmap squeezed = new Bitmap(ThumbnailSide, ThumbnailSide, PixelFormat.Format32bppRgb))
        {
            // Squeeze the image into an 8x8 canvas
            using (Graphics canvas = Graphics.FromImage(squeezed))
            {
                canvas.CompositingQuality = CompositingQuality.HighQuality;
                canvas.InterpolationMode = InterpolationMode.HighQualityBilinear;
                canvas.SmoothingMode = SmoothingMode.HighQuality;
                canvas.DrawImage(image, 0, 0, ThumbnailSide, ThumbnailSide);
            }

            // Reduce colors to 6-bit grayscale and calculate average color value
            for (int y = 0; y < ThumbnailSide; y++)
            {
                for (int x = 0; x < ThumbnailSide; x++)
                {
                    uint pixel = (uint)squeezed.GetPixel(x, y).ToArgb();
                    uint gray = (pixel & 0x00ff0000) >> 16;   // red
                    gray += (pixel & 0x0000ff00) >> 8;        // green
                    gray += (pixel & 0x000000ff);             // blue
                    gray /= 12;                               // (r + g + b) / 12 yields 0..63

                    grayscale[x + (y * ThumbnailSide)] = (byte)gray;
                    averageValue += gray;
                }
            }
        }
        averageValue /= HashBits;

        // Compute the hash: each bit is a pixel, first pixel in the most significant bit
        // 1 = at or above average, 0 = lower than average
        ulong hash = 0;
        for (int i = 0; i < HashBits; i++)
        {
            if (grayscale[i] >= averageValue)
            {
                hash |= (1UL << (HashBits - 1 - i));
            }
        }

        return hash;
    }

    /// <summary>
    /// Computes the average hash of the image content in the given file.
    /// </summary>
    /// <param name="path">Path to the input file.</param>
    /// <returns>The hash of the input file's image content.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="path"/> is null.</exception>
    public static ulong AverageHash(String path)
    {
        if (path == null)
        {
            throw new ArgumentNullException("path");
        }

        // Dispose the bitmap as soon as it is hashed so the native handle
        // (and the lock GDI+ holds on the file) is released promptly.
        using (Bitmap bmp = new Bitmap(path))
        {
            return AverageHash(bmp);
        }
    }

    /// <summary>
    /// Returns a percentage-based similarity value between the two given hashes. The higher
    /// the percentage, the closer the hashes are to being identical.
    /// </summary>
    /// <param name="hash1">The first hash.</param>
    /// <param name="hash2">The second hash.</param>
    /// <returns>The similarity percentage: 100 when all 64 bits match, 0 when none do.</returns>
    public static double Similarity(ulong hash1, ulong hash2)
    {
        // XOR leaves a 1 in every position where the hashes disagree.
        return ((HashBits - BitCount(hash1 ^ hash2)) * 100) / 64.0;
    }

    /// <summary>
    /// Returns a percentage-based similarity value between the two given images. The higher
    /// the percentage, the closer the images are to being identical.
    /// </summary>
    /// <param name="image1">The first image.</param>
    /// <param name="image2">The second image.</param>
    /// <returns>The similarity percentage.</returns>
    public static double Similarity(Image image1, Image image2)
    {
        ulong hash1 = AverageHash(image1);
        ulong hash2 = AverageHash(image2);
        return Similarity(hash1, hash2);
    }

    /// <summary>
    /// Returns a percentage-based similarity value between the image content of the two given
    /// files. The higher the percentage, the closer the image contents are to being identical.
    /// </summary>
    /// <param name="path1">The first image file.</param>
    /// <param name="path2">The second image file.</param>
    /// <returns>The similarity percentage.</returns>
    public static double Similarity(String path1, String path2)
    {
        ulong hash1 = AverageHash(path1);
        ulong hash2 = AverageHash(path2);
        return Similarity(hash1, hash2);
    }
    #endregion
}
145 | }
146 |
--------------------------------------------------------------------------------