├── .gitignore ├── LICENSE ├── README.md ├── blockhash.c ├── blockhash.h ├── imagehash.c ├── test.sh ├── testdata ├── images │ ├── 00002701.jpg │ ├── 00011601.jpg │ ├── 00094701.jpg │ ├── 00106901.jpg │ ├── 00133601.jpg │ ├── 00136101.jpg │ ├── 00631701.jpg │ ├── 00631801.jpg │ ├── 01109801.jpg │ ├── 06855701.jpg │ ├── 24442301.jpg │ ├── 32499201.jpg │ ├── Babylonian.png │ ├── clipper_ship.jpg │ ├── emptyBasket.png │ ├── exact-hashes.txt │ ├── puffy_white.png │ ├── quick-hashes.txt │ ├── sources.txt │ └── stoplights.jpg └── videos │ ├── 29684.29680.WEEKNUMMER263-HRE0000D0AB.mpeg │ ├── Medvedev_-_Lubyanka.ogv │ ├── exact-hashes.txt │ └── quick-hashes.txt ├── videohash.c ├── videorooter.conf ├── waf └── wscript /.gitignore: -------------------------------------------------------------------------------- 1 | # for projects that use Waf for building: http://code.google.com/p/waf/ 2 | .waf-* 3 | .lock-* 4 | 5 | # build directory 6 | build 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Commons Machinery 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | blockhash 2 | ========= 3 | 4 | This is a perceptual image and video hash calculation tool based on the algorithm described in 5 | Block Mean Value Based Image Perceptual Hashing by Bian Yang, Fan Gu and Xiamu Niu. 6 | 7 | Build and install 8 | ----------------- 9 | 10 | Blockhash requires libmagickwand and libopencv. On Debian/Ubuntu it can be installed using 11 | the following command: 12 | 13 | sudo apt-get install libmagickwand-dev libopencv-dev 14 | 15 | On Fedora and friends: 16 | 17 | sudo dnf install ImageMagick-devel opencv-devel 18 | 19 | To build blockhash cd to the source directory and type: 20 | 21 | ./waf configure 22 | ./waf 23 | 24 | The program binary will land in `./build`. To install it to `/usr/local/bin/` type: 25 | 26 | ./waf install 27 | 28 | Usage 29 | ----- 30 | 31 | Run `blockhash [list of images]` for calculating hashes for image files. 32 | 33 | Run `blockhash_video [list of videos]` for calculating hashes for video files. 34 | 35 | Run `blockhash --help` for the list of options. 36 | 37 | WARNING 38 | ------- 39 | 40 | Please note that the status of the video algorithm is EXPERIMENTAL. It is 41 | subject to substantial changes and should not be relied upon. 
/*
 * Perceptual image hash calculation library based on the algorithm described in
 * Block Mean Value Based Image Perceptual Hashing by Bian Yang, Fan Gu and Xiamu Niu
 *
 * Copyright 2014-2015 Commons Machinery http://commonsmachinery.se/
 * Distributed under an MIT license, please see LICENSE in the top dir.
 */

/* NOTE(review): the header names of the four #include lines were missing in
 * the reviewed copy; they are restored from the standard-library calls made
 * in this file -- confirm against the original. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>


/* qsort comparator: ascending order of int values. */
static int cmpint(const void *pa, const void *pb)
{
    int a = *(const int *) pa;
    int b = *(const int *) pb;
    return (a < b) ? -1 : (a > b);
}


/* qsort comparator: ascending order of float values. */
static int cmpfloat(const void *pa, const void *pb)
{
    float a = *(const float *) pa;
    float b = *(const float *) pb;
    return (a < b) ? -1 : (a > b);
}


/** Band "median" of n ints, computed on a sorted scratch copy.
 *
 * NOTE(review): for even n this averages sorted[n/2] and sorted[n/2 + 1],
 * i.e. one position above the textbook middle pair. It is kept on purpose:
 * changing it changes every hash, and the reference hashes under testdata
 * appear to rely on it -- confirm before "fixing" it.
 *
 * The upper index is clamped so that n == 2 no longer reads past the end of
 * the scratch buffer, and n <= 0 yields 0 without touching memory.
 *
 * Returns: 0 on success (value stored in *result), -1 if the scratch buffer
 * cannot be allocated.
 */
static int median(const int *data, int n, float *result)
{
    int *sorted;
    int upper;

    if (n <= 0) {
        *result = 0.0f;
        return 0;
    }

    sorted = malloc((size_t) n * sizeof *sorted);
    if (!sorted) return -1;
    memcpy(sorted, data, (size_t) n * sizeof *sorted);
    qsort(sorted, (size_t) n, sizeof *sorted, cmpint);

    if (n % 2 == 0) {
        upper = n / 2 + 1;
        if (upper >= n) upper = n - 1;
        /* sum in long long so two large block sums cannot overflow int */
        *result = (float) ((long long) sorted[n / 2] + sorted[upper]) / 2;
    } else {
        *result = (float) sorted[n / 2];
    }

    free(sorted);
    return 0;
}


/** Float counterpart of median(); same index convention, clamping and
 * return codes.
 */
static int medianf(const float *data, int n, float *result)
{
    float *sorted;
    int upper;

    if (n <= 0) {
        *result = 0.0f;
        return 0;
    }

    sorted = malloc((size_t) n * sizeof *sorted);
    if (!sorted) return -1;
    memcpy(sorted, data, (size_t) n * sizeof *sorted);
    qsort(sorted, (size_t) n, sizeof *sorted, cmpfloat);

    if (n % 2 == 0) {
        upper = n / 2 + 1;
        if (upper >= n) upper = n - 1;
        *result = (sorted[n / 2] + sorted[upper]) / 2;
    } else {
        *result = sorted[n / 2];
    }

    free(sorted);
    return 0;
}


/* Bit decision shared by both translate variants: 1 if the block is above
 * the band median; a block within 1 of the median becomes 1 only when the
 * median lies in the upper half of the value range. */
static int block_bit(float v, float m, float half_block_value)
{
    const float d = v - m;
    return v > m || (d > -1 && d < 1 && m > half_block_value);
}


/* In-place translation of integer block sums to bits.
 * All four band medians are computed before anything is overwritten, so a
 * failure leaves `blocks` untouched. Returns 0 on success, -1 on allocation
 * failure. */
static int bands_to_bits(int *blocks, int nblocks, int pixels_per_block)
{
    /* computed in float: pixels_per_block * 256 * 3 overflows int for very
     * large blocks */
    const float half_block_value = (float) pixels_per_block * 256 * 3 / 2;
    const int bandsize = nblocks / 4;
    float m[4];
    int band, j;

    if (bandsize <= 0) return 0;

    for (band = 0; band < 4; band++) {
        if (median(blocks + band * bandsize, bandsize, &m[band]) != 0) return -1;
    }

    for (band = 0; band < 4; band++) {
        int *p = blocks + band * bandsize;
        for (j = 0; j < bandsize; j++) {
            p[j] = block_bit((float) p[j], m[band], half_block_value);
        }
    }
    return 0;
}


/* Float-input variant of bands_to_bits(); bits are written to `result`. */
static int bands_to_bitsf(const float *blocks, int *result, int nblocks, int pixels_per_block)
{
    const float half_block_value = (float) pixels_per_block * 256 * 3 / 2;
    const int bandsize = nblocks / 4;
    float m[4];
    int band, j;

    if (bandsize <= 0) return 0;

    for (band = 0; band < 4; band++) {
        if (medianf(blocks + band * bandsize, bandsize, &m[band]) != 0) return -1;
    }

    for (band = 0; band < 4; band++) {
        const int base = band * bandsize;
        for (j = 0; j < bandsize; j++) {
            result[base + j] = block_bit(blocks[base + j], m[band], half_block_value);
        }
    }
    return 0;
}


/** Compare medians across four horizontal bands
 *
 * Output a 1 if the block is brighter than the median.
 * With images dominated by black or white, the median may
 * end up being 0 or the max value, and thus having a lot
 * of blocks of value equal to the median.  To avoid
 * generating hashes of all zeros or ones, in that case output
 * 0 if the median is in the lower value space, 1 otherwise
 *
 * The array is split into four bands of nblocks / 4 entries; entries beyond
 * 4 * (nblocks / 4) are left untouched. The function has no error channel:
 * if scratch memory cannot be allocated, the first nblocks entries are set
 * to 0 so that callers never mistake raw sums for bits.
 */
void translate_blocks_to_bits(int *blocks, int nblocks, int pixels_per_block)
{
    if (bands_to_bits(blocks, nblocks, pixels_per_block) != 0 && nblocks > 0) {
        memset(blocks, 0, (size_t) nblocks * sizeof *blocks);
    }
}


/** Same as translate_blocks_to_bits(), but reads float block sums from
 * `blocks` and writes the bits to `result`. On allocation failure the first
 * nblocks entries of `result` are set to 0.
 */
void translate_blocks_to_bitsf(float *blocks, int *result, int nblocks, int pixels_per_block)
{
    if (bands_to_bitsf(blocks, result, nblocks, pixels_per_block) != 0 && nblocks > 0) {
        memset(result, 0, (size_t) nblocks * sizeof *result);
    }
}


/** Convert array of bits to hexadecimal string representation.
 * Hash length should be a multiple of 4; trailing bits that do not fill a
 * whole hex digit are ignored. Any non-zero entry counts as a set bit.
 *
 * Returns: null-terminated hexadecimal string hash (caller frees it), or
 * NULL on allocation failure or invalid arguments.
 */
char* bits_to_hexhash(int *bits, int nbits)
{
    static const char digits[] = "0123456789abcdef";
    int i, j;
    int len;
    char *hex;

    if (nbits < 0) return NULL;
    len = nbits / 4;
    if (len > 0 && !bits) return NULL;

    hex = malloc((size_t) len + 1);
    if (!hex) return NULL;

    for (i = 0; i < len; i++) {
        unsigned nibble = 0;
        /* first bit of each group is the most significant one */
        for (j = 0; j < 4; j++) {
            nibble = (nibble << 1) | (unsigned) (bits[i * 4 + j] != 0);
        }
        hex[i] = digits[nibble];
    }
    hex[len] = '\0';

    return hex;
}


/** Calculate perceptual hash for an RGBA image using the quick method.
 *
 * The image is divided into bits x bits blocks of (width / bits) x
 * (height / bits) whole pixels; pixels right of / below the last whole
 * block are ignored. Each block holds the sum of R + G + B over its pixels;
 * pixels with alpha == 0 are counted as 765 (3 * 255).
 *
 * Parameters:
 *
 * bits   - number of blocks per row and per column; must be > 0.
 * data   - RGBA image data, 4 bytes per pixel, rows of `width` pixels.
 * width  - image width.
 * height - image height.
 * hash   - on success receives a newly allocated array of bits * bits ints
 *          (0 or 1) that the caller must free.
 *
 * NOTE: block sums are kept in int, so blocks of more than roughly
 * 2.8 million pixels can overflow.
 *
 * Returns: 0 on success, -1 on failure (*hash is not written on failure).
 */
int blockhash_quick(int bits, unsigned char *data, int width, int height, int **hash)
{
    int x, y, ix, iy;
    int alpha, value;
    int block_width;
    int block_height;
    int *blocks;

    if (bits <= 0 || width < 0 || height < 0 || !hash) return -1;

    block_width = width / bits;
    block_height = height / bits;

    /* pixel data is only required when there is at least one pixel to read */
    if (!data && block_width > 0 && block_height > 0) return -1;

    blocks = calloc((size_t) bits * (size_t) bits, sizeof *blocks);
    if (!blocks) return -1;

    for (y = 0; y < bits; y++) {
        for (x = 0; x < bits; x++) {
            value = 0;

            for (iy = 0; iy < block_height; iy++) {
                /* size_t offsets: the int product overflows on large images */
                const size_t row = (size_t) (y * block_height + iy) * (size_t) width;

                for (ix = 0; ix < block_width; ix++) {
                    const size_t ii = (row + (size_t) (x * block_width + ix)) * 4;

                    alpha = data[ii + 3];
                    if (alpha == 0) {
                        value += 765;
                    } else {
                        value += data[ii] + data[ii + 1] + data[ii + 2];
                    }
                }
            }

            blocks[y * bits + x] = value;
        }
    }

    if (bands_to_bits(blocks, bits * bits, block_width * block_height) != 0) {
        free(blocks);
        return -1;
    }

    *hash = blocks;
    return 0;
}
/** Calculate perceptual hash for an RGBA image using the precise method.
 *
 * When width and height are both exact multiples of `bits` the work is
 * delegated to blockhash_quick(). Otherwise every pixel's R + G + B sum
 * (765 for pixels with alpha == 0) is added to float block accumulators,
 * split between neighbouring blocks by the fractional part of the pixel's
 * position within its block, and the accumulators are then converted to
 * bits by translate_blocks_to_bitsf().
 *
 * Parameters:
 *
 * bits   - number of blocks per row and per column; must be > 0.
 * data   - RGBA image data, 4 bytes per pixel, rows of `width` pixels.
 * width  - image width.
 * height - image height.
 * hash   - on success receives a newly allocated array of bits * bits ints
 *          that the caller must free.
 *
 * Returns: 0 on success, -1 on failure (*hash is not written on failure).
 */
int blockhash(int bits, unsigned char *data, int width, int height, int **hash)
{
    float block_width;
    float block_height;
    float y_frac, y_int;
    float x_frac, x_int;
    float x_mod, y_mod;
    float weight_top, weight_bottom, weight_left, weight_right;
    int block_top, block_bottom, block_left, block_right;
    int x, y, alpha;
    size_t ii;
    float value;
    float *blocks;
    int *result;

    /* bits == 0 would divide by zero in the modulo test below */
    if (bits <= 0 || !hash) return -1;

    if (width % bits == 0 && height % bits == 0) {
        return blockhash_quick(bits, data, width, height, hash);
    }

    if (!data && width > 0 && height > 0) return -1;

    block_width = (float) width / (float) bits;
    block_height = (float) height / (float) bits;

    blocks = calloc((size_t) bits * (size_t) bits, sizeof *blocks);
    if(!blocks) return -1;

    /* zero-initialised: the translation step only writes 4 * (nblocks / 4)
     * entries, so a trailing remainder must not be left indeterminate */
    result = calloc((size_t) bits * (size_t) bits, sizeof *result);
    if(!result) {
        free(blocks);
        return -1;
    }

    for (y = 0; y < height; y++) {
        y_mod = fmodf(y + 1, block_height);
        y_frac = modff(y_mod, &y_int);

        weight_top = (1 - y_frac);
        weight_bottom = y_frac;

        // y_int will be 0 on bottom/right borders and on block boundaries
        if (y_int > 0 || (y + 1) == height) {
            block_top = block_bottom = (int) floor((float) y / block_height);
        } else {
            block_top = (int) floor((float) y / block_height);
            block_bottom = (int) ceil((float) y / block_height);
        }

        // defensive: float rounding must never index past the last block row
        if (block_top >= bits) block_top = bits - 1;
        if (block_bottom >= bits) block_bottom = bits - 1;

        for (x = 0; x < width; x++) {
            x_mod = fmodf(x + 1, block_width);
            x_frac = modff(x_mod, &x_int);

            weight_left = (1 - x_frac);
            weight_right = x_frac;

            // x_int will be 0 on bottom/right borders and on block boundaries
            if (x_int > 0 || (x + 1) == width) {
                block_left = block_right = (int) floor((float) x / block_width);
            } else {
                block_left = (int) floor((float) x / block_width);
                block_right = (int) ceil((float) x / block_width);
            }

            // defensive: same clamp for the block column
            if (block_left >= bits) block_left = bits - 1;
            if (block_right >= bits) block_right = bits - 1;

            // size_t offset: the int product overflows on large images
            ii = ((size_t) y * (size_t) width + (size_t) x) * 4;

            alpha = data[ii + 3];
            if (alpha == 0) {
                value = 765;
            } else {
                value = data[ii] + data[ii + 1] + data[ii + 2];
            }

            // add weighted pixel value to relevant blocks
            blocks[block_top * bits + block_left] += value * weight_top * weight_left;
            blocks[block_top * bits + block_right] += value * weight_top * weight_right;
            blocks[block_bottom * bits + block_left] += value * weight_bottom * weight_left;
            blocks[block_bottom * bits + block_right] += value * weight_bottom * weight_right;
        }
    }

    // pixels-per-block is passed as an int, i.e. truncated
    translate_blocks_to_bitsf(blocks, result, bits * bits, (int) (block_width * block_height));
    free(blocks);

    *hash = result;
    return 0;
}
38 | */ 39 | int blockhash_quick(int bits, unsigned char *data, int width, int height, int **hash); 40 | 41 | 42 | /** Calculate perceptual hash for an RGBA image using precise method. 43 | * 44 | * Precise method puts weighted pixel values to blocks according to pixel 45 | * area falling within a given block and provides more accurate results 46 | * in case width and height are not divisible by the number of bits. 47 | * 48 | * Parameters: 49 | * 50 | * bits - number of blocks to divide the image by horizontally and vertically. 51 | * data - RGBA image data. 52 | * width - image width. 53 | * height - image height. 54 | * hash - the resulting hash will be allocated and stored in the given array as bits. 55 | * 56 | * Returns: 0 on success, -1 on failure. 57 | */ 58 | int blockhash(int bits, unsigned char *data, int width, int height, int **hash); 59 | 60 | 61 | #ifdef __cplusplus 62 | } 63 | #endif 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /imagehash.c: -------------------------------------------------------------------------------- 1 | /* Perceptual image hash calculation tool based on algorithm described in 2 | * Block Mean Value Based Image Perceptual Hashing by Bian Yang, Fan Gu and Xiamu Niu 3 | * 4 | * Copyright 2014 Commons Machinery http://commonsmachinery.se/ 5 | * Distributed under an MIT license, please see LICENSE in the top dir. 
6 | */ 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "blockhash.h" 14 | 15 | #include 16 | 17 | int process_image(char * fn, int bits, int quick, int debug) 18 | { 19 | int i, j; 20 | size_t width, height; 21 | unsigned char *image_data; 22 | int *hash; 23 | MagickBooleanType status; 24 | MagickWand *magick_wand; 25 | 26 | magick_wand = NewMagickWand(); 27 | status = MagickReadImage(magick_wand, fn); 28 | 29 | if (status == MagickFalse) { 30 | printf("Error opening image file %s\n", fn); 31 | exit(-1); 32 | } 33 | 34 | // Remove color profiles for interoperability with other hashing tools 35 | MagickProfileImage(magick_wand, "*", NULL, 0); 36 | 37 | width = MagickGetImageWidth(magick_wand); 38 | height = MagickGetImageHeight(magick_wand); 39 | 40 | image_data = malloc(width * height * 4); 41 | 42 | status = MagickExportImagePixels(magick_wand, 0, 0, width, height, "RGBA", CharPixel, image_data); 43 | 44 | if (status == MagickFalse) { 45 | printf("Error converting image data to RGBA\n"); 46 | exit(-1); 47 | } 48 | 49 | hash = malloc(bits * bits * sizeof(int)); 50 | 51 | if (quick) { 52 | blockhash_quick(bits, image_data, width, height, &hash); 53 | } else { 54 | blockhash(bits, image_data, width, height, &hash); 55 | } 56 | 57 | if (debug) { 58 | for (i = 0; i < bits * bits; i++) { 59 | if (i != 0 && i % bits == 0) 60 | printf("\n"); 61 | printf("%d", hash[i]); 62 | } 63 | printf("\n"); 64 | } 65 | 66 | char* hex = bits_to_hexhash(hash, bits*bits); 67 | printf("%s %s\n", hex, fn); 68 | 69 | free(hex); 70 | free(hash); 71 | free(image_data); 72 | 73 | DestroyMagickWand(magick_wand); 74 | } 75 | 76 | void help() { 77 | printf("Usage: blockhash [-h|--help] [--quick] [--bits BITS] [--debug] filenames...\n" 78 | "\n" 79 | "Optional arguments:\n" 80 | "-h, --help Show this help message and exit\n" 81 | "-q, --quick Use quick hashing method.\n" 82 | "-b, --bits BITS Create hash of size N^2 bits. 
Default: 16\n" 83 | "--debug Print hashes as 2D maps (for debugging)\n"); 84 | } 85 | 86 | void main (int argc, char **argv) { 87 | MagickWandGenesis(); 88 | 89 | int quick = 0; 90 | int debug = 0; 91 | int bits = 16; 92 | int x; 93 | 94 | int option_index = 0; 95 | int c; 96 | 97 | struct option long_options[] = { 98 | {"help", no_argument, 0, 'h'}, 99 | {"quick", no_argument, 0, 'q'}, 100 | {"bits", required_argument, 0, 'b'}, 101 | {"debug", no_argument, 0, 'd'}, 102 | {0, 0, 0, 0} 103 | }; 104 | 105 | if (argc < 2) { 106 | help(); 107 | exit(0); 108 | } 109 | 110 | while ((c = getopt_long(argc, argv, "hqb:d", 111 | long_options, &option_index)) != -1) { 112 | switch (c) { 113 | case 'h': 114 | help(); 115 | exit(0); 116 | break; 117 | case 'q': 118 | quick = 1; 119 | break; 120 | case 'b': 121 | if (sscanf(optarg, "%d", &bits) != 1) {; 122 | printf("Error: couldn't parse bits argument\n"); 123 | exit(-1); 124 | } 125 | if (bits % 4 != 0) { 126 | printf("Error: bits argument should be a multiple of 4\n"); 127 | exit(-1); 128 | } 129 | break; 130 | case 'd': 131 | debug = 1; 132 | break; 133 | case '?': 134 | default: 135 | exit(-1); 136 | } 137 | } 138 | 139 | if (optind < argc) { 140 | while (optind < argc) { 141 | process_image(argv[optind++], bits, quick, debug); 142 | } 143 | } 144 | 145 | MagickWandTerminus(); 146 | 147 | exit(0); 148 | } 149 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -e 4 | 5 | d=`dirname "$0"` 6 | 7 | (cd "$d/testdata/images" 8 | 9 | ../../build/blockhash -b 16 *.jpg *.png | sort | diff -su exact-hashes.txt - 10 | ../../build/blockhash -q -b 16 *.jpg *.png | sort | diff -su quick-hashes.txt - 11 | ) 12 | 13 | (cd "$d/testdata/videos" 14 | ../../build/blockhash_video -b 16 *.mpeg *.ogv | sort | diff -su exact-hashes.txt - 15 | ../../build/blockhash_video -q -b 16 *.mpeg *.ogv | sort | 
diff -su quick-hashes.txt - 16 | ) 17 | echo 18 | echo OK 19 | echo 20 | 21 | -------------------------------------------------------------------------------- /testdata/images/00002701.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonasob/blockhash/c6652877b1780bd7c4bc11c3fdc978ae94318186/testdata/images/00002701.jpg -------------------------------------------------------------------------------- /testdata/images/00011601.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonasob/blockhash/c6652877b1780bd7c4bc11c3fdc978ae94318186/testdata/images/00011601.jpg -------------------------------------------------------------------------------- /testdata/images/00094701.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonasob/blockhash/c6652877b1780bd7c4bc11c3fdc978ae94318186/testdata/images/00094701.jpg -------------------------------------------------------------------------------- /testdata/images/00106901.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonasob/blockhash/c6652877b1780bd7c4bc11c3fdc978ae94318186/testdata/images/00106901.jpg -------------------------------------------------------------------------------- /testdata/images/00133601.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonasob/blockhash/c6652877b1780bd7c4bc11c3fdc978ae94318186/testdata/images/00133601.jpg -------------------------------------------------------------------------------- /testdata/images/00136101.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonasob/blockhash/c6652877b1780bd7c4bc11c3fdc978ae94318186/testdata/images/00136101.jpg 
-------------------------------------------------------------------------------- /testdata/images/00631701.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonasob/blockhash/c6652877b1780bd7c4bc11c3fdc978ae94318186/testdata/images/00631701.jpg -------------------------------------------------------------------------------- /testdata/images/00631801.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonasob/blockhash/c6652877b1780bd7c4bc11c3fdc978ae94318186/testdata/images/00631801.jpg -------------------------------------------------------------------------------- /testdata/images/01109801.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonasob/blockhash/c6652877b1780bd7c4bc11c3fdc978ae94318186/testdata/images/01109801.jpg -------------------------------------------------------------------------------- /testdata/images/06855701.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonasob/blockhash/c6652877b1780bd7c4bc11c3fdc978ae94318186/testdata/images/06855701.jpg -------------------------------------------------------------------------------- /testdata/images/24442301.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonasob/blockhash/c6652877b1780bd7c4bc11c3fdc978ae94318186/testdata/images/24442301.jpg -------------------------------------------------------------------------------- /testdata/images/32499201.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonasob/blockhash/c6652877b1780bd7c4bc11c3fdc978ae94318186/testdata/images/32499201.jpg -------------------------------------------------------------------------------- 
/testdata/images/Babylonian.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonasob/blockhash/c6652877b1780bd7c4bc11c3fdc978ae94318186/testdata/images/Babylonian.png -------------------------------------------------------------------------------- /testdata/images/clipper_ship.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonasob/blockhash/c6652877b1780bd7c4bc11c3fdc978ae94318186/testdata/images/clipper_ship.jpg -------------------------------------------------------------------------------- /testdata/images/emptyBasket.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonasob/blockhash/c6652877b1780bd7c4bc11c3fdc978ae94318186/testdata/images/emptyBasket.png -------------------------------------------------------------------------------- /testdata/images/exact-hashes.txt: -------------------------------------------------------------------------------- 1 | 00000000ffbffffff803f807f807f807f90ff90fb90c9900ffff7fff00000000 puffy_white.png 2 | 000003f887fe8fff0fe00fe00ff00ff807fc07f807f803f003e003e007e0fffe 00136101.jpg 3 | 00000fe03ff8fff81ff8399831883bcc35ac301c384c3ffc0ef00660c003ffff 00133601.jpg 4 | 00007ff07fe07fe07fe67ff07520600077fe601e7f5e000079fd40410001fffe clipper_ship.jpg 5 | 0002001f0fff1fff7ff71ff300f300007f1278f0f8700fc0ff98cc88c1cc01fc 32499201.jpg 6 | 03ff007d025a33f325e366690501b79fd103f3077c0474f93ff31e113f550700 00094701.jpg 7 | 0fe01fe007e027c037c077c00fc00fe007e047c001c003c007e007e007e003c0 stoplights.jpg 8 | 3fff1c2004e80fe81fe00fe01fe81de0fffb0ffb00210031003b03ff00df01ff 00011601.jpg 9 | f80ff80ff007f003f007f183e4c3c5c3c7c3c6e3c1e3c007c007c007e007efff 01109801.jpg 10 | f81bf91ffb803400e07f0c7d049f058706013e033fe33fe11f600e618ea30def 00002701.jpg 11 | fe7ffe7ffe7ffc7ffc7ffc7f7c000000e107e183f89fe007c00ff81ff81ff83f Babylonian.png 12 | 
fefd07ff000f0003037f02bc01bc11ff01fe01be01bc03ff007f007f07fd0bf0 06855701.jpg 13 | ff80fe80fc01fe00fe00ff00ff00ff00fec0fd00f0e0fac0ecf0e7f0e7e06040 00106901.jpg 14 | ffff8001bfa182018aa186e1873786e78623827385319fcf0000800fc0ffffff 00631801.jpg 15 | ffff8101bf0192019005860f9d8fbf899f818f818f998e198fc182618001ffff 24442301.jpg 16 | fffff803b9018001886382e7877382afceef8e61813187070001800f80ffffff 00631701.jpg 17 | fffffc07e00380009f099f818f0b8c4b9f9fc00fcc07cc03c003e00fe07ff1ff emptyBasket.png 18 | -------------------------------------------------------------------------------- /testdata/images/puffy_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonasob/blockhash/c6652877b1780bd7c4bc11c3fdc978ae94318186/testdata/images/puffy_white.png -------------------------------------------------------------------------------- /testdata/images/quick-hashes.txt: -------------------------------------------------------------------------------- 1 | 00000000ffbffffff803f807f807f807f90ff90fb90c9900ffff7fff00000000 puffy_white.png 2 | 000001f887ff8fff0fe00fe00ff00ff807fc07f803f803f801e001e007f0ffff 00136101.jpg 3 | 00000fe03ff8fff81f7839d831883bcc31dc31ac300c3ffc9ef88e608041f1bf 00133601.jpg 4 | 00007ff07fe07fe07fe67ff07520600077fe601e7f5e000079fd40410001fffe clipper_ship.jpg 5 | 0002000f1fff1fff7ff31ff301f300006f9078f0f8701ee0ffc8ce88c1c401fc 32499201.jpg 6 | 03ff007d025a33f325e366f905013797d103c12ffd04fc687ff31e111f111702 00094701.jpg 7 | 0fe01fe007e027c037c077c00fe00fe007e047c001c003c007e007e007e003e0 stoplights.jpg 8 | 3fff1c2004e80fe81fe00fe00fe81de8fffb8ffb0063000000f303ff81ff4185 00011601.jpg 9 | f80ff80ff007f003f007f183f4c3c1c3c7c3c7e3c1e3c003c003c003e087ffff 01109801.jpg 10 | f81bf91ffb803400e07f8c5d049f049706033a033fe33fe1bfe00e618ee30ca3 00002701.jpg 11 | fe7d0fff000f0003037f03bc00bc11ff01ff01be01bc01ff01ff003f03fd03f8 06855701.jpg 12 | 
fe7ffe7ffe7ffc7ffc7ffc7ffc7f0000c103e187f18ff00f8003fc3ff81ff81f Babylonian.png 13 | ff80fe80fc00fe01fe00ff00ff00ff00fec0ff00f820f8e0fcc0e5f0e7e0e0c0 00106901.jpg 14 | ffff8001bfb180018e8186e58dd187e586418071879f9be780018007c07fffff 00631801.jpg 15 | ffff8101bf018205900586079d8fbf8d9f818f818f998e0b8f8982618001ffff 24442301.jpg 16 | fffffc019d0180018c61806fcf7181cfae6d8e71891983878000800fc0ffefff 00631701.jpg 17 | fffffc07e00380009b099f818f8f8c439f9fcc0fc807c803c003c00fe03ff1ff emptyBasket.png 18 | -------------------------------------------------------------------------------- /testdata/images/sources.txt: -------------------------------------------------------------------------------- 1 | puffy_white.jpg - http://www.nasa.gov/multimedia/imagegallery/image_feature_2466.html 2 | clipper_ship.jpg - http://commons.wikimedia.org/wiki/File:%27Clipper_Ship_Great_Republic%27_by_James_Butterworth,_c._1853_-_Old_State_House_Museum,_Boston,_MA_-_IMG_6709.JPG 3 | emptyBasket.png - https://openclipart.org/detail/194657/plastic-laundry-basket-by-gubrww2-194657 4 | Babylonian.png - https://openclipart.org/detail/194594/babylonian-or-summerian-religious-deity-by-lordoftheloch-194594 5 | 6 | The remaining test images come from http://www.getty.edu/about/opencontent.html 7 | -------------------------------------------------------------------------------- /testdata/images/stoplights.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonasob/blockhash/c6652877b1780bd7c4bc11c3fdc978ae94318186/testdata/images/stoplights.jpg -------------------------------------------------------------------------------- /testdata/videos/29684.29680.WEEKNUMMER263-HRE0000D0AB.mpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonasob/blockhash/c6652877b1780bd7c4bc11c3fdc978ae94318186/testdata/videos/29684.29680.WEEKNUMMER263-HRE0000D0AB.mpeg 
-------------------------------------------------------------------------------- /testdata/videos/Medvedev_-_Lubyanka.ogv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jonasob/blockhash/c6652877b1780bd7c4bc11c3fdc978ae94318186/testdata/videos/Medvedev_-_Lubyanka.ogv -------------------------------------------------------------------------------- /testdata/videos/exact-hashes.txt: -------------------------------------------------------------------------------- 1 | 63c2c1c585c5c1c92f0a3a13471347072b617c0c9c1c1ea8e1c1e1c1e1c1c1e1 29684.29680.WEEKNUMMER263-HRE0000D0AB.mpeg 2 | 7a187f00063e0c7c161e1a1e1a1e1c1e742c6c262e640cbc166666262e260cbc Medvedev_-_Lubyanka.ogv 3 | -------------------------------------------------------------------------------- /testdata/videos/quick-hashes.txt: -------------------------------------------------------------------------------- 1 | 63c2c1c585c5c1c92f0a3a13471347072b617c0c9c1c1ea8e1c1e1c1e1c1c1e1 29684.29680.WEEKNUMMER263-HRE0000D0AB.mpeg 2 | 7a187f00063e0c7c161e1a1e1a1e1c1e742c6c262e640cbc166666262e260cbc Medvedev_-_Lubyanka.ogv 3 | -------------------------------------------------------------------------------- /videohash.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Perceptual image hash calculation tool based on algorithm descibed in 3 | * Block Mean Value Based Image Perceptual Hashing by Bian Yang, Fan Gu and Xiamu Niu 4 | * 5 | * Copyright 2014-2015 Commons Machinery http://commonsmachinery.se/ 6 | * Distributed under an MIT license, please see LICENSE in the top dir. 
7 | */ 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "blockhash.h" 18 | 19 | MagickWand* load_image_from_frame(char* filename, size_t frame_number, CvMat* frame_data) 20 | { 21 | // Create MagickWand object 22 | MagickWand* magick_wand = NewMagickWand(); 23 | 24 | if(magick_wand) { 25 | // Read an image file into memory 26 | MagickBooleanType status = MagickReadImageBlob(magick_wand, frame_data->data.ptr, frame_data->cols); 27 | if (status == MagickFalse) { 28 | fprintf(stderr, "Error reading converted to image frame #%llu of video file '%s'.\n", 29 | (unsigned long long)frame_number, 30 | filename); 31 | DestroyMagickWand(magick_wand); 32 | magick_wand = NULL; 33 | } 34 | } 35 | return magick_wand; 36 | } 37 | 38 | static int compute_image_hash(MagickWand* magick_wand, int bits, int quick, int** hash) 39 | { 40 | MagickBooleanType status; 41 | size_t width, height, data_size; 42 | unsigned char *image_data; 43 | 44 | // Remove color profiles for interoperability with other hashing tools 45 | MagickProfileImage(magick_wand, "*", NULL, 0); 46 | 47 | // Compute pixel data size 48 | width = MagickGetImageWidth(magick_wand); 49 | height = MagickGetImageHeight(magick_wand); 50 | data_size = width * height * 4; 51 | 52 | // Handle special zero size case 53 | if(data_size == 0) { 54 | size_t hash_size = bits * bits * sizeof(int); 55 | int* h = malloc(hash_size); 56 | if(!h) return 1; 57 | memset(h, 0, hash_size); 58 | *hash = h; 59 | return 0; 60 | } 61 | 62 | // Export pixel data 63 | image_data = malloc(data_size); 64 | if(!image_data) return 2; 65 | 66 | status = MagickExportImagePixels(magick_wand, 0, 0, width, height, "RGBA", CharPixel, image_data); 67 | if (status == MagickFalse) return 3; 68 | 69 | // Compute blockhash 70 | return (quick) 71 | ? 
blockhash_quick(bits, image_data, width, height, hash) 72 | : blockhash(bits, image_data, width, height, hash); 73 | } 74 | 75 | 76 | int *process_video_frame(char *filename, int bits, int quick, size_t frame_number, CvMat* frame_data) 77 | { 78 | int result = 0; 79 | int *hash; 80 | MagickWand *magick_wand; 81 | 82 | // Load Image 83 | magick_wand = load_image_from_frame(filename, frame_number, frame_data); 84 | if(!magick_wand) return NULL; 85 | 86 | // Compute Image Hash 87 | result = compute_image_hash(magick_wand, bits, quick, &hash); 88 | 89 | switch(result) 90 | { 91 | case 0: break; 92 | 93 | case 1: { 94 | fprintf(stderr, "Error computing blockhash for the zero-sized frame #%llu of the video file '%s'.", 95 | (unsigned long long)frame_number, filename); 96 | break; 97 | } 98 | 99 | case 2: { 100 | fprintf(stderr, "Error converting image data to RGBA for the frame #%llu of the video file '%s'.\n", 101 | (unsigned long long)frame_number, filename); 102 | break; 103 | } 104 | default: { 105 | fprintf(stderr, "Error computing blockhash for the frame #%llu of the video file '%s'.", 106 | (unsigned long long)frame_number, filename); 107 | break; 108 | } 109 | } 110 | 111 | // Cleanup temporary data 112 | DestroyMagickWand(magick_wand); 113 | 114 | // Report the result 115 | return hash; 116 | } 117 | 118 | typedef struct _video_frame_info { 119 | size_t pos; 120 | int* hash; 121 | } video_frame_info; 122 | 123 | #define HASH_PART_COUNT 4 124 | 125 | int process_video(char *filename, int bits, int quick, int debug) 126 | { 127 | size_t i; 128 | int result = 0; 129 | CvCapture* capture; 130 | size_t frame_count; 131 | video_frame_info hash_frames[HASH_PART_COUNT]; 132 | size_t next_hash_frame; 133 | size_t current_frame; 134 | int* hash = NULL; 135 | 136 | // Initialize data 137 | memset(&hash_frames[0], 0, sizeof(hash_frames)); 138 | 139 | // Open video file 140 | capture = cvCreateFileCapture(filename); 141 | if(!capture) { 142 | fprintf(stderr, "Error opening 
video file '%s'.", filename); 143 | return -1; 144 | } 145 | 146 | /* 147 | * Unfortunate, CV_CAP_PROP_FRAME_COUNT isn't reliable since it 148 | * doesn't consider duplicate frames (dropped when reading) and 149 | * makes assumptions that may not hold true when decoding frames. 150 | * 151 | */ 152 | cvSetCaptureProperty(capture, CV_CAP_PROP_POS_FRAMES, 0); 153 | frame_count = 0; 154 | while (cvQueryFrame(capture)) frame_count++; 155 | 156 | // Determine middle hash frames 157 | hash_frames[0].pos = 1; 158 | hash_frames[1].pos = (size_t)floor(frame_count*0.35); 159 | hash_frames[2].pos = (size_t)floor(frame_count*0.7); 160 | hash_frames[3].pos = frame_count; 161 | 162 | next_hash_frame = 0; 163 | 164 | // Rewind to start of stream 165 | cvSetCaptureProperty(capture, CV_CAP_PROP_POS_FRAMES, 0); 166 | 167 | for(current_frame = 1; current_frame <= frame_count; current_frame++) { 168 | IplImage* frame_image = cvQueryFrame(capture); 169 | if(!frame_image) { 170 | result = -1; 171 | fprintf(stderr, "Error capturing frame #%llu of #%llu from the video file '%s'.", 172 | (unsigned long long)current_frame, 173 | (unsigned long long)frame_count, 174 | filename); 175 | goto cleanup; 176 | } else { 177 | CvMat* mat = NULL; 178 | for(i = next_hash_frame; i < HASH_PART_COUNT; ++i) { 179 | if(hash_frames[i].pos == current_frame) { 180 | if(!mat) { 181 | mat = cvEncodeImage(".bmp", frame_image, NULL); 182 | if(!mat) { 183 | result = -1; 184 | fprintf(stderr, "Error converting to image frame #%llu of the video file '%s'.", 185 | (unsigned long long)current_frame, 186 | filename); 187 | goto cleanup; 188 | } 189 | } 190 | hash_frames[i].hash = process_video_frame(filename, bits, quick, current_frame, mat); 191 | ++next_hash_frame; 192 | // Do not break here, becasue in certain cases 193 | // a single frame may happen multiple times as hash_frame 194 | } 195 | } 196 | } 197 | } 198 | hash_complete: 199 | hash = malloc(bits * bits * sizeof(int) * HASH_PART_COUNT); 200 | if(!hash) { 
201 | fprintf(stderr, "Error creating hash for video file '%s'.\n", filename); 202 | goto cleanup; 203 | } else { 204 | size_t block_element_count = bits * bits; 205 | size_t block_size = block_element_count * sizeof(int); 206 | int* dest = hash; 207 | for(i = 0; i < HASH_PART_COUNT; ++i, dest += block_element_count) 208 | memcpy(dest, hash_frames[i].hash, block_size); 209 | } 210 | char* hex = bits_to_hexhash(hash, HASH_PART_COUNT * bits * bits); 211 | if(hex) { 212 | result = 0; 213 | printf("%s %s\n", hex, filename); 214 | free(hex); 215 | } else { 216 | result = -1; 217 | fprintf(stderr, "Error converting blockhash value to string for the image file '%s'.\n", 218 | filename); 219 | } 220 | 221 | 222 | // Free hash buffer 223 | free(hash); 224 | 225 | cleanup: 226 | // Free partial hash buffers 227 | for(i = 0; i < HASH_PART_COUNT; ++i) { 228 | int* h = hash_frames[i].hash; 229 | if(h) free(h); 230 | } 231 | 232 | // Close video file 233 | cvReleaseCapture(&capture); 234 | 235 | // report the result 236 | return result; 237 | } 238 | 239 | void help() { 240 | printf("Usage: blockhash_video [-h|--help] [--quick] [--video] [--bits BITS] [--debug] filenames...\n" 241 | "\n" 242 | "Optional arguments:\n" 243 | "-h, --help Show this help message and exit\n" 244 | "-q, --quick Use quick hashing method.\n" 245 | "-b, --bits BITS Specify hash size (N^2) bits. 
Default: 16\n" 246 | "--debug Print hashes as 2D maps (for debugging)\n"); 247 | } 248 | 249 | 250 | void main (int argc, char **argv) 251 | { 252 | MagickWandGenesis(); 253 | 254 | int quick = 0; 255 | int debug = 0; 256 | int bits = 16; 257 | int x; 258 | 259 | int option_index = 0; 260 | int c; 261 | 262 | struct option long_options[] = { 263 | {"help", no_argument, 0, 'h'}, 264 | {"quick", no_argument, 0, 'q'}, 265 | {"bits", required_argument, 0, 'b'}, 266 | {"debug", no_argument, 0, 'd'}, 267 | {0, 0, 0, 0} 268 | }; 269 | 270 | if (argc < 2) { 271 | help(); 272 | exit(0); 273 | } 274 | 275 | 276 | while ((c = getopt_long(argc, argv, "hqb:d", 277 | long_options, &option_index)) != -1) { 278 | switch (c) { 279 | case 'h': 280 | help(); 281 | exit(0); 282 | break; 283 | case 'q': 284 | quick = 1; 285 | break; 286 | case 'b': 287 | if (sscanf(optarg, "%d", &bits) != 1) {; 288 | printf("Error: couldn't parse bits argument\n"); 289 | exit(-1); 290 | } 291 | if (bits % 4 != 0) { 292 | printf("Error: bits argument should be a multiple of 4\n"); 293 | exit(-1); 294 | } 295 | break; 296 | case 'd': 297 | debug = 1; 298 | break; 299 | case '?': 300 | default: 301 | exit(-1); 302 | } 303 | } 304 | 305 | if (optind < argc) { 306 | while (optind < argc) { 307 | process_video(argv[optind++], bits/2, quick, debug); 308 | } 309 | } 310 | 311 | MagickWandTerminus(); 312 | 313 | exit(0); 314 | } 315 | -------------------------------------------------------------------------------- /videorooter.conf: -------------------------------------------------------------------------------- 1 | # 2 | # This is a configuration file needed to include the project in the 3 | # Videorooter testing of image/movie algorithms. It defines which 4 | # media this software can work with (images, movies) and how to work 5 | # with each. 
#

images=1  # Set to 0 if this software can not work with images
movies=1  # Set to 0 if this software can not work with movies

#
# Called without arguments, should compile everything from a pristine
# source directory.
#
function compile {
    ./waf configure
    ./waf
}

#
# Called with 2+ arguments: the type of work (image, movie) and
# the list of files to calculate a fingerprint for.
#
# The file list is forwarded as "$@" so that names containing spaces or
# glob characters reach the hashing tool as single, unmodified arguments.
#
function calc {
    type=$1
    shift
    if test "$type" = "image"; then
        build/blockhash "$@"
    else
        build/blockhash_video "$@"
    fi
}

--------------------------------------------------------------------------------
/waf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jonasob/blockhash/c6652877b1780bd7c4bc11c3fdc978ae94318186/waf
--------------------------------------------------------------------------------
/wscript:
--------------------------------------------------------------------------------
APPNAME = 'blockhash'
VERSION = '0.2'

top = '.'
5 | out = 'build' 6 | 7 | def options(opt): 8 | opt.load('compiler_c') 9 | opt.add_option('--debug', dest='debug', action='store_true', default=False, help='Set to debug to enable debug symbols') 10 | 11 | def configure(conf): 12 | conf.load('compiler_c') 13 | 14 | conf.check_cc(lib='m') 15 | conf.check_cfg(package='MagickWand', args=['--cflags', '--libs']) 16 | conf.check_cfg(package='opencv', args=['--cflags', '--libs']) 17 | 18 | def build(bld): 19 | bld.stlib(source='blockhash.c', target='stblockhash') 20 | if bld.options.debug: 21 | cflags=['-g3', '-ggdb'] 22 | else: 23 | cflags=['-O3'] 24 | 25 | bld.program(source='imagehash.c', 26 | features='c cprogram', 27 | target='blockhash', 28 | use=['MAGICKWAND', 'M', 'stblockhash'], 29 | cflags=cflags, 30 | ) 31 | bld.program(source='videohash.c', 32 | features='c cprogram', 33 | target='blockhash_video', 34 | use=['MAGICKWAND', 'M', 'stblockhash', 'OPENCV'], 35 | cflags=cflags, 36 | ) 37 | --------------------------------------------------------------------------------