├── .gitignore ├── .npmignore ├── .travis.yml ├── LICENSE ├── README.md ├── binding.gyp ├── examples ├── a.jpg ├── b.png ├── c.png ├── d.jpg ├── f.png ├── g.jpg └── h.jpg ├── index.js ├── package.json ├── phash.cpp └── test └── test.js /.gitignore: -------------------------------------------------------------------------------- 1 | lib-cov 2 | *.seed 3 | *.log 4 | *.csv 5 | *.dat 6 | *.out 7 | *.pid 8 | *.gz 9 | /demo 10 | pids 11 | logs 12 | results 13 | 14 | node_modules 15 | npm-debug.log 16 | 17 | *.un~ 18 | .DS_Store 19 | 20 | build 21 | coverage 22 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | examples 2 | test 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - '4' 4 | - '5' 5 | - '6' 6 | - 'node' 7 | before_install: 8 | - "sudo apt-get update" 9 | - "sudo apt-get install cimg-dev libphash0-dev libmagickcore-dev" 10 | - export JOBS=max 11 | - export prebuild_compile=true 12 | script: "npm run-script test-travis" 13 | after_script: "npm install coveralls@2 && cat ./coverage/lcov.info | coveralls" 14 | env: 15 | - CXX=g++-4.8 16 | addons: 17 | apt: 18 | sources: 19 | - ubuntu-toolchain-r-test 20 | packages: 21 | - g++-4.8 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Aaron Marasco. All rights reserved. 
2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # phash-image 2 | 3 | [![NPM version][npm-image]][npm-url] 4 | [![Build status][travis-image]][travis-url] 5 | [![Test coverage][coveralls-image]][coveralls-url] 6 | [![Dependency Status][david-image]][david-url] 7 | [![License][license-image]][license-url] 8 | [![Downloads][downloads-image]][downloads-url] 9 | 10 | [pHash](http://www.phash.org/) for images in node.js. 11 | 12 | Phash is a library that will create a "perceptual hash" of media files, so similar files will return similar hashes. Typically to compare hashes, 13 | a simple [Hamming distance](http://en.wikipedia.org/wiki/Hamming_distance) between the two hashes is a good indicator of how similar two 14 | media files are. 
15 | 16 | ## Installation 17 | 18 | phash-image depends on [CImg](http://cimg.sourceforge.net/), [pHash](http://www.phash.org/), and [ImageMagick](http://www.imagemagick.org/). 19 | 20 | On Ubuntu: 21 | 22 | ```bash 23 | sudo apt-get install cimg-dev libphash0-dev libmagickcore-dev 24 | ``` 25 | 26 | On OSX: 27 | 28 | ```bash 29 | brew install phash imagemagick 30 | ``` 31 | 32 | Then, install using npm: 33 | 34 | ```bash 35 | $ npm install phash-image 36 | ``` 37 | 38 | ## API 39 | 40 | ### phash(filename, [returnBigInt], [callback]).then( hash => ) 41 | 42 | ```js 43 | var phash = require('phash-image'); 44 | // with a callback 45 | phash(filename, (err, hash) => ); 46 | // as a promise 47 | phash(filename).then( hash => ); 48 | ``` 49 | 50 | If you want to return a ulong64 as a string to store in a database, 51 | set `true` as the second parameter. 52 | 53 | ```js 54 | phash(filename, true).then( bigint => ) 55 | ``` 56 | 57 | ### phash.mh(filename, [callback]).then( hash => ) 58 | 59 | Phash's [MH](http://www.phash.org/docs/design.html) 72-byte length hash. 60 | This is slower, but should be more accurate. 61 | 62 | ```js 63 | phash.mh(filename).then( hash => ) 64 | ``` 65 | 66 | ### Comparing phashes 67 | 68 | To compare phashes, use [hamming-distance](https://github.com/math-utils/hamming-distance). 
/**
 * Forwards the outcome of `promise` to an optional node-style callback.
 *
 * The callback is invoked from `process.nextTick()` rather than directly
 * inside the promise handler. That way an exception thrown by the callback
 * surfaces as a regular uncaught exception (as it would for any other
 * callback API) instead of being captured by the promise chain and turned
 * into an unhandled rejection.
 *
 * @param {Promise} promise
 * @param {Function} [callback] callback(err, value); ignored unless it is a function
 */

function forwardToCallback(promise, callback) {
  if (typeof callback !== 'function') return;

  promise.then(function (value) {
    process.nextTick(function () {
      callback(null, value);
    });
  }, function (err) {
    process.nextTick(function () {
      callback(err);
    });
  });
}

/**
 * Hashes an image through the native `pHash.imageHash` binding.
 *
 * The binding reports the hash twice: as a Buffer and as a string of decimal
 * digits. The Buffer is returned unless `returnBigInt` is exactly `true`.
 *
 * @param {String} file
 * @param {Boolean} [returnBigInt] pass `true` to receive the decimal string
 * @param {Function} [cb] callback(err, hash); may also be given as the 2nd argument
 * @return Promise => Buffer (or String when `returnBigInt` is `true`)
 */

function pHashImage(file, returnBigInt, cb) {
  // Support the (file, cb) call shape.
  if (typeof returnBigInt === 'function') {
    cb = returnBigInt;
    returnBigInt = false;
  }

  var promise = new Promise(function (resolve, reject) {
    pHash.imageHash(file, function (err, hash, bigint) {
      if (err) return reject(err);
      resolve(returnBigInt === true ? bigint : hash);
    });
  });

  forwardToCallback(promise, cb);

  return promise;
}

/**
 * http://www.phash.org/docs/design.html
 * Returns a 72-byte buffer.
 *
 * @param {String} filename
 * @param {Function} callback(err, Buffer)
 * @return Promise => Buffer
 */

pHashImage.mh = function (file, callback) {
  var promise = new Promise(function (resolve, reject) {
    pHash.imageHashMH(file, function (err, hash) {
      if (err) return reject(err);
      resolve(hash);
    });
  });

  forwardToCallback(promise, callback);

  return promise;
};
"any-promise": "^1.1.0", 41 | "nan": "^2.0.5" 42 | }, 43 | "license": "MIT" 44 | } 45 | -------------------------------------------------------------------------------- /phash.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | using namespace node; 9 | 10 | bool fileExists(const char* filename) { 11 | ifstream file(filename); 12 | return !!file; 13 | } 14 | 15 | template 16 | string NumberToString ( T Number ) { 17 | ostringstream ss; 18 | ss << Number; 19 | return ss.str(); 20 | } 21 | 22 | // https://gist.github.com/rvagg/bb08a8bd2b6cbc264056#file-phash-cpp 23 | class PhashRequest : public Nan::AsyncWorker { 24 | public: 25 | PhashRequest(Nan::Callback *callback, string file) 26 | : Nan::AsyncWorker(callback), error(false), file(file), bigint("") {} 27 | ~PhashRequest() {} 28 | 29 | void Execute () { 30 | // prevent segfault on an empty file, see https://github.com/aaronm67/node-phash/issues/8 31 | const char* _file = file.c_str(); 32 | if (!fileExists(_file)) { 33 | error = true; 34 | return; 35 | } 36 | 37 | try { 38 | ph_dct_imagehash(_file, hash); 39 | bigint = NumberToString(hash); 40 | } 41 | catch(...) 
{ 42 | error = true; 43 | // something went wrong with hashing 44 | // probably a CImg or ImageMagick IO Problem 45 | } 46 | } 47 | 48 | void HandleOKCallback () { 49 | Nan::HandleScope scope; 50 | 51 | v8::Local argv[3]; 52 | 53 | if (error) { 54 | argv[0] = Nan::Error("Error getting image phash."); 55 | } 56 | else { 57 | argv[0] = Nan::Null(); 58 | } 59 | 60 | // A bit messy - converts the ulong64 into a char* (byte array) needed to create a Buffer 61 | // The problem is that the number's bytes are in the reverse of the needed order 62 | // That's why this loop pulls values in reverse 63 | size_t size = sizeof hash; 64 | char* hashPtr = (char*) &hash; 65 | char* buffer = (char*) malloc(size); 66 | for (unsigned int i = 0; i < size; i++) { 67 | buffer[i] = hashPtr[size - i - 1]; 68 | } 69 | 70 | argv[1] = Nan::NewBuffer(buffer, size).ToLocalChecked(); 71 | argv[2] = Nan::New(bigint).ToLocalChecked(); 72 | 73 | callback->Call(3, argv); 74 | } 75 | 76 | private: 77 | bool error; 78 | string file; 79 | ulong64 hash; 80 | string bigint; 81 | }; 82 | 83 | NAN_METHOD(ImageHashAsync) { 84 | Nan::Utf8String str(info[0]); 85 | Nan::Callback *callback = new Nan::Callback(info[1].As()); 86 | Nan::AsyncQueueWorker(new PhashRequest(callback, string(*str))); 87 | return; 88 | } 89 | 90 | // https://gist.github.com/rvagg/bb08a8bd2b6cbc264056#file-phash-cpp 91 | class MHPhashRequest : public Nan::AsyncWorker { 92 | public: 93 | MHPhashRequest(Nan::Callback *callback, string file) 94 | : Nan::AsyncWorker(callback), error(false), file(file) {} 95 | ~MHPhashRequest() {} 96 | 97 | void Execute () { 98 | // prevent segfault on an empty file, see https://github.com/aaronm67/node-phash/issues/8 99 | const char* _file = file.c_str(); 100 | if (!fileExists(_file)) { 101 | error = true; 102 | return; 103 | } 104 | 105 | try { 106 | int alpha = 2; 107 | int level = 1; 108 | hash = ph_mh_imagehash(_file, hashlen, alpha, level); 109 | } 110 | catch(...) 
{ 111 | error = true; 112 | // something went wrong with hashing 113 | // probably a CImg or ImageMagick IO Problem 114 | } 115 | } 116 | 117 | void HandleOKCallback () { 118 | Nan::HandleScope scope; 119 | 120 | v8::Local argv[2]; 121 | 122 | if (error) { 123 | argv[0] = Nan::Error("Error getting image phash."); 124 | } 125 | else { 126 | argv[0] = Nan::Null(); 127 | } 128 | 129 | argv[1] = Nan::NewBuffer((char*) hash, hashlen * sizeof hash[0]).ToLocalChecked(); 130 | 131 | callback->Call(2, argv); 132 | 133 | } 134 | 135 | private: 136 | bool error; 137 | string file; 138 | uint8_t* hash; 139 | int hashlen = 0; 140 | }; 141 | 142 | NAN_METHOD(MHImageHashAsync) { 143 | Nan::Utf8String str(info[0]); 144 | Nan::Callback *callback = new Nan::Callback(info[1].As()); 145 | Nan::AsyncQueueWorker(new MHPhashRequest(callback, string(*str))); 146 | return; 147 | } 148 | 149 | void RegisterModule(v8::Local target) { 150 | Nan::SetMethod(target, "imageHash", ImageHashAsync); 151 | Nan::SetMethod(target, "imageHashMH", MHImageHashAsync); 152 | } 153 | 154 | NODE_MODULE(pHash, RegisterModule); 155 | -------------------------------------------------------------------------------- /test/test.js: -------------------------------------------------------------------------------- 1 | 2 | var compare = require('hamming-distance'); 3 | var assert = require("assert"); 4 | 5 | var pHash = require('..'); 6 | 7 | var examples = [{ 8 | path: "./examples/a.jpg", 9 | hash: "27166fd624cb9439" 10 | }, { 11 | path: "./examples/c.png", 12 | hash: "d63078d8de3236c6" 13 | }, { 14 | path: "./examples/d.jpg", 15 | hash: "a71a2de6269b9469" 16 | }]; 17 | 18 | describe("pHash", function() { 19 | // https://github.com/aaronm67/node-phash/issues/8 20 | describe("invalid file test", function() { 21 | it("should fail", function(done) { 22 | pHash("fake/path/here", function(err, hash) { 23 | assert(err); 24 | done(); 25 | }); 26 | }); 27 | }) 28 | 29 | describe("async test", function() { 30 | var test = 
var compare = require('hamming-distance');
var assert = require('assert');

var pHash = require('..');

// Example images paired with the hex-encoded hash each one is expected to produce.
var examples = [{
  path: './examples/a.jpg',
  hash: '27166fd624cb9439'
}, {
  path: './examples/c.png',
  hash: 'd63078d8de3236c6'
}, {
  path: './examples/d.jpg',
  hash: 'a71a2de6269b9469'
}];

describe('pHash', function () {
  // https://github.com/aaronm67/node-phash/issues/8
  describe('invalid file test', function () {
    it('should fail', function (done) {
      pHash('fake/path/here', function (err, hash) {
        assert(err);
        done();
      });
    });
  });

  describe('async test', function () {
    // Each example is checked through both the callback and the promise API.
    examples.forEach(function (example) {
      it('cb:' + example.path, function (done) {
        pHash(example.path, function (err, hash) {
          assert.ifError(err);
          // (actual, expected) so a failure labels the values correctly
          assert.equal(hash.toString('hex'), example.hash);
          done();
        });
      });

      it('promise:' + example.path, function () {
        return pHash(example.path).then(function (hash) {
          assert.equal(hash.toString('hex'), example.hash);
        });
      });
    });

    it('cb: should fail', function (done) {
      pHash('../examples/f.png', function (err, hash) {
        assert(err);
        done();
      });
    });

    it('promise: should fail', function () {
      return pHash('../examples/f.png').then(function () {
        // Reaching this handler means the promise wrongly fulfilled.
        throw new Error('boom');
      }).catch(function (err) {
        assert(err.message !== 'boom');
      });
    });
  });

  describe('hammingDistance()', function () {
    it('should be done', function () {
      var hammingAA = compare(examples[0].hash, examples[0].hash);
      var hammingAC = compare(examples[0].hash, examples[1].hash);
      var hammingAD = compare(examples[0].hash, examples[2].hash);
      assert.equal(hammingAA, 0);
      assert.equal(hammingAC, 38);
      assert.equal(hammingAD, 12);
    });
  });

  it('should return a bigint', function () {
    return pHash('./examples/a.jpg', true).then(function (hash) {
      assert(/^[0-9]+$/.test(hash));
    });
  });
});

describe('MH', function () {
  it('should return a buffer as a promise', function () {
    return pHash.mh('./examples/a.jpg').then(function (buf) {
      assert(Buffer.isBuffer(buf));
      assert.equal(buf.length, 72);
    });
  });

  it('should return a buffer in the callback', function (done) {
    pHash.mh('./examples/a.jpg', function (err, buf) {
      if (err) return done(err);
      assert(Buffer.isBuffer(buf));
      assert.equal(buf.length, 72);
      done();
    });
  });
});