├── photohash ├── tests │ ├── __init__.py │ ├── assets │ │ ├── snow.jpg │ │ ├── minneapolis.jpg │ │ ├── santa_monica.jpg │ │ └── santa_monica_small.jpg │ └── test_photohash.py ├── __init__.py └── photohash.py ├── requirements.txt ├── .gitignore ├── MANIFEST.in ├── .travis.yml ├── contributors.md ├── .drone.yml ├── CHANGES.txt ├── setup.py ├── LICENSE.txt └── README.rst /photohash/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Pillow>=2.1.0 2 | pytest>=2.7.2 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .DS_Store 3 | MANIFEST 4 | *.egg-info 5 | build/ 6 | dist/ -------------------------------------------------------------------------------- /photohash/__init__.py: -------------------------------------------------------------------------------- 1 | from .photohash import * 2 | 3 | __version__ = '0.4.1' 4 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.txt 2 | include *.rst 3 | recursive-include docs *.txt 4 | recursive-include docs *.rst -------------------------------------------------------------------------------- /photohash/tests/assets/snow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bunchesofdonald/photohash/HEAD/photohash/tests/assets/snow.jpg -------------------------------------------------------------------------------- /photohash/tests/assets/minneapolis.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bunchesofdonald/photohash/HEAD/photohash/tests/assets/minneapolis.jpg -------------------------------------------------------------------------------- /photohash/tests/assets/santa_monica.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bunchesofdonald/photohash/HEAD/photohash/tests/assets/santa_monica.jpg -------------------------------------------------------------------------------- /photohash/tests/assets/santa_monica_small.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bunchesofdonald/photohash/HEAD/photohash/tests/assets/santa_monica_small.jpg -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | - "3.4" 5 | install: "pip install -r requirements.txt" 6 | script: py.test 7 | sudo: false 8 | -------------------------------------------------------------------------------- /contributors.md: -------------------------------------------------------------------------------- 1 | # Photohash Contributors 2 | 3 | - [BunchesOfDonald](https://github.com/bunchesofdonald/) 4 | - [bebound](https://github.com/bebound) 5 | - [kaikuehne](https://github.com/kaikuehne) 6 | - [yoshpe](https://github.com/yoshpe) 7 | - [ahawker](https://github.com/ahawker) -------------------------------------------------------------------------------- /.drone.yml: -------------------------------------------------------------------------------- 1 | --- 2 | kind: pipeline 3 | name: python-2 4 | 5 | steps: 6 | - name: test 7 | image: python:2 8 | commands: 9 | - pip install -r requirements.txt 10 | - pytest 11 | --- 12 | kind: pipeline 13 | name: python-3 14 | 15 | steps: 16 | - name: test 17 | image: python:3 18 | commands: 19 | - pip install -r 
requirements.txt 20 | - pytest 21 | -------------------------------------------------------------------------------- /CHANGES.txt: -------------------------------------------------------------------------------- 1 | v0.4.1, 2016-09-24 -- Fix issue where `distance` wasn't being imported by __init__. 2 | v0.3.2, 2015-08-01 -- Fixes a loss of precision when using hash_size other than 8. 3 | v0.3.1, 2015-07-25 -- Fixes a memory leak in average hash algorithm. 4 | v0.2.1, 2013-11-27 -- Fixes license in setup.py. 5 | v0.2.0, 2013-11-27 -- Changes average hash algorithm to convert image to 6 | grayscale for more accurate hashing. Drops bitarray dependency. 7 | v0.1.0, 2013-08-15 -- Initial release. -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | author='Chris Pickett', 5 | author_email='chris.pickett@gmail.com', 6 | classifiers=[ 7 | 'Development Status :: 4 - Beta', 8 | 'Intended Audience :: Developers', 9 | 'License :: OSI Approved :: MIT License', 10 | 'Operating System :: OS Independent', 11 | 'Programming Language :: Python', 12 | 'Programming Language :: Python :: 2', 13 | 'Programming Language :: Python :: 3', 14 | 'Topic :: Scientific/Engineering :: Image Recognition', 15 | ], 16 | description='A Python Perceptual Image Hashing Module', 17 | license='MIT', 18 | long_description=open('README.rst').read(), 19 | name='Photohash', 20 | packages=find_packages(), 21 | url='https://github.com/bunchesofdonald/photohash', 22 | version='0.4.1', 23 | install_requires=[ 24 | 'Pillow>=2.1.0', 25 | ], 26 | ) 27 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Christopher J Pickett 4 | 5 | Permission is hereby 
granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /photohash/photohash.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | from PIL import Image 4 | 5 | 6 | def hash_distance(left_hash, right_hash): 7 | """Compute the hamming distance between two hashes""" 8 | if len(left_hash) != len(right_hash): 9 | raise ValueError('Hamming distance requires two strings of equal length') 10 | 11 | return sum(map(lambda x: 0 if x[0] == x[1] else 1, zip(left_hash, right_hash))) 12 | 13 | 14 | def hashes_are_similar(left_hash, right_hash, tolerance=6): 15 | """ 16 | Return True if the hamming distance between 17 | the image hashes are less than the given tolerance. 
18 | """ 19 | return hash_distance(left_hash, right_hash) <= tolerance 20 | 21 | 22 | def average_hash(image_path, hash_size=8): 23 | """ Compute the average hash of the given image. """ 24 | with open(image_path, 'rb') as f: 25 | # Open the image, resize it and convert it to black & white. 26 | image = Image.open(f).resize((hash_size, hash_size), Image.ANTIALIAS).convert('L') 27 | pixels = list(image.getdata()) 28 | 29 | avg = sum(pixels) / len(pixels) 30 | 31 | # Compute the hash based on each pixels value compared to the average. 32 | bits = "".join(map(lambda pixel: '1' if pixel > avg else '0', pixels)) 33 | hashformat = "0{hashlength}x".format(hashlength=hash_size ** 2 // 4) 34 | return int(bits, 2).__format__(hashformat) 35 | 36 | 37 | def distance(image_path, other_image_path): 38 | """ Compute the hamming distance between two images""" 39 | image_hash = average_hash(image_path) 40 | other_image_hash = average_hash(other_image_path) 41 | 42 | return hash_distance(image_hash, other_image_hash) 43 | 44 | 45 | def is_look_alike(image_path, other_image_path, tolerance=6): 46 | image_hash = average_hash(image_path) 47 | other_image_hash = average_hash(other_image_path) 48 | 49 | return hashes_are_similar(image_hash, other_image_hash, tolerance) 50 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ========= 2 | PhotoHash 3 | ========= 4 | 5 | .. image:: https://travis-ci.org/bunchesofdonald/django-hermes.svg?branch=master 6 | :target: https://travis-ci.org/bunchesofdonald/photohash 7 | 8 | This was mainly created just for my own use and education. It's a perceptual 9 | hash algorithm, used to find if two images are similar. 
10 | 11 | Installation 12 | ============ 13 | 14 | :: 15 | 16 | pip install PhotoHash 17 | 18 | 19 | Usage 20 | ===== 21 | 22 | average_hash 23 | ------------ 24 | Returns the hash of the image using an average hash algorithm. This algorithm 25 | compares each pixel in the image to the average value of all the pixels.:: 26 | 27 | import photohash 28 | hash = photohash.average_hash('/path/to/myimage.jpg') 29 | 30 | distance 31 | -------- 32 | Returns the hamming distance between the average_hash of the given images.:: 33 | 34 | import photohash 35 | distance = photohash.distance('/path/to/myimage.jpg', '/path/to/myotherimage.jpg') 36 | 37 | is_look_alike 38 | ------------- 39 | Returns a boolean of whether or not the photos look similar.:: 40 | 41 | import photohash 42 | similar = photohash.is_look_alike('/path/to/myimage.jpg', '/path/to/myotherimage.jpg') 43 | 44 | is_look_alike also takes an optional tolerance argument that defines how strict 45 | the comparison should be.:: 46 | 47 | import photohash 48 | similar = photohash.is_look_alike('/path/to/myimage.jpg', '/path/to/myotherimage.jpg', tolerance=3) 49 | 50 | hash_distance 51 | ------------- 52 | Returns the hamming distance between two hashes of the same length:: 53 | 54 | import photohash 55 | hash_one = photohash.average_hash('/path/to/myimage.jpg') 56 | hash_two = photohash.average_hash('/path/to/myotherimage.jpg') 57 | distance = photohash.hash_distance(hash_one, hash_two) 58 | 59 | hashes_are_similar 60 | ------------------ 61 | Returns a boolean of whether or not the two hashes are within the given tolerance. Same as 62 | is_look_alike, but takes hashes instead of image paths:: 63 | 64 | import photohash 65 | hash_one = photohash.average_hash('/path/to/myimage.jpg') 66 | hash_two = photohash.average_hash('/path/to/myotherimage.jpg') 67 | similar = photohash.hashes_are_similar(hash_one, hash_two) 68 | 69 | hashes_are_similar also takes the same optional tolerance argument that is_look_alike does.
class PhotoHashTestCase(unittest.TestCase):
    """Tests for the photohash functions, run against the bundled image assets."""

    def setUp(self):
        # (asset file name, expected average hash at the default hash size)
        expected_hashes = [
            ('minneapolis.jpg', 'ffffffffc7000000'),
            ('santa_monica.jpg', '01ffffff1f3f0008'),
            ('snow.jpg', '67047df9fcf0408f'),
            # Scaled version of santa_monica.jpg; expected to hash identically.
            ('santa_monica_small.jpg', '01ffffff1f3f0008'),
        ]
        self.photos = [
            {'path': join(ASSETS_ROOT, file_name), 'average_hash': expected}
            for file_name, expected in expected_hashes
        ]

    def test_average_hash(self):
        """average_hash should output the expected hash for each test image"""
        for photo in self.photos:
            self.assertEqual(photo['average_hash'], average_hash(photo['path']))

    def test_hash_distance(self):
        """hash_distance should know the hamming distance between two strings"""
        self.assertEqual(hash_distance('roses', 'toned'), 3)
        self.assertEqual(hash_distance('are', 'are'), 0)
        self.assertEqual(hash_distance('read', 'daer'), 4)

    def test_hash_distance_same_length_required(self):
        """hash_distance should throw a ValueError if the two strings are not the same length"""
        with self.assertRaises(ValueError):
            hash_distance('short', 'very long')

    def test_distance(self):
        """distance should know the distance between the average_hash of two test images"""
        # Every unordered pair of photos, including each photo with itself.
        for index, left in enumerate(self.photos):
            for right in self.photos[index:]:
                hamming_distance = hash_distance(left['average_hash'], right['average_hash'])

                self.assertEqual(hamming_distance, distance(left['path'], right['path']))

    def test_is_look_alike(self):
        """is_look_alike should know if two images look similar"""
        different_photo = self.photos[0]['path']
        original = self.photos[1]['path']
        snow = self.photos[2]['path']
        scaled = self.photos[3]['path']

        # Test that the same image will return True.
        self.assertTrue(is_look_alike(snow, snow))

        # And if we use the most strict tolerance.
        self.assertTrue(is_look_alike(snow, snow, tolerance=0))

        # And if we use the least strict tolerance.
        self.assertTrue(is_look_alike(snow, snow, tolerance=16))

        # Test that different images return False.
        self.assertFalse(is_look_alike(different_photo, original))

        # Test that a scaled version of the same image is within default tolerance.
        self.assertTrue(is_look_alike(original, scaled))