├── requirements-linting-old.txt ├── requirements-conda.txt ├── MANIFEST.in ├── imagehash ├── py.typed └── __init__.py ├── tests ├── data │ ├── peppers.png │ └── imagehash.png ├── __init__.py ├── test_readme.py ├── test_dhash.py ├── test_phash.py ├── test_average_hash.py ├── test_old_hex_conversions.py ├── test_hex_conversions_multihash.py ├── test_colorhash.py ├── utils.py ├── test_whash.py ├── test_hex_conversions.py ├── test_crop_resistant_hash.py └── test_hash_is_constant.py ├── requirements-linting-anaconda.txt ├── requirements-linting.txt ├── .editorconfig ├── .gitignore ├── .bumpversion.cfg ├── .coveragerc ├── examples ├── github-urls.txt ├── run_art.sh ├── crop_resistance.py ├── crop_resistant_segmentation.py ├── run_icons.sh └── hashimages.py ├── setup.cfg ├── setup.py ├── LICENSE ├── find_similar_images.py ├── .github └── workflows │ └── testing.yml ├── Makefile ├── README.rst └── output.html /requirements-linting-old.txt: -------------------------------------------------------------------------------- 1 | autopep8 2 | flake8 3 | mccabe 4 | -------------------------------------------------------------------------------- /requirements-conda.txt: -------------------------------------------------------------------------------- 1 | pillow 2 | numpy 3 | scipy 4 | pywavelets 5 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst 2 | include *.txt 3 | include LICENSE 4 | -------------------------------------------------------------------------------- /imagehash/py.typed: -------------------------------------------------------------------------------- 1 | # Marker file for PEP 561. The imagehash package uses inline types. 
2 | -------------------------------------------------------------------------------- /tests/data/peppers.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JohannesBuchner/imagehash/HEAD/tests/data/peppers.png -------------------------------------------------------------------------------- /tests/data/imagehash.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JohannesBuchner/imagehash/HEAD/tests/data/imagehash.png -------------------------------------------------------------------------------- /requirements-linting-anaconda.txt: -------------------------------------------------------------------------------- 1 | autopep8 2 | flake8 3 | isort 4 | mccabe 5 | pillow 6 | numpy 7 | scipy 8 | pywavelets 9 | -------------------------------------------------------------------------------- /requirements-linting.txt: -------------------------------------------------------------------------------- 1 | autopep8 2 | flake8 3 | flake8-bugbear 4 | flake8-isort 5 | flake8-simplify 6 | isort 7 | mccabe 8 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | [*] 2 | indent_style = tab 3 | indent_size = 2 4 | [*.yml] 5 | indent_style = space 6 | [*.py] 7 | indent_size = 4 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.pyc 3 | env 4 | *.jpg 5 | build 6 | dist 7 | ImageHash.egg-info/ 8 | .eggs 9 | .DS_Store 10 | .python-version 11 | .coverage 12 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, 
print_function 2 | 3 | from .utils import TestImageHash # noqa: F401 Testing the import 4 | -------------------------------------------------------------------------------- /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 4.3.2 3 | commit = True 4 | tag = True 5 | 6 | [bumpversion:file:setup.py] 7 | 8 | [bumpversion:file:imagehash/__init__.py] 9 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | # 2 | # .coveragerc to control coverage.py 3 | # 4 | 5 | [run] 6 | branch = True 7 | include = 8 | imagehash/__init__.py 9 | find_similar_images.py 10 | 11 | 12 | [report] 13 | exclude_lines = 14 | pragma: no cover 15 | def __repr__ 16 | if __name__ == .__main__.: 17 | -------------------------------------------------------------------------------- /examples/github-urls.txt: -------------------------------------------------------------------------------- 1 | https://github.com/akveo/eva-icons 2 | https://github.com/CodeMouse92/VividityIcons 3 | https://github.com/franksouza183/Evolvere-Icons 4 | https://github.com/HackeSta/atom-icons 5 | https://github.com/icons8/welovesvg 6 | https://github.com/joaobborges/minimal-icons 7 | https://github.com/pluwen/awesome-iconjar 8 | https://github.com/simple-icons/simple-icons 9 | https://github.com/synthagency/icons-flat-osx 10 | https://github.com/twbs/icons 11 | -------------------------------------------------------------------------------- /tests/test_readme.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import six 4 | 5 | 6 | def test_run(): 7 | # test code in README.rst file 8 | # find any chunks after :: 9 | # which code lines, which start with >>> 10 | parent_dir = os.path.dirname(os.path.dirname(__file__)) 11 | with open(os.path.join(parent_dir, 
'README.rst')) as f: 12 | chunk = [line.replace('\t>>> ', '') for line in f if line.startswith('\t>>> ')] 13 | 14 | code = ''.join(chunk) 15 | print("running::\n" + code) 16 | print("result:", six.exec_(code, {}, {})) 17 | -------------------------------------------------------------------------------- /examples/run_art.sh: -------------------------------------------------------------------------------- 1 | 2 | for j in 2 3 4 5 6 7 8 9 10 11 3 | do 4 | echo "${j} ..." 5 | paste urls.txt hashes.txt | 6 | grep -v '0000000000000000 0000000000000000 0000000000000000' | 7 | awk '{ print $1,$'$((j+1))'}' > hashesfull.txt 8 | 9 | Cluster $k"; awk '($2 == "'"$k"'"){print $1}' hashesfull.txt | 12 | while read path; do echo ""; done 13 | } 14 | done > art${j}.html 15 | done 16 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal = 1 3 | 4 | [flake8] 5 | count = True 6 | statistics = True 7 | max-line-length = 127 8 | max-complexity = 10 9 | avoid-escape=True 10 | ; This ignore differs than autopep8's ignore as to not autofix tabs to spaces, but still warn when mixed 11 | ; variable "hash" is shadowing a python builtin 12 | ; tabs are prefered indentation; 13 | ; Bug with pycodestyle for Python 2.7 where it thinks everything is over-indented with tabs 14 | ignore= 15 | A001, 16 | W191,E111 17 | E117 18 | per-file-ignores= 19 | ; False positive with multiline strings https://github.com/PyCQA/pycodestyle/issues/376 20 | find_similar_images.py: E101 21 | -------------------------------------------------------------------------------- /tests/test_dhash.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import unittest 4 | 5 | import imagehash 6 | 7 | from .utils import TestImageHash 8 | 9 | 10 | class 
Test(TestImageHash): 11 | def setUp(self): 12 | self.image = self.get_data_image() 13 | self.func = imagehash.dhash 14 | 15 | def test_dhash(self): 16 | self.check_hash_algorithm(self.func, self.image) 17 | 18 | def test_dhash_length(self): 19 | self.check_hash_length(self.func, self.image) 20 | 21 | def test_dhash_stored(self): 22 | self.check_hash_stored(self.func, self.image) 23 | 24 | def test_dhash_size(self): 25 | self.check_hash_size(self.func, self.image) 26 | 27 | 28 | if __name__ == '__main__': 29 | unittest.main() 30 | -------------------------------------------------------------------------------- /tests/test_phash.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import unittest 4 | 5 | import imagehash 6 | 7 | from .utils import TestImageHash 8 | 9 | 10 | class Test(TestImageHash): 11 | def setUp(self): 12 | self.image = self.get_data_image() 13 | self.func = imagehash.phash 14 | 15 | def test_phash(self): 16 | self.check_hash_algorithm(self.func, self.image) 17 | 18 | def test_phash_length(self): 19 | self.check_hash_length(self.func, self.image) 20 | 21 | def test_phash_stored(self): 22 | self.check_hash_stored(self.func, self.image) 23 | 24 | def test_phash_size(self): 25 | self.check_hash_size(self.func, self.image) 26 | 27 | 28 | if __name__ == '__main__': 29 | unittest.main() 30 | -------------------------------------------------------------------------------- /tests/test_average_hash.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import unittest 4 | 5 | import imagehash 6 | 7 | from .utils import TestImageHash 8 | 9 | 10 | class Test(TestImageHash): 11 | def setUp(self): 12 | self.image = self.get_data_image() 13 | self.func = imagehash.average_hash 14 | 15 | def test_average_hash(self): 16 | 
self.check_hash_algorithm(self.func, self.image) 17 | 18 | def test_average_hash_length(self): 19 | self.check_hash_length(self.func, self.image) 20 | 21 | def test_average_hash_stored(self): 22 | self.check_hash_stored(self.func, self.image) 23 | 24 | def test_average_hash_size(self): 25 | self.check_hash_size(self.func, self.image) 26 | 27 | 28 | if __name__ == '__main__': 29 | unittest.main() 30 | -------------------------------------------------------------------------------- /examples/crop_resistance.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | 3 | import imagehash 4 | 5 | SAVE_IMAGES = False 6 | 7 | # Load image 8 | full_image = Image.open('../tests/data/peppers.png') 9 | width, height = full_image.size 10 | # Hash it 11 | full_hash = imagehash.crop_resistant_hash(full_image) 12 | 13 | # Crop it 14 | for x in range(5, 50, 5): 15 | start = x / 100 16 | end = 1 - start 17 | crop_img = full_image.crop((start * width, start * height, end * width, end * height)) 18 | crop_hash = imagehash.crop_resistant_hash(crop_img) 19 | if SAVE_IMAGES: 20 | crop_img.save('crop_{}.png'.format(str(x).zfill(2))) 21 | crop_diff = full_hash.hash_diff(crop_hash) 22 | print( 23 | 'Cropped {}% from each side. 
Hash has {} matching segments with {} total hamming distance'.format( 24 | x, crop_diff[0], crop_diff[1] 25 | ) 26 | ) 27 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | try: 5 | from setuptools import setup 6 | except BaseException: 7 | from distutils.core import setup 8 | 9 | long_description = '' 10 | with open('README.rst') as f: 11 | long_description = f.read() 12 | 13 | setup( 14 | name='ImageHash', 15 | version='4.3.2', 16 | author='Johannes Buchner', 17 | author_email='buchner.johannes@gmx.at', 18 | packages=['imagehash'], 19 | package_data={'imagehash': ['py.typed']}, 20 | data_files=[('images', ['tests/data/imagehash.png'])], 21 | scripts=['find_similar_images.py'], 22 | url='https://github.com/JohannesBuchner/imagehash', 23 | license='2-clause BSD License', 24 | description='Image Hashing library', 25 | long_description=long_description, 26 | long_description_content_type='text/x-rst', 27 | install_requires=[ 28 | 'numpy', 29 | 'scipy', # for phash 30 | 'pillow', # or PIL 31 | 'PyWavelets', # for whash 32 | ], 33 | test_suite='tests', 34 | tests_require=['pytest>=3'], 35 | ) 36 | -------------------------------------------------------------------------------- /tests/test_old_hex_conversions.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | import imagehash 6 | 7 | # Each row is a test case where the first value is a hexadecimal 8 | # sequence and the second value is the expected bool array for it. 
9 | old_hexadecimal_to_bool_array = [ 10 | ['ffeb89818193ffff', np.array([ 11 | [True, True, True, True, True, True, True, True], 12 | [True, True, False, True, False, True, True, True], 13 | [True, False, False, True, False, False, False, True], 14 | [True, False, False, False, False, False, False, True], 15 | [True, False, False, False, False, False, False, True], 16 | [True, True, False, False, True, False, False, True], 17 | [True, True, True, True, True, True, True, True], 18 | [True, True, True, True, True, True, True, True]])], 19 | ] 20 | 21 | 22 | class TestOldHexConversions(unittest.TestCase): 23 | 24 | def setUp(self): 25 | self.from_hex = imagehash.old_hex_to_hash 26 | 27 | def test_hex_to_hash_output(self): 28 | for case in old_hexadecimal_to_bool_array: 29 | self.assertTrue(np.array_equal(case[1], self.from_hex(case[0]).hash)) 30 | 31 | 32 | if __name__ == '__main__': 33 | unittest.main() 34 | -------------------------------------------------------------------------------- /examples/crop_resistant_segmentation.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from PIL import Image, ImageFilter 3 | 4 | import imagehash 5 | 6 | IMAGE_FILE = '../tests/data/peppers.png' 7 | IMG_SIZE = 300 8 | SEGMENT_THRESHOLD = 128 9 | MIN_SEGMENT_SIZE = 500 10 | RAINBOW = [ 11 | (141, 211, 199), 12 | (255, 255, 179), 13 | (190, 186, 218), 14 | (251, 128, 114), 15 | (128, 177, 211), 16 | (253, 180, 98), 17 | (179, 222, 105), 18 | (252, 205, 229), 19 | (217, 217, 217), 20 | (188, 128, 189) 21 | ] 22 | 23 | # Load image 24 | full_image = Image.open(IMAGE_FILE) 25 | width, height = full_image.size 26 | # Image pre-processing 27 | image = full_image.convert('L').resize((IMG_SIZE, IMG_SIZE), Image.ANTIALIAS) 28 | # Add filters 29 | image = image.filter(ImageFilter.GaussianBlur()).filter(ImageFilter.MedianFilter()) 30 | pixels = numpy.array(image).astype(numpy.float32) 31 | # Split segments 32 | segments = 
imagehash._find_all_segments(pixels, SEGMENT_THRESHOLD, MIN_SEGMENT_SIZE) 33 | # Change back to RGB 34 | image = image.convert('RGB') 35 | # Colour in segments 36 | for num, segment in enumerate(segments): 37 | for x, y in segment: 38 | image.putpixel((y, x), RAINBOW[num % len(RAINBOW)]) 39 | image.show() 40 | -------------------------------------------------------------------------------- /tests/test_hex_conversions_multihash.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import unittest 4 | 5 | import imagehash 6 | 7 | from .utils import TestImageHash 8 | 9 | 10 | class Test (TestImageHash): 11 | def setUp(self): 12 | self.image = self.get_data_image() 13 | 14 | def test_hex_to_multi_hash(self): 15 | generated_hash = imagehash.crop_resistant_hash( 16 | self.image, 17 | # these arguments are required for a true multi image hash with multiple segments 18 | min_segment_size=500, segmentation_image_size=1000 19 | ) 20 | string = str(generated_hash) 21 | emsg = ('Stringified multihash did not match original hash') 22 | self.assertEqual( 23 | generated_hash, 24 | imagehash.hex_to_multihash(string), 25 | emsg 26 | ) 27 | string = '0026273b2b19550e,6286373334662535,6636192c47639573,999d6d67a3e82125,27a327c38191a4ad,938971382b328a46' 28 | emsg = ('Stringified multihash did not match hardcoded original hash') 29 | self.assertEqual( 30 | generated_hash, 31 | imagehash.hex_to_multihash(string), 32 | emsg 33 | ) 34 | 35 | 36 | if __name__ == '__main__': 37 | unittest.main() 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013-2022, Johannes Buchner 2 | All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 5 | 6 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 7 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 10 | 11 | -------------------------------------------------------------------------------- /examples/run_icons.sh: -------------------------------------------------------------------------------- 1 | cat github-urls.txt | while read url; do git clone $url; done 2 | 3 | find -name '*.svg' | while read path; do convert $path ${path/.svg/.png}; done 4 | 5 | echo "collecting files ..." 6 | for i in */; do pushd $i >/dev/null; prefix=$(git remote get-url origin|sed 's,https://github.com/,https://raw.githubusercontent.com/,g'); find */ -name '*.svg' | while read path; do test -e "${path/.svg/.png}" && echo $prefix/master/$path $prefix; done; popd >/dev/null; done | 7 | grep -Ev '\.min\.' > urls.txt 8 | echo "hashing ..." 
9 | for i in */; do pushd $i >/dev/null; prefix=$(git remote get-url origin|sed 's,https://github.com/,https://raw.githubusercontent.com/,g'); find */ -name '*.svg' | while read path; do test -e "${path/.svg/.png}" && echo $i/${path/.svg/.png}; done; popd >/dev/null; done| 10 | grep -vE '\.min\.' | xargs python3 ~/Downloads/imagehash/hashimage.py > hashes.txt 11 | 12 | for j in 2 3 4 5 6 7 8 9 10 11 12 13 | do 14 | echo "${j} ..." 15 | paste urls.txt hashes.txt | grep -v '0000000000000000 0000000000000000 0000000000000000' | 16 | awk '{print $1,$2,$'$((j+2))'}' > urlhashes.txt 17 | sort -k3,3 urlhashes.txt | uniq -f 2 -D | awk '{print $3}' | uniq | 18 | while read k; do 19 | { echo "

Cluster $k

"; awk '($3 == "'$k'"){print " "}' urlhashes.txt; } 20 | done > index${j}.html 21 | done 22 | -------------------------------------------------------------------------------- /examples/hashimages.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import, division, print_function 3 | 4 | import sys 5 | 6 | import numpy as np 7 | from PIL import Image 8 | 9 | import imagehash 10 | 11 | hashfuncs = [ 12 | ('ahash', imagehash.average_hash), 13 | ('phash', imagehash.phash), 14 | ('dhash', imagehash.dhash), 15 | ('whash-haar', imagehash.whash), 16 | ('whash-db4', lambda img: imagehash.whash(img, mode='db4')), 17 | ('colorhash', imagehash.colorhash), 18 | ] 19 | 20 | 21 | def alpharemover(image): 22 | if image.mode != 'RGBA': 23 | return image 24 | canvas = Image.new('RGBA', image.size, (255, 255, 255, 255)) 25 | canvas.paste(image, mask=image) 26 | return canvas.convert('RGB') 27 | 28 | 29 | def image_loader(hashfunc, hash_size=8): 30 | def function(path): 31 | image = alpharemover(Image.open(path)) 32 | return hashfunc(image) 33 | return function 34 | 35 | 36 | def with_ztransform_preprocess(hashfunc, hash_size=8): 37 | def function(path): 38 | image = alpharemover(Image.open(path)) 39 | image = image.convert('L').resize((hash_size, hash_size), Image.ANTIALIAS) 40 | data = image.getdata() 41 | quantiles = np.arange(100) 42 | quantiles_values = np.percentile(data, quantiles) 43 | zdata = (np.interp(data, quantiles_values, quantiles) / 100 * 255).astype(np.uint8) 44 | image.putdata(zdata) 45 | return hashfunc(image) 46 | return function 47 | 48 | 49 | hashfuncopeners = [(name, image_loader(func)) for name, func in hashfuncs] 50 | hashfuncopeners += [(name + '-z', with_ztransform_preprocess(func)) for name, func in hashfuncs if name != 'colorhash'] 51 | 52 | files = sys.argv[1:] 53 | for path in files: 54 | hashes = [str(hashfuncopener(path)) for name, hashfuncopener in 
hashfuncopeners] 55 | print(path, ' '.join(hashes)) 56 | # print(path, colorhash(path)) 57 | -------------------------------------------------------------------------------- /tests/test_colorhash.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import unittest 4 | 5 | import imagehash 6 | 7 | from .utils import TestImageHash 8 | 9 | CHECK_HASH_DEFAULT = range(2, 5) 10 | CHECK_HASH_SIZE_DEFAULT = range(-1, 1) 11 | 12 | 13 | class Test(TestImageHash): 14 | def setUp(self): 15 | self.image = self.get_data_image() 16 | self.func = imagehash.colorhash 17 | 18 | def test_colorhash(self): 19 | self.check_hash_algorithm(self.func, self.image) 20 | 21 | def check_hash_algorithm(self, func, image): 22 | original_hash = func(image) 23 | rotate_image = image.rotate(-1) 24 | rotate_hash = func(rotate_image) 25 | distance = original_hash - rotate_hash 26 | emsg = ('slightly rotated image should have similar hash {} {} {}'.format(original_hash, rotate_hash, distance)) 27 | self.assertTrue(distance <= 10, emsg) 28 | self.assertEqual(original_hash, rotate_hash, emsg) 29 | rotate_image = image.rotate(180) 30 | rotate_hash = func(rotate_image) 31 | emsg = ('flipped image should have same hash {} {}'.format(original_hash, rotate_hash)) 32 | self.assertEqual(original_hash, rotate_hash, emsg) 33 | 34 | def test_colorhash_stored(self): 35 | self.check_hash_stored(self.func, self.image) 36 | 37 | def test_colorhash_length(self): 38 | self.check_hash_length(self.func, self.image) 39 | 40 | def test_colorhash_size(self): 41 | self.check_hash_size(self.func, self.image) 42 | 43 | def check_hash_stored(self, func, image, binbits=CHECK_HASH_DEFAULT): 44 | for bit in binbits: 45 | image_hash = func(image, bit) 46 | other_hash = imagehash.hex_to_flathash(str(image_hash), bit * (2 + 6 * 2)) 47 | emsg = 'stringified hash {} != original hash {}'.format(other_hash, image_hash) 48 | 
self.assertEqual(image_hash, other_hash, emsg) 49 | distance = image_hash - other_hash 50 | emsg = ('unexpected hamming distance {}: original hash {} - stringified hash {}'.format(distance, image_hash, other_hash)) 51 | self.assertEqual(distance, 0, emsg) 52 | 53 | def check_hash_length(self, func, image, binbits=CHECK_HASH_DEFAULT): 54 | for bit in binbits: 55 | image_hash = func(image, bit) 56 | emsg = 'bit={} is not respected'.format(bit) 57 | self.assertEqual(image_hash.hash.size, (2 + 6 * 2) * bit, emsg) 58 | 59 | def check_hash_size(self, func, image, binbits=CHECK_HASH_SIZE_DEFAULT): 60 | for bit in binbits: 61 | with self.assertRaises(ValueError): 62 | func(image, bit) 63 | 64 | 65 | if __name__ == '__main__': 66 | unittest.main() 67 | -------------------------------------------------------------------------------- /tests/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import os 4 | import os.path 5 | import unittest 6 | 7 | from PIL import Image 8 | 9 | import imagehash 10 | 11 | CHECK_HASH_DEFAULT = range(2, 21) 12 | CHECK_HASH_SIZE_DEFAULT = range(-1, 2) 13 | 14 | 15 | class TestImageHash(unittest.TestCase): 16 | @staticmethod 17 | def get_data_image(fname=None): 18 | if fname is None: 19 | fname = 'imagehash.png' 20 | dname = os.path.abspath(os.path.dirname(__file__)) 21 | target = os.path.join(dname, 'data', fname) 22 | if not os.path.isfile(target): 23 | emsg = 'Unknown test image file: {!r}' 24 | raise ValueError(emsg.format(target)) 25 | return Image.open(target) 26 | 27 | def check_hash_algorithm(self, func, image): 28 | original_hash = func(image) 29 | rotate_image = image.rotate(-1) 30 | rotate_hash = func(rotate_image) 31 | distance = original_hash - rotate_hash 32 | emsg = ('slightly rotated image should have similar hash {} {} {}'.format(original_hash, rotate_hash, distance)) 33 | self.assertTrue(distance <= 10, emsg) 34 | 
rotate_image = image.rotate(-90) 35 | rotate_hash = func(rotate_image) 36 | emsg = ('rotated image should have different hash {} {}'.format(original_hash, rotate_hash)) 37 | self.assertNotEqual(original_hash, rotate_hash, emsg) 38 | distance = original_hash - rotate_hash 39 | emsg = ('rotated image should have larger different hash {} {} {}'.format(original_hash, rotate_hash, distance)) 40 | self.assertTrue(distance > 10, emsg) 41 | 42 | def check_hash_length(self, func, image, sizes=CHECK_HASH_DEFAULT): 43 | for hash_size in sizes: 44 | image_hash = func(image, hash_size=hash_size) 45 | emsg = 'hash_size={} is not respected'.format(hash_size) 46 | self.assertEqual(image_hash.hash.size, hash_size**2, emsg) 47 | 48 | def check_hash_stored(self, func, image, sizes=CHECK_HASH_DEFAULT): 49 | for hash_size in sizes: 50 | image_hash = func(image, hash_size) 51 | other_hash = imagehash.hex_to_hash(str(image_hash)) 52 | emsg = 'stringified hash {} != original hash {}'.format(other_hash, image_hash) 53 | self.assertEqual(image_hash, other_hash, emsg) 54 | distance = image_hash - other_hash 55 | emsg = ('unexpected hamming distance {}: original hash {} - stringified hash {}'.format(distance, image_hash, other_hash)) 56 | self.assertEqual(distance, 0, emsg) 57 | 58 | def check_hash_size(self, func, image, sizes=CHECK_HASH_SIZE_DEFAULT): 59 | for hash_size in sizes: 60 | with self.assertRaises(ValueError): 61 | func(image, hash_size) 62 | -------------------------------------------------------------------------------- /find_similar_images.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import, division, print_function 3 | 4 | from PIL import Image 5 | 6 | import imagehash 7 | 8 | """ 9 | Demo of hashing 10 | """ 11 | 12 | 13 | def find_similar_images(userpaths, hashfunc=imagehash.average_hash): 14 | def is_image(filename): 15 | f = filename.lower() 16 | return f.endswith('.png') 
or f.endswith('.jpg') or \ 17 | f.endswith('.jpeg') or f.endswith('.bmp') or \ 18 | f.endswith('.gif') or '.jpg' in f or f.endswith('.svg') 19 | 20 | image_filenames = [] 21 | for userpath in userpaths: 22 | image_filenames += [os.path.join(userpath, path) for path in os.listdir(userpath) if is_image(path)] 23 | images = {} 24 | for img in sorted(image_filenames): 25 | try: 26 | hash = hashfunc(Image.open(img)) 27 | except Exception as e: 28 | print('Problem:', e, 'with', img) 29 | continue 30 | if hash in images: 31 | print(img, ' already exists as', ' '.join(images[hash])) 32 | if 'dupPictures' in img: 33 | print('rm -v', img) 34 | images[hash] = images.get(hash, []) + [img] 35 | 36 | # for k, img_list in six.iteritems(images): 37 | # if len(img_list) > 1: 38 | # print(" ".join(img_list)) 39 | 40 | 41 | if __name__ == '__main__': # noqa: C901 42 | import os 43 | import sys 44 | 45 | def usage(): 46 | sys.stderr.write("""SYNOPSIS: %s [ahash|phash|dhash|...] [] 47 | 48 | Identifies similar images in the directory. 
49 | 50 | Method: 51 | ahash: Average hash 52 | phash: Perceptual hash 53 | dhash: Difference hash 54 | whash-haar: Haar wavelet hash 55 | whash-db4: Daubechies wavelet hash 56 | colorhash: HSV color hash 57 | crop-resistant: Crop-resistant hash 58 | 59 | (C) Johannes Buchner, 2013-2017 60 | """ % sys.argv[0]) 61 | sys.exit(1) 62 | 63 | hashmethod = sys.argv[1] if len(sys.argv) > 1 else usage() 64 | if hashmethod == 'ahash': 65 | hashfunc = imagehash.average_hash 66 | elif hashmethod == 'phash': 67 | hashfunc = imagehash.phash 68 | elif hashmethod == 'dhash': 69 | hashfunc = imagehash.dhash 70 | elif hashmethod == 'whash-haar': 71 | hashfunc = imagehash.whash 72 | elif hashmethod == 'whash-db4': 73 | def hashfunc(img): 74 | return imagehash.whash(img, mode='db4') 75 | elif hashmethod == 'colorhash': 76 | hashfunc = imagehash.colorhash 77 | elif hashmethod == 'crop-resistant': 78 | hashfunc = imagehash.crop_resistant_hash 79 | else: 80 | usage() 81 | userpaths = sys.argv[2:] if len(sys.argv) > 2 else '.' 
82 | find_similar_images(userpaths=userpaths, hashfunc=hashfunc) 83 | -------------------------------------------------------------------------------- /tests/test_whash.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | import unittest 4 | 5 | import six 6 | from PIL import Image 7 | 8 | import imagehash 9 | 10 | from .utils import TestImageHash 11 | 12 | 13 | class TestBasic(TestImageHash): 14 | 15 | def setUp(self): 16 | self.image = self.get_data_image() 17 | self.func = imagehash.whash 18 | 19 | def test_whash(self): 20 | self.check_hash_algorithm(self.func, self.image) 21 | 22 | def test_whash_length(self): 23 | self.check_hash_length(self.func, self.image, sizes=[2, 4, 8, 16, 32, 64]) 24 | 25 | def test_whash_stored(self): 26 | self.check_hash_stored(self.func, self.image, sizes=[2, 4, 8, 16, 32, 64]) 27 | 28 | 29 | class Test(unittest.TestCase): 30 | def setUp(self): 31 | self.image = self._get_white_image() 32 | 33 | def _get_white_image(self, size=None): 34 | if size is None: 35 | size = (512, 512) 36 | return Image.new('RGB', size, 'white') 37 | 38 | def test_hash_size_2power(self): 39 | for hash_size in [4, 8, 16]: 40 | hash = imagehash.whash(self.image, hash_size=hash_size) 41 | self.assertEqual(hash.hash.size, hash_size**2) 42 | 43 | def test_hash_size_for_small_images(self): 44 | default_hash_size = 8 45 | for image_size in [(1, 25), (7, 5)]: 46 | image = self._get_white_image(image_size) 47 | hash = imagehash.whash(image) 48 | self.assertEqual(hash.hash.size, default_hash_size**2) 49 | 50 | def test_hash_size_not_2power(self): 51 | emsg = 'hash_size is not power of 2' 52 | for hash_size in [3, 7, 12]: 53 | with six.assertRaisesRegex(self, AssertionError, emsg): 54 | imagehash.whash(self.image, hash_size=hash_size) 55 | 56 | def test_hash_size_is_less_than_image_scale(self): 57 | image = self._get_white_image((120, 200)) 58 | emsg = 
'hash_size in a wrong range' 59 | for hash_size in [128, 512]: 60 | with six.assertRaisesRegex(self, AssertionError, emsg): 61 | imagehash.whash(image, hash_size=hash_size, image_scale=64) 62 | 63 | def test_custom_hash_size_and_scale(self): 64 | hash_size = 16 65 | hash = imagehash.whash(self.image, hash_size=hash_size, image_scale=64) 66 | self.assertEqual(hash.hash.size, hash_size**2) 67 | 68 | def test_hash_size_more_than_scale(self): 69 | emsg = 'hash_size in a wrong range' 70 | with six.assertRaisesRegex(self, AssertionError, emsg): 71 | imagehash.whash(self.image, hash_size=32, image_scale=16) 72 | 73 | def test_image_scale_not_2power(self): 74 | emsg = 'image_scale is not power of 2' 75 | for image_scale in [4, 8, 16]: 76 | with six.assertRaisesRegex(self, AssertionError, emsg): 77 | imagehash.whash(self.image, image_scale=image_scale + 1) 78 | 79 | 80 | if __name__ == '__main__': 81 | unittest.main() 82 | -------------------------------------------------------------------------------- /.github/workflows/testing.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: '42 4 5,20 * *' 8 | env: 9 | CACHE_NUMBER: 0 # increase to reset cache manually 10 | jobs: 11 | run-tests: 12 | name: Run tests 13 | runs-on: ubuntu-latest 14 | defaults: 15 | run: 16 | shell: bash -l {0} 17 | strategy: 18 | matrix: 19 | python-version: ['3.9.1', '3.11'] 20 | fail-fast: false 21 | steps: 22 | - uses: actions/checkout@v2 23 | - uses: conda-incubator/setup-miniconda@v2 24 | with: 25 | auto-update-conda: true 26 | python-version: ${{ matrix.python-version }} 27 | - name: Set cache date 28 | run: echo "DATE=$(date +'%Y%m')" >> $GITHUB_ENV 29 | - name: Conda download cache 30 | id: myconda-download-cache 31 | uses: actions/cache@v4 32 | with: 33 | path: /usr/share/miniconda/pkgs/ 34 | key: ${{ matrix.python-version }}-conda-${{ env.DATE }}-${{ env.CACHE_NUMBER }} 35 | - 
name: Install imagemagick
        run: |
          # fixed: 'sudo' was duplicated and '-y' was passed twice
          sudo apt-get update && sudo apt-get install -y --no-install-recommends imagemagick libstdc++6 || true
      # conda does not support environment markers
      - name: fix libstdc++ for scipy install
        run: ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 /usr/share/miniconda/envs/test/lib/libstdc++.so.6
      - name: Install testing dependencies
        run: |
          conda install -c conda-forge --file requirements-conda.txt --file requirements-linting-old.txt six packaging pytest coveralls coverage libstdcxx-ng toml
      - name: Conda info
        run: |
          conda info
          conda list
      - name: Lint with flake8
        # stop the build if there are Python syntax errors or undefined names
        run: flake8 imagehash/ --show-source
      - name: Check typing with mypy
        run: mypy imagehash tests/*.py --follow-imports=silent --ignore-missing-imports || true
      - name: Test install from setup.py
        run: pip install .
      - run: coverage run -m pytest .
56 | - name: Convert coverage output to lcov for coveralls 57 | run: | 58 | coverage lcov -o lcov.info 59 | # make paths relative 60 | sed -i s,$PWD/,,g lcov.info 61 | - name: prepare coveralls partial upload 62 | uses: coverallsapp/github-action@master 63 | with: 64 | github-token: ${{ secrets.github_token }} 65 | path-to-lcov: lcov.info 66 | flag-name: run-${{ matrix.python-version }} 67 | parallel: true 68 | 69 | 70 | finish: 71 | needs: run-tests 72 | runs-on: ubuntu-latest 73 | steps: 74 | - name: Coveralls Finished 75 | uses: coverallsapp/github-action@master 76 | with: 77 | github-token: ${{ secrets.github_token }} 78 | parallel-finished: true 79 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean clean-test clean-pyc clean-build docs help 2 | .DEFAULT_GOAL := help 3 | 4 | define BROWSER_PYSCRIPT 5 | import os, webbrowser, sys 6 | 7 | try: 8 | from urllib import pathname2url 9 | except: 10 | from urllib.request import pathname2url 11 | 12 | webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1]))) 13 | endef 14 | export BROWSER_PYSCRIPT 15 | 16 | define PRINT_HELP_PYSCRIPT 17 | import re, sys 18 | 19 | for line in sys.stdin: 20 | match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) 21 | if match: 22 | target, help = match.groups() 23 | print("%-20s %s" % (target, help)) 24 | endef 25 | export PRINT_HELP_PYSCRIPT 26 | 27 | PYTHON := python3 28 | 29 | BROWSER := $(PYTHON) -c "$$BROWSER_PYSCRIPT" 30 | 31 | help: 32 | @$(PYTHON) -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) 33 | 34 | clean: clean-build clean-pyc clean-test clean-doc ## remove all build, test, coverage and Python artifacts 35 | 36 | clean-build: ## remove build artifacts 37 | rm -fr build/ 38 | rm -fr dist/ 39 | rm -fr .eggs/ 40 | find . -name '*.egg-info' -exec rm -fr {} + 41 | find . 
-name '*.egg' -exec rm -f {} +

clean-pyc: ## remove Python file artifacts
	find . -name '*.pyc' -exec rm -f {} +
	find . -name '*.pyo' -exec rm -f {} +
	find . -name '*~' -exec rm -f {} +
	find . -name '__pycache__' -exec rm -fr {} +
	find . -name '*.so' -exec rm -f {} +
	find . -name '*.c' -exec rm -f {} +

clean-test: ## remove test and coverage artifacts
	rm -fr .tox/
	rm -f .coverage
	rm -fr htmlcov/
	rm -fr .pytest_cache

clean-doc:
	rm -rf docs/build

lint: ## check style with flake8
	flake8 .

lint-fix: ## fix style with autopep8 and isort; ignores to not autofix tabs to spaces, but still warn when mixed
	autopep8 . --in-place --aggressive --aggressive --aggressive --recursive --ignore=W191,E101,E111,E122
	isort .

test: ## run tests quickly with the default Python
	pytest

test-all: ## run tests on every Python version with tox
	tox

coverage: ## check code coverage quickly with the default Python
	# fixed: measured package was 'ultranest', a copy-paste leftover from
	# another project; this repository's package is 'imagehash'
	coverage run --source imagehash -m pytest
	coverage report -m
	coverage html
	$(BROWSER) htmlcov/index.html

docs: ## generate Sphinx HTML documentation, including API docs
	# fixed: module names were 'ultranest' (copy-paste leftover); the
	# ultranest-specific sed patch of mlfriends.html was dropped as well
	rm -f docs/imagehash.rst
	rm -f docs/modules.rst
	#nbstripout docs/*.ipynb
	sphinx-apidoc -H API -o docs/ imagehash
	$(MAKE) -C docs clean
	$(MAKE) -C docs html
	$(BROWSER) docs/build/html/index.html

servedocs: docs ## compile the docs watching for changes
	watchmedo shell-command -p '*.rst' -c '$(MAKE) -C docs html' -R -D .
91 | 92 | release: dist ## package and upload a release 93 | twine upload -s dist/*.tar.gz dist/*.whl 94 | 95 | dist: clean ## builds source and wheel package 96 | $(PYTHON) setup.py sdist 97 | $(PYTHON) setup.py bdist_wheel 98 | ls -l dist 99 | 100 | install: clean ## install the package to the active Python's site-packages 101 | $(PYTHON) setup.py install 102 | -------------------------------------------------------------------------------- /tests/test_hex_conversions.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | 5 | import imagehash 6 | 7 | # Each row is a test case where the first value is a bit sequence and 8 | # the second value is the expected hexadecimal representation for it. 9 | binary_to_hexadecimal_values = [ 10 | ['1', '1'], 11 | ['11', '3'], 12 | ['111', '7'], 13 | ['1111', 'f'], 14 | ['10000', '10'], 15 | ['110000', '30'], 16 | ['1110000', '70'], 17 | ['11110000', 'f0'], 18 | ['00001', '01'], 19 | ['000011', '03'], 20 | ['0000111', '07'], 21 | ['00001111', '0f'], 22 | ['10000001', '81'], 23 | ['00000000000000001', '00001'], 24 | ['000000000000000011', '00003'], 25 | ['0000000000000000111', '00007'], 26 | ['00000000000000001111', '0000f'], 27 | ['11110000111100001111', 'f0f0f'], 28 | ['00001111000011110000', '0f0f0'], 29 | ['11110000000100100011010001010110011110001001101010111100110111101111', 'f0123456789abcdef'], 30 | ['1001111000111100110000011111000011110000110000111110011111000000', '9e3cc1f0f0c3e7c0'], 31 | ['1000111100001111000011110000111100001111000010110000101101111010', '8f0f0f0f0f0b0b7a'], 32 | ] 33 | 34 | # Each row is a test case where the first value is a hexadecimal sequence 35 | # and the second value is the expected binary representation for it. 
36 | hexadecimal_to_binary_values = [ 37 | ['1', '0001'], 38 | ['2', '0010'], 39 | ['3', '0011'], 40 | ['a', '1010'], 41 | ['f', '1111'], 42 | ['101', '100000001'], 43 | ['1b1', '110110001'], 44 | ['0b1', '010110001'], 45 | ['f0f0', '1111000011110000'], 46 | ['0f0f', '0000111100001111'], 47 | ['000c', '0000000000001100'], 48 | ['100000d', '1000000000000000000001101'], 49 | ['000000d', '0000000000000000000001101'], 50 | ['000000001', '000000000000000000000000000000000001'], 51 | ['800000001', '100000000000000000000000000000000001'], 52 | ['0000000000001', '0000000000000000000000000000000000000000000000001'], 53 | ['1000000000001', '1000000000000000000000000000000000000000000000001'], 54 | ['0000000000000001', '0000000000000000000000000000000000000000000000000000000000000001'], 55 | ['8000000000000001', '1000000000000000000000000000000000000000000000000000000000000001'], 56 | ] 57 | 58 | # Each row is a test case where the first value is a hexadecimal 59 | # sequence and the second value is the expected bool array for it. 
class TestHexConversions(unittest.TestCase):
	"""Round-trip checks between bit arrays and their hexadecimal encoding."""

	def setUp(self):
		# Shorthands for the encoder (private helper) and the decoder under test.
		self.to_hex = imagehash._binary_array_to_hex
		self.from_hex = imagehash.hex_to_hash

	def test_binary_array_to_hex_input(self):
		# Encoding each bool matrix must reproduce its expected hex string.
		for hex_repr, bool_array in hexadecimal_to_bool_array:
			self.assertEqual(hex_repr, self.to_hex(bool_array))

	def test_hex_to_hash_output(self):
		# Decoding each hex string must reproduce the original bool matrix.
		for hex_repr, bool_array in hexadecimal_to_bool_array:
			self.assertTrue(np.array_equal(bool_array, self.from_hex(hex_repr).hash))

	def test_conversion_to_hex(self):
		# Bit strings of various lengths encode to the expected hex digits.
		for bits, expected_hex in binary_to_hexadecimal_values:
			bit_array = np.array([int(digit) for digit in bits])
			self.assertEqual(expected_hex, self.to_hex(bit_array))

	def test_conversion_from_hex(self):
		# Hex strings of various lengths decode to the expected bit strings.
		for hex_repr, expected_bits in hexadecimal_to_binary_values:
			decoded_bits = 1 * self.from_hex(hex_repr).hash.flatten()
			self.assertEqual(expected_bits, ''.join(str(bit) for bit in decoded_bits))
| 11 | class Test(TestImageHash): 12 | def setUp(self): 13 | self.image = self.get_data_image() 14 | self.peppers = self.get_data_image('peppers.png') 15 | 16 | def test_segmented_hash(self): 17 | original_hash = imagehash.crop_resistant_hash(self.image) 18 | rotate_image = self.image.rotate(-1) 19 | small_rotate_hash = imagehash.crop_resistant_hash(rotate_image) 20 | emsg = ('slightly rotated image should have similar hash {} {}'.format(original_hash, small_rotate_hash)) 21 | self.assertTrue(original_hash.matches(small_rotate_hash), emsg) 22 | rotate_image = self.image.rotate(-90) 23 | large_rotate_hash = imagehash.crop_resistant_hash(rotate_image) 24 | emsg = ('rotated image should have different hash {} {}'.format(original_hash, large_rotate_hash)) 25 | self.assertFalse(original_hash.matches(large_rotate_hash), emsg) 26 | 27 | other_hashes = [small_rotate_hash, large_rotate_hash] 28 | self.assertEqual( 29 | original_hash.best_match(other_hashes), 30 | small_rotate_hash, 31 | 'Hash of the slightly rotated image should be a better match than for the more heavily rotated image.' 
32 | ) 33 | 34 | def test_segmented_hash__hash_func(self): 35 | segmented_ahash = imagehash.crop_resistant_hash(self.image, imagehash.average_hash) 36 | segmented_dhash = imagehash.crop_resistant_hash(self.image, imagehash.dhash) 37 | self.assertFalse( 38 | segmented_ahash.matches(segmented_dhash), 39 | 'Segmented hash should not match when the underlying hashing method is not the same' 40 | ) 41 | 42 | def test_segmented_hash__limit_segments(self): 43 | segmented_orig = imagehash.crop_resistant_hash(self.image) 44 | segmented_limit = imagehash.crop_resistant_hash(self.image, limit_segments=1) 45 | self.assertGreaterEqual( 46 | len(segmented_orig.segment_hashes), len(segmented_limit.segment_hashes), 47 | 'Limit segments should mean there are fewer segments' 48 | ) 49 | self.assertEqual( 50 | len(segmented_limit.segment_hashes), 1, 51 | 'Limit segments should correctly limit the segment count' 52 | ) 53 | 54 | def test_segmented_hash__segment_threshold(self): 55 | segmented_low_threshold = imagehash.crop_resistant_hash(self.image, segment_threshold=20) 56 | segmented_high_threshold = imagehash.crop_resistant_hash(self.image, segment_threshold=250) 57 | self.assertFalse( 58 | segmented_low_threshold.matches(segmented_high_threshold, region_cutoff=3), 59 | 'Segmented hash should not match when segment threshold is changed' 60 | ) 61 | 62 | def test_segmentation_image_size(self): 63 | start_time = datetime.now() 64 | imagehash.crop_resistant_hash(self.image, segmentation_image_size=200) 65 | small_timed = datetime.now() - start_time 66 | 67 | start_time = datetime.now() 68 | imagehash.crop_resistant_hash(self.image, segmentation_image_size=400) 69 | large_timed = datetime.now() - start_time 70 | 71 | self.assertGreater(large_timed, small_timed, 'Hashing should take longer when the segmentation image is larger') 72 | 73 | def test_min_segment_size(self): 74 | small_segments_hash = imagehash.crop_resistant_hash(self.peppers, min_segment_size=100) 75 | big_segments_hash = 
imagehash.crop_resistant_hash(self.peppers, min_segment_size=1000) 76 | 77 | self.assertGreater( 78 | len(small_segments_hash.segment_hashes), 79 | len(big_segments_hash.segment_hashes), 80 | 'Small segment size limit should lead to larger number of segments detected.' 81 | ) 82 | self.assertEqual( 83 | small_segments_hash, 84 | big_segments_hash, 85 | 'Hashes should still match, as large segments are present in both' 86 | ) 87 | 88 | def test_crop_resistance(self): 89 | full_image = self.peppers 90 | width, height = full_image.size 91 | crop_10 = full_image.crop((0.05 * width, 0.05 * height, 0.95 * width, 0.95 * height)) 92 | crop_40 = full_image.crop((0.2 * width, 0.2 * height, 0.8 * width, 0.8 * height)) 93 | crop_asymmetric = full_image.crop((0, 0.3 * height, 0.4 * width, 0.75 * height)) 94 | 95 | full_hash = imagehash.crop_resistant_hash(full_image, min_segment_size=200) 96 | crop_hash_10 = imagehash.crop_resistant_hash(crop_10) 97 | crop_hash_40 = imagehash.crop_resistant_hash(crop_40) 98 | crop_hash_asymmetric = imagehash.crop_resistant_hash(crop_asymmetric) 99 | 100 | self.assertEqual(crop_hash_10, full_hash, 'Slightly cropped image hash should match full image hash') 101 | self.assertEqual(crop_hash_40, full_hash, 'Heavily cropped image hash should match full image hash') 102 | self.assertEqual( 103 | crop_hash_asymmetric, full_hash, 'Asymmetrically cropped image hash should match full image hash' 104 | ) 105 | 106 | 107 | if __name__ == '__main__': 108 | unittest.main() 109 | -------------------------------------------------------------------------------- /tests/test_hash_is_constant.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | 3 | import numpy 4 | import PIL 5 | from packaging import version 6 | from PIL import ImageFilter 7 | 8 | import imagehash 9 | from tests import TestImageHash 10 | 11 | 12 | def _calculate_segment_properties(segment): 13 | length = len(segment) 14 | min_y = min(coord[0] 
for coord in segment) 15 | min_x = min(coord[1] for coord in segment) 16 | max_y = max(coord[0] for coord in segment) 17 | max_x = max(coord[1] for coord in segment) 18 | return { 19 | 'length': length, 20 | 'min_x': min_x, 21 | 'min_y': min_y, 22 | 'max_x': max_x, 23 | 'max_y': max_y 24 | } 25 | 26 | 27 | def _pillow_has_convert_fix(): 28 | """ 29 | Pillow version 7.0.0 introduced a fix for a rounding error in Image.convert("L") which means that segmentation is 30 | slightly different after this release. 31 | The PR which fixed the rounding and caused this inconsistency is https://github.com/python-pillow/Pillow/pull/4320 32 | """ 33 | return version.parse(PIL.__version__) >= version.parse('7.0.0') 34 | 35 | 36 | class Test(TestImageHash): 37 | def setUp(self): 38 | self.image = self.get_data_image() 39 | self.peppers = self.get_data_image('peppers.png') 40 | 41 | def test_average_hash(self): 42 | result_hash = imagehash.average_hash(self.image) 43 | known_hash = 'ffd7918181c9ffff' 44 | self.assertEqual(str(result_hash), known_hash) 45 | 46 | def test_phash(self): 47 | result_hash = imagehash.phash(self.image) 48 | known_hash = 'ba8c84536bd3c366' 49 | self.assertEqual(str(result_hash), known_hash) 50 | 51 | def test_dhash(self): 52 | result_hash = imagehash.dhash(self.image) 53 | known_hash = '0026273b2b19550e' 54 | self.assertEqual(str(result_hash), known_hash) 55 | 56 | def test_whash(self): 57 | result_hash = imagehash.whash(self.image) 58 | known_hash = 'ffd391818181a5e7' 59 | self.assertEqual(str(result_hash), known_hash) 60 | 61 | def test_color_hash(self): 62 | result_hash = imagehash.colorhash(self.image) 63 | known_hash = '07007000000' 64 | self.assertEqual(str(result_hash), known_hash) 65 | 66 | def test_crop_resistant_hash(self): 67 | result_hash = imagehash.crop_resistant_hash(self.peppers) 68 | if _pillow_has_convert_fix(): 69 | known_hash = ( 70 | 'c4d9f3e3e1c18101,' 71 | '706c6e66464c99b9,' 72 | '98d8f1ecd8f0f0e1,' 73 | 'a082c0c49acc6dbd,' 74 | 
'f1f39b99c1c1b1b1,' 75 | '3a7ece1c9df4fcb9' 76 | ) 77 | else: 78 | known_hash = ( 79 | 'c4d9f1e3e1c18101,' 80 | '706c6e66464c99b9,' 81 | '98d8f1ecd8f0f0e1,' 82 | 'a282c0c49acc6dbd,' 83 | 'b1f39b99e1c1b1b1,' 84 | '3a7ece1c9df4fcb9' 85 | ) 86 | self.assertEqual(str(result_hash), known_hash) 87 | 88 | def test_crop_resistant_segmentation(self): 89 | # Image pre-processing 90 | image = self.peppers.convert('L') 91 | if _pillow_has_convert_fix(): 92 | known_bw_md5 = '61db06218cc8b9aba14812d965869120' 93 | else: 94 | known_bw_md5 = '61442e74c83cfea67d182481c24c5f3e' 95 | self.assertEqual( 96 | hashlib.md5(image.tobytes()).hexdigest(), 97 | known_bw_md5, 98 | "This hash should match, unless pillow have changed Convert('L') again" 99 | ) 100 | image = image.resize((300, 300), imagehash.ANTIALIAS) 101 | # Add filters 102 | image = image.filter(ImageFilter.GaussianBlur()).filter(ImageFilter.MedianFilter()) 103 | pixels = numpy.array(image).astype(numpy.float32) 104 | # Segment 105 | segments = imagehash._find_all_segments(pixels, 128, 500) 106 | known_segment_count = 6 107 | self.assertEqual(len(segments), known_segment_count) 108 | if _pillow_has_convert_fix(): 109 | known_segments = sorted([ 110 | {'length': 595, 'min_x': 20, 'min_y': 0, 'max_x': 60, 'max_y': 31}, 111 | {'length': 1458, 'min_x': 61, 'min_y': 0, 'max_x': 156, 'max_y': 58}, 112 | {'length': 3505, 'min_x': 0, 'min_y': 111, 'max_x': 97, 'max_y': 191}, 113 | {'length': 8789, 'min_x': 112, 'min_y': 145, 'max_x': 299, 'max_y': 260}, 114 | {'length': 12153, 'min_x': 157, 'min_y': 0, 'max_x': 299, 'max_y': 148}, 115 | {'length': 60916, 'min_x': 0, 'min_y': 0, 'max_x': 299, 'max_y': 299} 116 | ], key=lambda x: (x['length'], x['min_x'], x['min_y'], x['max_x'], x['max_y'])) 117 | else: 118 | known_segments = sorted([ 119 | {'length': 591, 'min_x': 20, 'min_y': 0, 'max_x': 60, 'max_y': 31}, 120 | {'length': 1451, 'min_x': 61, 'min_y': 0, 'max_x': 156, 'max_y': 58}, 121 | {'length': 12040, 'min_x': 157, 'min_y': 0, 
'max_x': 299, 'max_y': 147}, 122 | {'length': 3452, 'min_x': 0, 'min_y': 111, 'max_x': 97, 'max_y': 191}, 123 | {'length': 8701, 'min_x': 112, 'min_y': 145, 'max_x': 299, 'max_y': 259}, 124 | {'length': 61179, 'min_x': 0, 'min_y': 0, 'max_x': 299, 'max_y': 299} 125 | ], key=lambda x: (x['length'], x['min_x'], x['min_y'], x['max_x'], x['max_y'])) 126 | segment_properties = sorted([ 127 | _calculate_segment_properties(segment) for segment in segments 128 | ], key=lambda x: (x['length'], x['min_x'], x['min_y'], x['max_x'], x['max_y'])) 129 | self.assertEqual(segment_properties, known_segments) 130 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | =========== 2 | ImageHash 3 | =========== 4 | 5 | An image hashing library written in Python. ImageHash supports: 6 | 7 | * Average hashing 8 | * Perceptual hashing 9 | * Difference hashing 10 | * Wavelet hashing 11 | * HSV color hashing (colorhash) 12 | * Crop-resistant hashing 13 | 14 | |CI|_ |Coveralls|_ 15 | 16 | Rationale 17 | ========= 18 | 19 | Image hashes tell whether two images look nearly identical. 20 | This is different from cryptographic hashing algorithms (like MD5, SHA-1) 21 | where tiny changes in the image give completely different hashes. 22 | In image fingerprinting, we actually want our similar inputs to have 23 | similar output hashes as well. 24 | 25 | The image hash algorithms (average, perceptual, difference, wavelet) 26 | analyse the image structure on luminance (without color information). 27 | The color hash algorithm analyses the color distribution and 28 | black & gray fractions (without position information). 
29 | 30 | Installation 31 | ============ 32 | 33 | Based on PIL/Pillow Image, numpy and scipy.fftpack (for pHash) 34 | Easy installation through `pypi`_:: 35 | 36 | pip install imagehash 37 | 38 | Basic usage 39 | =========== 40 | :: 41 | 42 | >>> from PIL import Image 43 | >>> import imagehash 44 | >>> hash = imagehash.average_hash(Image.open('tests/data/imagehash.png')) 45 | >>> print(hash) 46 | ffd7918181c9ffff 47 | >>> otherhash = imagehash.average_hash(Image.open('tests/data/peppers.png')) 48 | >>> print(otherhash) 49 | 9f172786e71f1e00 50 | >>> print(hash == otherhash) 51 | False 52 | >>> print(hash - otherhash) # hamming distance 53 | 33 54 | 55 | Each algorithm can also have its hash size adjusted (or in the case of 56 | colorhash, its :code:`binbits`). Increasing the hash size allows an 57 | algorithm to store more detail in its hash, increasing its sensitivity 58 | to changes in detail. 59 | 60 | The demo script **find_similar_images** illustrates how to find similar 61 | images in a directory. 62 | 63 | Source hosted at GitHub: https://github.com/JohannesBuchner/imagehash 64 | 65 | References 66 | ----------- 67 | 68 | * Average hashing (`aHashref`_) 69 | * Perceptual hashing (`pHashref`_) 70 | * Difference hashing (`dHashref`_) 71 | * Wavelet hashing (`wHashref`_) 72 | * Crop-resistant hashing (`crop_resistant_hashref`_) 73 | 74 | .. _aHashref: https://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html 75 | .. _pHashref: https://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html 76 | .. _dHashref: https://www.hackerfactor.com/blog/index.php?/archives/529-Kind-of-Like-That.html 77 | .. _wHashref: https://fullstackml.com/wavelet-image-hash-in-python-3504fdd282b5 78 | .. _pypi: https://pypi.python.org/pypi/ImageHash 79 | .. 
_crop_resistant_hashref: https://ieeexplore.ieee.org/document/6980335 80 | 81 | Examples 82 | ========= 83 | 84 | To help evaluate how different hashing algorithms behave, below are a few hashes applied 85 | to two datasets. This will let you know what images an algorithm thinks are basically identical. 86 | 87 | Example 1: Icon dataset 88 | ----------------------- 89 | 90 | Source: 7441 free icons on GitHub (see examples/github-urls.txt). 91 | 92 | The following pages show groups of images with the same hash (the hashing method sees them as the same). 93 | 94 | * `phash `__ (or `with z-transform `__) 95 | * `dhash `__ (or `with z-transform `__) 96 | * `colorhash `__ 97 | * `average_hash `__ (`with z-transform `__) 98 | 99 | The hashes use hashsize=8; colorhash uses binbits=3. 100 | You may want to adjust the hashsize or require some manhattan distance (hash1 - hash2 < threshold). 101 | 102 | Example 2: Art dataset 103 | ---------------------- 104 | 105 | Source: 109259 art pieces from https://www.parismuseescollections.paris.fr/en/recherche/image-libre/. 106 | 107 | The following pages show groups of images with the same hash (the hashing method sees them as the same). 108 | 109 | * `phash `__ (or `with z-transform `__) 110 | * `dhash `__ (or `with z-transform `__) 111 | * `colorhash `__ 112 | * `average_hash `__ (`with z-transform `__) 113 | 114 | For understanding hash distances, check out these excellent blog posts: 115 | * https://tech.okcupid.com/evaluating-perceptual-image-hashes-at-okcupid-e98a3e74aa3a 116 | * https://content-blockchain.org/research/testing-different-image-hash-functions/ 117 | 118 | Storing hashes 119 | ============== 120 | 121 | As illustrated above, hashes can be turned into strings. 122 | The strings can be turned back into a ImageHash object as follows. 
123 | 124 | For single perceptual hashes:: 125 | 126 | >>> original_hash = imagehash.phash(Image.open('tests/data/imagehash.png')) 127 | >>> hash_as_str = str(original_hash) 128 | >>> print(hash_as_str) 129 | ffd7918181c9ffff 130 | >>> restored_hash = imagehash.hex_to_hash(hash_as_str) 131 | >>> print(restored_hash) 132 | ffd7918181c9ffff 133 | >>> assert restored_hash == original_hash 134 | >>> assert str(restored_hash) == hash_as_str 135 | 136 | For crop_resistant_hash:: 137 | 138 | >>> original_hash = imagehash.crop_resistant_hash(Image.open('tests/data/imagehash.png'), min_segment_size=500, segmentation_image_size=1000) 139 | >>> hash_as_str = str(original_hash) 140 | >>> restored_hash = imagehash.hex_to_multihash(hash_as_str) 141 | >>> assert restored_hash == original_hash 142 | >>> assert str(restored_hash) == hash_as_str 143 | 144 | For colorhash:: 145 | 146 | >>> original_hash = imagehash.colorhash(Image.open('tests/data/imagehash.png'), binbits=3) 147 | >>> hash_as_str = str(original_hash) 148 | >>> restored_hash = imagehash.hex_to_flathash(hash_as_str, hashsize=3) 149 | 150 | Efficient database search 151 | ------------------------- 152 | 153 | For storing the hashes in a database and using fast hamming distance 154 | searches, see pointers at https://github.com/JohannesBuchner/imagehash/issues/127 155 | (a blog post on how to do this would be a great contribution!) 156 | 157 | @KDJDEV points to https://github.com/KDJDEV/imagehash-reverse-image-search-tutorial and writes: 158 | In this tutorial I use PostgreSQL and `this extension `_, 159 | and show how you can create a reverse image search using hashes generated by this library. 
160 | 161 | 162 | Changelog 163 | ---------- 164 | 165 | * 4.3: typing annotations by @Avasam @SpangleLabs and @nh2 166 | 167 | * 4.2: Cropping-Resistant image hashing added by @SpangleLabs 168 | 169 | * 4.1: Add examples and colorhash 170 | 171 | * 4.0: Changed binary to hex implementation, because the previous one was broken for various hash sizes. This change breaks compatibility to previously stored hashes; to convert them from the old encoding, use the "old_hex_to_hash" function. 172 | 173 | * 3.5: Image data handling speed-up 174 | 175 | * 3.2: whash now also handles smaller-than-hash images 176 | 177 | * 3.0: dhash had a bug: It computed pixel differences vertically, not horizontally. 178 | I modified it to follow `dHashref`_. The old function is available as dhash_vertical. 179 | 180 | * 2.0: Added whash 181 | 182 | * 1.0: Initial ahash, dhash, phash implementations. 183 | 184 | Contributing 185 | ============= 186 | 187 | Pull requests and new features are warmly welcome. 188 | 189 | If you encounter a bug or have a question, please open a GitHub issue. You can also try Stack Overflow. 190 | 191 | Other projects 192 | ============== 193 | 194 | * https://github.com/commonsmachinery/blockhash-python 195 | * https://github.com/acoomans/instagram-filters 196 | * https://pippy360.github.io/transformationInvariantImageSearch/ 197 | * https://www.phash.org/ 198 | * https://pypi.org/project/dhash/ 199 | * https://github.com/thorn-oss/perception (based on imagehash code, depends on opencv) 200 | * https://docs.opencv.org/3.4/d4/d93/group__img__hash.html 201 | 202 | .. |CI| image:: https://github.com/JohannesBuchner/imagehash/actions/workflows/testing.yml/badge.svg 203 | .. _CI: https://github.com/JohannesBuchner/imagehash/actions/workflows/testing.yml 204 | 205 | .. |Coveralls| image:: https://coveralls.io/repos/github/JohannesBuchner/imagehash/badge.svg 206 | .. 
_Coveralls: https://coveralls.io/github/JohannesBuchner/imagehash 207 | -------------------------------------------------------------------------------- /output.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | ImageHash 8 | 324 | 325 | 326 |
327 |

ImageHash

328 | 329 |

A image hashing library written in Python. 330 | Supports:

331 |
    332 |
  • average hashing (aHash)
  • 333 |
  • perceptual hashing (pHash)
  • 334 |
  • difference hashing (dHash)
  • 335 |
336 |
337 |

Requirements

338 |

Based on PIL Image, numpy and scipy.fftpack (for pHash)

339 |
340 |
341 |

Basic usage

342 |
343 | >>> import Image
344 | >>> import ImageHash
345 | >>> hash = ImageHash.average_hash(Image.open('test.png'))
346 | >>> print hash
347 | d879f8f89b1bbf
348 | >>> otherhash = ImageHash.average_hash(Image.open('other.bmp'))
349 | >>> print otherhash
350 | ffff3720200ffff
351 | >>> print hash == otherhash
352 | False
353 | >>> print hash - otherhash
354 | 36
355 | 
356 |

Demo function find_similar_images illustrates finding similar images in a directory.

357 |

References:

358 |
359 | * pHash implementation following http://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html
360 | * dHash implementation following http://www.hackerfactor.com/blog/index.php?/archives/529-Kind-of-Like-That.html
361 | 
362 |
363 |
364 | 365 | 366 | -------------------------------------------------------------------------------- /imagehash/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Perceptual image hashing library 3 | 4 | Example: 5 | 6 | >>> from PIL import Image 7 | >>> import imagehash 8 | >>> hash = imagehash.average_hash(Image.open('test.png')) 9 | >>> print(hash) 10 | d879f8f89b1bbf 11 | >>> otherhash = imagehash.average_hash(Image.open('other.bmp')) 12 | >>> print(otherhash) 13 | ffff3720200ffff 14 | >>> print(hash == otherhash) 15 | False 16 | >>> print(hash - otherhash) 17 | 36 18 | >>> for r in range(1, 30, 5): 19 | ... rothash = imagehash.average_hash(Image.open('test.png').rotate(r)) 20 | ... print('Rotation by %d: %d Hamming difference' % (r, hash - rothash)) 21 | ... 22 | Rotation by 1: 2 Hamming difference 23 | Rotation by 6: 11 Hamming difference 24 | Rotation by 11: 13 Hamming difference 25 | Rotation by 16: 17 Hamming difference 26 | Rotation by 21: 19 Hamming difference 27 | Rotation by 26: 21 Hamming difference 28 | >>> 29 | """ 30 | 31 | from __future__ import absolute_import, division, print_function 32 | 33 | import sys 34 | 35 | import numpy 36 | from PIL import Image, ImageFilter 37 | 38 | try: 39 | ANTIALIAS = Image.Resampling.LANCZOS 40 | except AttributeError: 41 | # deprecated in pillow 10 42 | # https://pillow.readthedocs.io/en/stable/deprecations.html 43 | ANTIALIAS = Image.ANTIALIAS 44 | 45 | __version__ = '4.3.2' 46 | 47 | """ 48 | You may copy this file, if you keep the copyright information below: 49 | 50 | 51 | Copyright (c) 2013-2022, Johannes Buchner 52 | https://github.com/JohannesBuchner/imagehash 53 | 54 | All rights reserved. 
def _binary_array_to_hex(arr):
	"""
	internal function to make a hex string out of a binary array.
	"""
	# Flatten to a '1'/'0' bit string, then render it as zero-padded hex,
	# one hex digit per 4 bits (rounded up).
	bit_string = ''.join(str(b) for b in 1 * arr.flatten())
	width = int(numpy.ceil(len(bit_string) / 4))
	return '{:0>{width}x}'.format(int(bit_string, 2), width=width)


class ImageHash:
	"""
	Hash encapsulation. Can be used for dictionary keys and comparisons.
	"""

	def __init__(self, binary_array):
		# type: (NDArray) -> None
		# The raw boolean array holding the hash bits.
		self.hash = binary_array  # type: NDArray

	def __str__(self):
		return _binary_array_to_hex(self.hash.flatten())

	def __repr__(self):
		return repr(self.hash)

	def __sub__(self, other):
		# type: (ImageHash) -> int
		"""Return the hamming distance (number of differing bits) to *other*."""
		if other is None:
			raise TypeError('Other hash must not be None.')
		if self.hash.size != other.hash.size:
			raise TypeError('ImageHashes must be of the same shape.', self.hash.shape, other.hash.shape)
		return numpy.count_nonzero(self.hash.flatten() != other.hash.flatten())

	def __eq__(self, other):
		# type: (object) -> bool
		# An ImageHash never equals None.
		if other is None:
			return False
		return numpy.array_equal(self.hash.flatten(), other.hash.flatten())  # type: ignore

	def __ne__(self, other):
		# type: (object) -> bool
		# BUG FIX: __eq__ declares an ImageHash never equal to None, so
		# `hash != None` must be True. The previous code returned False
		# here, contradicting __eq__ and making both comparisons falsy.
		if other is None:
			return True
		return not numpy.array_equal(self.hash.flatten(), other.hash.flatten())  # type: ignore

	def __hash__(self):
		# this returns a 8 bit integer, intentionally shortening the information
		return sum([2**(i % 8) for i, v in enumerate(self.hash.flatten()) if v])

	def __len__(self):
		# Returns the bit length of the hash
		return self.hash.size
# type of Callable
if sys.version_info >= (3, 3):
	if sys.version_info >= (3, 9, 0) and sys.version_info <= (3, 9, 1):
		# collections.abc.Callable cannot be subscripted on 3.9.0/3.9.1:
		# https://stackoverflow.com/questions/65858528/is-collections-abc-callable-bugged-in-python-3-9-1
		from typing import Callable
	else:
		from collections.abc import Callable
	try:
		MeanFunc = Callable[[NDArray], float]
		HashFunc = Callable[[Image.Image], ImageHash]
	except TypeError:
		MeanFunc = Callable  # type: ignore
		HashFunc = Callable  # type: ignore
# end of dynamic code for typing


def hex_to_hash(hexstr):
	# type: (str) -> ImageHash
	"""
	Convert a stored hash (hex, as retrieved from str(Imagehash))
	back to an ImageHash object.

	Notes:
	1. This algorithm assumes all hashes are either
	   bidimensional arrays with dimensions hash_size * hash_size,
	   or onedimensional arrays with dimensions binbits * 14.
	2. This algorithm does not work for hash_size < 2.
	"""
	hash_size = int(numpy.sqrt(len(hexstr) * 4))
	# assert hash_size == numpy.sqrt(len(hexstr)*4)
	binary_array = '{:0>{width}b}'.format(int(hexstr, 16), width=hash_size * hash_size)
	bit_rows = [binary_array[i:i + hash_size] for i in range(0, len(binary_array), hash_size)]
	hash_array = numpy.array([[bool(int(d)) for d in row] for row in bit_rows])
	return ImageHash(hash_array)


def hex_to_flathash(hexstr, hashsize):
	# type: (str, int) -> ImageHash
	"""
	Convert a stored flat hash (hex, as retrieved from str(Imagehash))
	back to a one-dimensional ImageHash object of len(hexstr) * 4 bits.
	"""
	hash_size = int(len(hexstr) * 4 / (hashsize))
	binary_array = '{:0>{width}b}'.format(int(hexstr, 16), width=hash_size * hashsize)
	# BUGFIX: build a flat (1-D) boolean array. The previous code wrapped the
	# comprehension in an extra list, producing a (1, N) array on which the
	# trailing slice operated on the length-1 outer axis and was a no-op.
	hash_array = numpy.array([bool(int(d)) for d in binary_array])[-hash_size * hashsize:]
	return ImageHash(hash_array)


def hex_to_multihash(hexstr):
	# type: (str) -> ImageMultiHash
	"""
	Convert a stored multihash (hex, as retrieved from str(ImageMultiHash))
	back to an ImageMultiHash object.

	This function is based on hex_to_hash so the same caveats apply. Namely:

	1. This algorithm assumes all hashes are either
	   bidimensional arrays with dimensions hash_size * hash_size,
	   or onedimensional arrays with dimensions binbits * 14.
	2. This algorithm does not work for hash_size < 2.
	"""
	split = hexstr.split(',')
	hashes = [hex_to_hash(x) for x in split]
	return ImageMultiHash(hashes)


def old_hex_to_hash(hexstr, hash_size=8):
	# type: (str, int) -> ImageHash
	"""
	Convert a stored hash (hex, as retrieved from str(Imagehash))
	back to an ImageHash object. This method should be used for
	hashes generated by ImageHash up to version 3.7. For hashes
	generated by newer versions of ImageHash, hex_to_hash should
	be used instead.
	"""
	arr = []
	count = hash_size * (hash_size // 4)
	if len(hexstr) != count:
		emsg = 'Expected hex string size of {}.'
		raise ValueError(emsg.format(count))
	for i in range(count // 2):
		h = hexstr[i * 2:i * 2 + 2]
		v = int('0x' + h, 16)
		# each byte expands to 8 bits, least significant bit first
		arr.append([v & 2**j > 0 for j in range(8)])
	return ImageHash(numpy.array(arr))


def average_hash(image, hash_size=8, mean=numpy.mean):
	# type: (Image.Image, int, MeanFunc) -> ImageHash
	"""
	Average Hash computation

	Implementation follows https://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html

	Step by step explanation: https://web.archive.org/web/20171112054354/https://www.safaribooksonline.com/blog/2013/11/26/image-hashing-with-python/ # noqa: E501

	@image must be a PIL instance.
	@mean how to determine the average luminance. can try numpy.median instead.
	"""
	if hash_size < 2:
		raise ValueError('Hash size must be greater than or equal to 2')

	# reduce size and complexity, then convert to grayscale
	image = image.convert('L').resize((hash_size, hash_size), ANTIALIAS)

	# find average pixel value; 'pixels' is an array of the pixel values, ranging from 0 (black) to 255 (white)
	pixels = numpy.asarray(image)
	avg = mean(pixels)

	# create string of bits
	diff = pixels > avg
	# make a hash
	return ImageHash(diff)
def phash(image, hash_size=8, highfreq_factor=4):
	# type: (Image.Image, int, int) -> ImageHash
	"""
	Perceptual Hash computation.

	Implementation follows https://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html

	@image must be a PIL instance.
	"""
	if hash_size < 2:
		raise ValueError('Hash size must be greater than or equal to 2')

	import scipy.fftpack
	img_size = hash_size * highfreq_factor
	image = image.convert('L').resize((img_size, img_size), ANTIALIAS)
	pixels = numpy.asarray(image)
	# 2D DCT, then keep only the top-left (low frequency) block
	dct = scipy.fftpack.dct(scipy.fftpack.dct(pixels, axis=0), axis=1)
	dctlowfreq = dct[:hash_size, :hash_size]
	med = numpy.median(dctlowfreq)
	diff = dctlowfreq > med
	return ImageHash(diff)


def phash_simple(image, hash_size=8, highfreq_factor=4):
	# type: (Image.Image, int, int) -> ImageHash
	"""
	Perceptual Hash computation.

	Implementation follows https://www.hackerfactor.com/blog/index.php?/archives/432-Looks-Like-It.html

	@image must be a PIL instance.
	"""
	# validate input like the other hash functions in this module
	if hash_size < 2:
		raise ValueError('Hash size must be greater than or equal to 2')

	import scipy.fftpack
	img_size = hash_size * highfreq_factor
	image = image.convert('L').resize((img_size, img_size), ANTIALIAS)
	pixels = numpy.asarray(image)
	# 1D DCT; skip the first (DC) coefficient column
	dct = scipy.fftpack.dct(pixels)
	dctlowfreq = dct[:hash_size, 1:hash_size + 1]
	avg = dctlowfreq.mean()
	diff = dctlowfreq > avg
	return ImageHash(diff)


def dhash(image, hash_size=8):
	# type: (Image.Image, int) -> ImageHash
	"""
	Difference Hash computation.

	following https://www.hackerfactor.com/blog/index.php?/archives/529-Kind-of-Like-That.html

	computes differences horizontally

	@image must be a PIL instance.
	"""
	# resize(w, h), but numpy.array((h, w))
	if hash_size < 2:
		raise ValueError('Hash size must be greater than or equal to 2')

	image = image.convert('L').resize((hash_size + 1, hash_size), ANTIALIAS)
	pixels = numpy.asarray(image)
	# compute differences between columns
	diff = pixels[:, 1:] > pixels[:, :-1]
	return ImageHash(diff)


def dhash_vertical(image, hash_size=8):
	# type: (Image.Image, int) -> ImageHash
	"""
	Difference Hash computation.

	following https://www.hackerfactor.com/blog/index.php?/archives/529-Kind-of-Like-That.html

	computes differences vertically

	@image must be a PIL instance.
	"""
	# validate input like dhash does
	if hash_size < 2:
		raise ValueError('Hash size must be greater than or equal to 2')

	# resize(w, h), but numpy.array((h, w))
	image = image.convert('L').resize((hash_size, hash_size + 1), ANTIALIAS)
	pixels = numpy.asarray(image)
	# compute differences between rows
	diff = pixels[1:, :] > pixels[:-1, :]
	return ImageHash(diff)


def whash(image, hash_size=8, image_scale=None, mode='haar', remove_max_haar_ll=True):
	# type: (Image.Image, int, int | None, WhashMode, bool) -> ImageHash
	"""
	Wavelet Hash computation.

	based on https://www.kaggle.com/c/avito-duplicate-ads-detection/

	@image must be a PIL instance.
	@hash_size must be a power of 2 and less than @image_scale.
	@image_scale must be power of 2 and less than image size. By default is equal to max
	power of 2 for an input image.
	@mode (see modes in pywt library):
		'haar' - Haar wavelets, by default
		'db4' - Daubechies wavelets
	@remove_max_haar_ll - remove the lowest low level (LL) frequency using Haar wavelet.
	"""
	import pywt
	# NOTE: preconditions are checked with assert (stripped under -O); kept
	# for backward compatibility with callers expecting AssertionError.
	if image_scale is not None:
		assert image_scale & (image_scale - 1) == 0, 'image_scale is not power of 2'
	else:
		image_natural_scale = 2**int(numpy.log2(min(image.size)))
		image_scale = max(image_natural_scale, hash_size)

	ll_max_level = int(numpy.log2(image_scale))

	level = int(numpy.log2(hash_size))
	assert hash_size & (hash_size - 1) == 0, 'hash_size is not power of 2'
	assert level <= ll_max_level, 'hash_size in a wrong range'
	dwt_level = ll_max_level - level

	image = image.convert('L').resize((image_scale, image_scale), ANTIALIAS)
	pixels = numpy.asarray(image) / 255.

	# Remove low level frequency LL(max_ll) if @remove_max_haar_ll using haar filter
	if remove_max_haar_ll:
		coeffs = pywt.wavedec2(pixels, 'haar', level=ll_max_level)
		coeffs = list(coeffs)
		coeffs[0] *= 0
		pixels = pywt.waverec2(coeffs, 'haar')

	# Use LL(K) as freq, where K is log2(@hash_size)
	coeffs = pywt.wavedec2(pixels, mode, level=dwt_level)
	dwt_low = coeffs[0]

	# Subtract median and compute hash
	med = numpy.median(dwt_low)
	diff = dwt_low > med
	return ImageHash(diff)
def colorhash(image, binbits=3):
	# type: (Image.Image, int) -> ImageHash
	"""
	Color Hash computation.

	Computes fractions of image in intensity, hue and saturation bins:

	* the first binbits encode the black fraction of the image
	* the next binbits encode the gray fraction of the remaining image (low saturation)
	* the next 6*binbits encode the fraction in 6 bins of hue, for mildly saturated parts of the remaining image
	* the next 6*binbits encode the fraction in 6 bins of hue, for highly saturated parts of the remaining image

	(DOCFIX: the histogrammed quantity in the 6-bin groups is hue, not
	saturation, and the mildly saturated counts come first, matching the
	append order in the code below.)

	@binbits number of bits to use to encode each pixel fractions
	"""

	# bin in hsv space:
	intensity = numpy.asarray(image.convert('L')).flatten()
	h, s, v = [numpy.asarray(v).flatten() for v in image.convert('HSV').split()]
	# black bin
	mask_black = intensity < 256 // 8
	frac_black = mask_black.mean()
	# gray bin (low saturation, but not black)
	mask_gray = s < 256 // 3
	frac_gray = numpy.logical_and(~mask_black, mask_gray).mean()
	# two color bins (medium and high saturation, not in the two above)
	mask_colors = numpy.logical_and(~mask_black, ~mask_gray)
	mask_faint_colors = numpy.logical_and(mask_colors, s < 256 * 2 // 3)
	# NOTE: pixels with s exactly == 256 * 2 // 3 fall in neither color bin;
	# kept as-is so existing stored hashes remain comparable.
	mask_bright_colors = numpy.logical_and(mask_colors, s > 256 * 2 // 3)

	c = max(1, mask_colors.sum())
	# in the color bins, make sub-bins by hue
	hue_bins = numpy.linspace(0, 255, 6 + 1)
	if mask_faint_colors.any():
		h_faint_counts, _ = numpy.histogram(h[mask_faint_colors], bins=hue_bins)
	else:
		h_faint_counts = numpy.zeros(len(hue_bins) - 1)
	if mask_bright_colors.any():
		h_bright_counts, _ = numpy.histogram(h[mask_bright_colors], bins=hue_bins)
	else:
		h_bright_counts = numpy.zeros(len(hue_bins) - 1)

	# now we have fractions in each category (6*2 + 2 = 14 bins)
	# convert to hash and discretize:
	maxvalue = 2**binbits
	values = [min(maxvalue - 1, int(frac_black * maxvalue)), min(maxvalue - 1, int(frac_gray * maxvalue))]
	for counts in list(h_faint_counts) + list(h_bright_counts):
		values.append(min(maxvalue - 1, int(counts / c * maxvalue)))
	# encode each discretized value as binbits bits, most significant first
	bitarray = []
	for v in values:
		bitarray += [v // (2**(binbits - i - 1)) % 2**(binbits - i) > 0 for i in range(binbits)]
	return ImageHash(numpy.asarray(bitarray).reshape((-1, binbits)))
int(frac_gray * maxvalue))] 440 | for counts in list(h_faint_counts) + list(h_bright_counts): 441 | values.append(min(maxvalue - 1, int(counts / c * maxvalue))) 442 | # print(values) 443 | bitarray = [] 444 | for v in values: 445 | bitarray += [v // (2**(binbits - i - 1)) % 2**(binbits - i) > 0 for i in range(binbits)] 446 | return ImageHash(numpy.asarray(bitarray).reshape((-1, binbits))) 447 | 448 | 449 | class ImageMultiHash: 450 | """ 451 | This is an image hash containing a list of individual hashes for segments of the image. 452 | The matching logic is implemented as described in Efficient Cropping-Resistant Robust Image Hashing 453 | """ 454 | 455 | def __init__(self, hashes): 456 | # type: (list[ImageHash]) -> None 457 | self.segment_hashes = hashes # type: list[ImageHash] 458 | 459 | def __eq__(self, other): 460 | # type: (object) -> bool 461 | if other is None: 462 | return False 463 | return self.matches(other) # type: ignore 464 | 465 | def __ne__(self, other): 466 | # type: (object) -> bool 467 | return not self.matches(other) # type: ignore 468 | 469 | def __sub__(self, other, hamming_cutoff=None, bit_error_rate=None): 470 | # type: (ImageMultiHash, float | None, float | None) -> float 471 | matches, sum_distance = self.hash_diff(other, hamming_cutoff, bit_error_rate) 472 | max_difference = len(self.segment_hashes) 473 | if matches == 0: 474 | return max_difference 475 | max_distance = matches * len(self.segment_hashes[0]) 476 | tie_breaker = 0 - (float(sum_distance) / max_distance) 477 | match_score = matches + tie_breaker 478 | return max_difference - match_score 479 | 480 | def __hash__(self): 481 | return hash(tuple(hash(segment) for segment in self.segment_hashes)) 482 | 483 | def __str__(self): 484 | return ','.join(str(x) for x in self.segment_hashes) 485 | 486 | def __repr__(self): 487 | return repr(self.segment_hashes) 488 | 489 | def hash_diff(self, other_hash, hamming_cutoff=None, bit_error_rate=None): 490 | # type: (ImageMultiHash, float | 
None, float | None) -> tuple[int, int] 491 | """ 492 | Gets the difference between two multi-hashes, as a tuple. The first element of the tuple is the number of 493 | matching segments, and the second element is the sum of the hamming distances of matching hashes. 494 | NOTE: Do not order directly by this tuple, as higher is better for matches, and worse for hamming cutoff. 495 | :param other_hash: The image multi hash to compare against 496 | :param hamming_cutoff: The maximum hamming distance to a region hash in the target hash 497 | :param bit_error_rate: Percentage of bits which can be incorrect, an alternative to the hamming cutoff. The 498 | default of 0.25 means that the segment hashes can be up to 25% different 499 | """ 500 | # Set default hamming cutoff if it's not set. 501 | if hamming_cutoff is None: 502 | if bit_error_rate is None: 503 | bit_error_rate = 0.25 504 | hamming_cutoff = len(self.segment_hashes[0]) * bit_error_rate 505 | # Get the hash distance for each region hash within cutoff 506 | distances = [] 507 | for segment_hash in self.segment_hashes: 508 | lowest_distance = min( 509 | segment_hash - other_segment_hash 510 | for other_segment_hash in other_hash.segment_hashes 511 | ) 512 | if lowest_distance > hamming_cutoff: 513 | continue 514 | distances.append(lowest_distance) 515 | return len(distances), sum(distances) 516 | 517 | def matches(self, other_hash, region_cutoff=1, hamming_cutoff=None, bit_error_rate=None): 518 | # type: (ImageMultiHash, int, float | None, float | None) -> bool 519 | """ 520 | Checks whether this hash matches another crop resistant hash, `other_hash`. 521 | :param other_hash: The image multi hash to compare against 522 | :param region_cutoff: The minimum number of regions which must have a matching hash 523 | :param hamming_cutoff: The maximum hamming distance to a region hash in the target hash 524 | :param bit_error_rate: Percentage of bits which can be incorrect, an alternative to the hamming cutoff. 
The 525 | default of 0.25 means that the segment hashes can be up to 25% different 526 | """ 527 | matches, _ = self.hash_diff(other_hash, hamming_cutoff, bit_error_rate) 528 | return matches >= region_cutoff 529 | 530 | def best_match(self, other_hashes, hamming_cutoff=None, bit_error_rate=None): 531 | # type: (list[ImageMultiHash], float | None, float | None) -> ImageMultiHash 532 | """ 533 | Returns the hash in a list which is the best match to the current hash 534 | :param other_hashes: A list of image multi hashes to compare against 535 | :param hamming_cutoff: The maximum hamming distance to a region hash in the target hash 536 | :param bit_error_rate: Percentage of bits which can be incorrect, an alternative to the hamming cutoff. 537 | Defaults to 0.25 if unset, which means the hash can be 25% different 538 | """ 539 | return min( 540 | other_hashes, 541 | key=lambda other_hash: self.__sub__(other_hash, hamming_cutoff, bit_error_rate) 542 | ) 543 | 544 | 545 | def _find_region(remaining_pixels, segmented_pixels): 546 | """ 547 | Finds a region and returns a set of pixel coordinates for it. 548 | :param remaining_pixels: A numpy bool array, with True meaning the pixels are remaining to segment 549 | :param segmented_pixels: A set of pixel coordinates which have already been assigned to segment. This will be 550 | updated with the new pixels added to the returned segment. 
def _find_all_segments(pixels, segment_threshold, min_segment_size):
	"""
	Finds all the regions within an image pixel array, and returns a list of the regions.

	Note: Slightly different segmentations are produced when using pillow version 6 vs. >=7, due to a change in
	rounding in the greyscale conversion.
	:param pixels: A numpy array of the pixel brightnesses.
	:param segment_threshold: The brightness threshold to use when differentiating between hills and valleys.
	:param min_segment_size: The minimum number of pixels for a segment.
	"""
	# numpy shape is (rows, cols); the previous local names called these
	# (width, height), which only coincides for the square images used here.
	num_rows, num_cols = pixels.shape
	# threshold pixels
	threshold_pixels = pixels > segment_threshold
	unassigned_pixels = numpy.full(pixels.shape, True, dtype=bool)

	segments = []
	already_segmented = set()

	# Add all the pixels around the border outside the image:
	already_segmented.update([(-1, z) for z in range(num_cols)])
	already_segmented.update([(z, -1) for z in range(num_rows)])
	already_segmented.update([(num_rows, z) for z in range(num_cols)])
	already_segmented.update([(z, num_cols) for z in range(num_rows)])

	# Find all the "hill" regions
	while numpy.bitwise_and(threshold_pixels, unassigned_pixels).any():
		remaining_pixels = numpy.bitwise_and(threshold_pixels, unassigned_pixels)
		segment = _find_region(remaining_pixels, already_segmented)
		# Apply segment
		if len(segment) > min_segment_size:
			segments.append(segment)
		for pix in segment:
			unassigned_pixels[pix] = False

	# Invert the threshold matrix, and find "valleys"
	threshold_pixels_i = numpy.invert(threshold_pixels)
	while len(already_segmented) < num_rows * num_cols:
		remaining_pixels = numpy.bitwise_and(threshold_pixels_i, unassigned_pixels)
		segment = _find_region(remaining_pixels, already_segmented)
		# Apply segment
		if len(segment) > min_segment_size:
			segments.append(segment)
		for pix in segment:
			unassigned_pixels[pix] = False

	return segments


def crop_resistant_hash(
	image,  # type: Image.Image
	hash_func=dhash,  # type: HashFunc
	limit_segments=None,  # type: int | None
	segment_threshold=128,  # type: int
	min_segment_size=500,  # type: int
	segmentation_image_size=300  # type: int
):
	# type: (...) -> ImageMultiHash
	"""
	Creates a CropResistantHash object, by the algorithm described in the paper "Efficient Cropping-Resistant Robust
	Image Hashing". DOI 10.1109/ARES.2014.85
	This algorithm partitions the image into bright and dark segments, using a watershed-like algorithm, and then does
	an image hash on each segment. This makes the image much more resistant to cropping than other algorithms, with
	the paper claiming resistance to up to 50% cropping, while most other algorithms stop at about 5% cropping.

	Note: Slightly different segmentations are produced when using pillow version 6 vs. >=7, due to a change in
	rounding in the greyscale conversion. This leads to a slightly different result.
	:param image: The image to hash
	:param hash_func: The hashing function to use
	:param limit_segments: If you have storage requirements, you can limit to hashing only the M largest segments
	:param segment_threshold: Brightness threshold between hills and valleys. This should be static, putting it between
	peak and through dynamically breaks the matching
	:param min_segment_size: Minimum number of pixels for a hashable segment
	:param segmentation_image_size: Size which the image is resized to before segmentation
	"""

	orig_image = image.copy()
	# Convert to gray scale and resize
	image = image.convert('L').resize((segmentation_image_size, segmentation_image_size), ANTIALIAS)
	# Add filters to smooth out noise before segmentation
	image = image.filter(ImageFilter.GaussianBlur()).filter(ImageFilter.MedianFilter())
	pixels = numpy.array(image).astype(numpy.float32)

	segments = _find_all_segments(pixels, segment_threshold, min_segment_size)

	# If there are no segments, have 1 segment including the whole image
	if not segments:
		full_image_segment = {(0, 0), (segmentation_image_size - 1, segmentation_image_size - 1)}
		segments.append(full_image_segment)

	# If segment limit is set, discard the smaller segments
	if limit_segments:
		segments = sorted(segments, key=len, reverse=True)[:limit_segments]

	# Scale factors from the segmentation image back to the original image
	# (loop-invariant, so computed once rather than per segment)
	orig_w, orig_h = orig_image.size
	scale_w = float(orig_w) / segmentation_image_size
	scale_h = float(orig_h) / segmentation_image_size

	# Create bounding box for each segment and hash the cropped region
	hashes = []
	for segment in segments:
		# segment coordinates are (row, col), i.e. (y, x)
		min_y = min(coord[0] for coord in segment) * scale_h
		min_x = min(coord[1] for coord in segment) * scale_w
		max_y = (max(coord[0] for coord in segment) + 1) * scale_h
		max_x = (max(coord[1] for coord in segment) + 1) * scale_w
		# Compute robust hash for each bounding box
		bounding_box = orig_image.crop((min_x, min_y, max_x, max_y))
		hashes.append(hash_func(bounding_box))
	return ImageMultiHash(hashes)