├── LICENSE ├── README.md ├── generateHashes.py ├── install.bat └── install.sh /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Jan Kaiser 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pyPhotoDNA 2 | Calculate PhotoDNA hashes using Python 3 | 4 | ## Setup 5 | 1) Clone this repo 6 | 2) Run `install.bat` if you are on Windows, or `install.sh` if you are on a Mac or Linux. 7 | 3) Once the setup is complete, run `generateHashes.py` to generate hashes. 8 | 9 | 10 | If you want to learn more about PhotoDNA, head over to [jPhotoDNA](https://github.com/jankais3r/jPhotoDNA). 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Generate PhotoDNA hashes for every image below a folder.

Images are hashed in a multiprocessing pool. Each worker appends its results
to ``<worker name>.txt`` in the current working directory; once the pool has
finished, those files are merged into ``hashes.csv`` and removed.

Usage: set ``inputFolder`` below, or pass the folder as the first
command-line argument.
"""

import os
import sys
import glob
import time
import base64
import multiprocessing
from ctypes import cast
from ctypes import cdll
from ctypes import c_int
from ctypes import c_ubyte
from ctypes import POINTER
from ctypes import c_char_p
try:
    from PIL import Image, ImageFile
    ImageFile.LOAD_TRUNCATED_IMAGES = True
except ImportError:
    print('Install Pillow with "pip3 install Pillow"')
    # sys.exit instead of quit(): quit() only exists when the site module
    # has been imported, which is not guaranteed for every interpreter setup.
    sys.exit(1)

# Folder whose images are hashed. Edit this value, or pass the folder as the
# first command-line argument.
inputFolder = r'C:\images\to\be\hashed'

# Untouched default of inputFolder; used to detect an unconfigured script.
PLACEHOLDER_INPUT_FOLDER = r'C:\images\to\be\hashed'

# Size in bytes of the output buffer handed to ComputeRobustHash.
HASH_LENGTH = 144

# File name patterns treated as images (jpg/jpeg, png, gif, bmp). The
# character classes make the match case-insensitive on platforms where glob
# compares names case-sensitively, so "IMG_0001.JPG" is picked up as well.
IMAGE_PATTERNS = (
    '*.[jJ][pP]*[gG]',
    '*.[pP][nN][gG]',
    '*.[gG][iI][fF]',
    '*.[bB][mM][pP]',
)

# Libraries already loaded by this process, keyed by path.
_loadedLibraries = {}


def _loadHashFunction(libraryPath):
    """Return the library's ComputeRobustHash, loading the library once per process."""
    library = _loadedLibraries.get(libraryPath)
    if library is None:
        library = cdll.LoadLibrary(libraryPath)
        computeRobustHash = library.ComputeRobustHash
        computeRobustHash.argtypes = [c_char_p, c_int, c_int, c_int, POINTER(c_ubyte), c_int]
        computeRobustHash.restype = c_ubyte
        _loadedLibraries[libraryPath] = library
    # ctypes caches the function object on the library, so the argtypes and
    # restype configured above are still attached here.
    return library.ComputeRobustHash


def generateHash(outputFolder, libName, imagePath):
    """Hash one image and append the result to this worker's output file.

    The image is converted to RGB, passed to ``ComputeRobustHash`` from the
    library ``libName`` located in ``outputFolder``, and the 144-byte result
    is appended base64-encoded, as a ``"path","hash"`` row, to
    ``<outputFolder>/<current process name>.txt``.

    Args:
        outputFolder: Folder holding the native library; worker output files
            are written here too.
        libName: File name of the PhotoDNA library.
        imagePath: Path of the image to hash.

    Returns:
        True when a row was written, False when anything went wrong (the
        error is printed together with the image path).
    """
    try:
        workerId = multiprocessing.current_process().name

        # The context manager closes the underlying file; convert() returns
        # an independent, fully loaded copy that stays usable afterwards.
        with Image.open(imagePath) as sourceImage:
            rgbImage = sourceImage.convert(mode = 'RGB')
        pixelData = rgbImage.tobytes()

        computeRobustHash = _loadHashFunction(os.path.join(outputFolder, libName))
        hashByteArray = (c_ubyte * HASH_LENGTH)()
        # NOTE(review): the return value is ignored, as before; its meaning
        # is not documented in this repository.
        computeRobustHash(c_char_p(pixelData), rgbImage.width, rgbImage.height, 0, hashByteArray, 0)

        # One byte per buffer element. For base10 output use
        # ','.join(str(b) for b in hashBytes) instead of the base64 string.
        hashBytes = bytes(hashByteArray)
        encodedHash = base64.b64encode(hashBytes).decode('utf-8')

        # Double embedded quotes so a path containing '"' cannot break the row.
        quotedPath = imagePath.replace('"', '""')
        with open(os.path.join(outputFolder, workerId + '.txt'), 'a', encoding = 'utf8') as outputFile:
            outputFile.write('"' + quotedPath + '","' + encodedHash + '"\n')
        return True
    except Exception as e:
        # Best effort: one unreadable image must not stop the whole run.
        print('Failed to hash ' + imagePath + ': ' + str(e))
        return False


if __name__ == '__main__':
    outputFolder = os.getcwd()
    if sys.platform == "win32":
        libName = 'PhotoDNAx64.dll'
    elif sys.platform == "darwin":
        libName = 'PhotoDNAx64.so'
    else:
        print('Linux is not supported.')
        sys.exit(1)

    # An explicit command-line argument overrides the value set in the file.
    if len(sys.argv) > 1:
        inputFolder = sys.argv[1]
    if inputFolder == PLACEHOLDER_INPUT_FOLDER:
        print('Please set the inputFolder variable near the top of generateHashes.py, '
              'or pass the folder as the first argument.')
        sys.exit(1)
    if not os.path.isdir(inputFolder):
        print('Input folder not found: ' + inputFolder)
        sys.exit(1)
    if not os.path.isfile(os.path.join(outputFolder, libName)):
        # Without this check every single image would fail with the same error.
        print(libName + ' not found in ' + outputFolder + '. Run the installer first.')
        sys.exit(1)

    startTime = time.time()
    print('Generating hashes for all images under ' + inputFolder)

    workerCount = os.cpu_count() or 1
    pool = multiprocessing.Pool(processes = workerCount)
    print('Starting processing using ' + str(workerCount) + ' threads.')

    # glob.escape keeps characters such as '[' in the folder name literal.
    searchRoot = glob.escape(inputFolder)
    images = []
    for pattern in IMAGE_PATTERNS:
        images.extend(glob.glob(os.path.join(searchRoot, '**', pattern), recursive = True))
    # A name such as "scan.jpg.png" matches two patterns; hash it only once.
    images = list(dict.fromkeys(images))

    pendingResults = [pool.apply_async(generateHash, [outputFolder, libName, imagePath]) for imagePath in images]
    pool.close()
    pool.join()
    imageCount = sum(1 for result in pendingResults if result.successful() and result.get())

    # Worker process names depend on the start method (SpawnPoolWorker-N,
    # ForkPoolWorker-N, ...), so collect the files by pattern instead of
    # reconstructing the names.
    allHashes = []
    workerFiles = sorted(glob.glob(os.path.join(glob.escape(outputFolder), '*PoolWorker-*.txt')))
    for workerFile in workerFiles:
        with open(workerFile, 'r', encoding = 'utf8') as inputFile:
            allHashes.extend(inputFile.read().splitlines())
        os.remove(workerFile)

    with open(os.path.join(outputFolder, 'hashes.csv'), 'a', encoding = 'utf8', errors = 'ignore') as f:
        for word in allHashes:
            f.write(str(word) + '\n')

    print('Results saved into ' + os.path.join(outputFolder, 'hashes.csv'))
    failedCount = len(images) - imageCount
    if failedCount:
        print(f'{failedCount:,}' + ' images could not be hashed.')
    print('Generated hashes for ' + f'{imageCount:,}' + ' images in ' + str(int(round((time.time() - startTime)))) + ' seconds.')
# Platform-specific setup: macOS has no download source; every other platform
# gets the Windows DLL extracted from the FTK ISO plus an embedded Python that
# is run through Wine.
if [ "$(uname)" == "Darwin" ]; then
    echo "Download link for native macOS version currently unavailable."
else
    # Print an error to stderr and stop, so a failed step can never be
    # followed by the "Installation complete!" banner.
    fail() {
        echo "$1" >&2
        exit 1
    }

    # Each entry is <command>:<package to install>; only isoinfo ships in a
    # package with a different name (genisoimage).
    for dependency in curl:curl wine64:wine64 cabextract:cabextract isoinfo:genisoimage; do
        if ! [ -x "$(command -v "${dependency%%:*}")" ]; then
            echo "Dependency missing. Please install '${dependency##*:}' and re-run the installer."
            exit 1
        fi
    done

    echo "Downloading FTK (3.3GB, might take a while)..."
    # -f makes curl fail on HTTP errors instead of saving the error page.
    curl -fLO https://d1kpmuwb7gvu1i.cloudfront.net/AD_FTK_7.0.0.iso \
        || fail "Download of AD_FTK_7.0.0.iso failed."

    echo
    echo "Extracting PhotoDNAx64.dll."
    # An empty Data1.cab means the cabinet was not extracted from the image,
    # so the file size is checked in addition to the exit status.
    if ! isoinfo -i AD_FTK_7.0.0.iso -x /FTK/FTK/X64/_8A89F09/DATA1.CAB > Data1.cab || ! [ -s Data1.cab ]; then
        rm -f Data1.cab
        fail "Could not extract Data1.cab from AD_FTK_7.0.0.iso."
    fi
    rm AD_FTK_7.0.0.iso
    cabextract -d tmp -q Data1.cab || fail "Could not unpack Data1.cab."
    rm Data1.cab
    mv tmp/photodnax64.1.72.dll PhotoDNAx64.dll \
        || fail "photodnax64.1.72.dll was not found in Data1.cab."
    rm -rf tmp

    echo
    echo "Downloading minimal Python for Wine..."
    curl -fLO https://github.com/jankais3r/pyPhotoDNA/releases/download/wine_python_39/wine_python_39.tar.gz \
        || fail "Download of wine_python_39.tar.gz failed."
    tar -xf wine_python_39.tar.gz || fail "Could not unpack wine_python_39.tar.gz."
    rm wine_python_39.tar.gz

    echo
    echo
    echo "Installation complete!"
    echo "_____________________________"
    echo
    echo "To generate a PhotoDNA hash, run: WINEDEBUG=-all wine64 python-3.9.12-embed-amd64/python.exe generateHashes.py"
fi