├── .gitignore ├── README.md ├── align.py ├── aligned_dataset.pickle ├── calc_map.py ├── calc_pre_rec.py ├── conda_macos.yml ├── conda_ubuntu.yml ├── confusion.py ├── dataset.pickle ├── dataset.py ├── ddh.py ├── ddh2.py ├── ddh3.py ├── ddh4.py ├── eval_perf.py ├── gather.py ├── hamming_dist.py ├── logger.py ├── predict.py ├── run.py ├── stats.py ├── utils.py └── viz.py /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | __pycache__ 3 | .DS_Store 4 | data/* 5 | aligned_data/* 6 | logs/* 7 | saved_models/* 8 | stats/* 9 | codes/* 10 | *.pyc 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Face Hashing Using Neural Networks 2 | 3 | ## File structure 4 | 5 | ```bash 6 | |--- code/ # contains code for the project 7 | | |--- data/ # contains preprocessed data 8 | |--- facescrub/ # from https://github.com/faceteam/facescrub.git 9 | | |--- download/ # data from the download.py script 10 | | |--- download.py # script to download data 11 | ``` 12 | 13 | ## Packages 14 | Run `source activate cs670project` to activate the project's conda environment. 15 | 16 | ### Package Versions 17 | - python=3.6 18 | - numpy=1.15.4 19 | - scipy=1.1.0 20 | - opencv-python=3.4.3.18 21 | - matplotlib=3.0.1 22 | - jupyter=1.0.0 23 | - sklearn=0.20.1 (pip) 24 | - cmake=3.13.2 (pip) 25 | - dlib=19.16.0 (pip) 26 | 27 | ### Installing PyTorch 28 | `conda install pytorch torchvision -c pytorch` 29 | 30 | ## Miscellaneous Notes 31 | 32 | - The dataset comes from https://github.com/faceteam/facescrub.git. **NOTE:** download.py requires Python 2.7. 33 | 34 | ## Instructions 35 | 36 | 1. Install Miniconda and pip, 37 | 2. Install the packages noted above, 38 | 3. `git clone https://github.com/faceteam/facescrub.git` into the same level as the project's path (see file structure above), 39 | 4. Run `python download.py` with Python 2.7 to download the FaceScrub images, 40 | 5. Run `python utils.py` to preprocess the images and move them into the project's ./data folder. 41 | 42 | - To run on a specific CUDA device, run `CUDA_VISIBLE_DEVICES=# python run.py`, where # is the number of the device in `nvidia-smi` (see the example below).
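For example, to train on the second GPU listed by `nvidia-smi`:

```bash
CUDA_VISIBLE_DEVICES=1 python run.py
```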
43 | -------------------------------------------------------------------------------- /align.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import dlib 4 | import numpy as np 5 | from pdb import set_trace 6 | from matplotlib import pyplot as plt 7 | from utils import lsdir, mkdir 8 | 9 | # From 10 | # https://www.pyimagesearch.com/2017/04/03/facial-landmarks-dlib-opencv-python/ 11 | PREDICTOR_MODEL_PATH = "./saved_models/shape_predictor_68_face_landmarks.dat" 12 | RIGHT_EYE_IDX = np.arange(36, 42) 13 | LEFT_EYE_IDX = np.arange(42, 48) 14 | 15 | def align(img_path, **kwargs): 16 | ''' 17 | https://www.pyimagesearch.com/2017/05/22/face-alignment-with-opencv-and-python/ 18 | ''' 19 | desired_left_x = kwargs.get("desired_left_eye_x", 0.25) 20 | img_width = kwargs.get("img_width", 400) 21 | 22 | detector = dlib.get_frontal_face_detector() 23 | predictor = dlib.shape_predictor(PREDICTOR_MODEL_PATH) 24 | img = cv2.imread(img_path) 25 | img = cv2.resize(img, (img_width, img_width)) 26 | img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 27 | gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 28 | rects = detector(gray, 1) 29 | # get all of the 68 landmark points 30 | shape = _shape_to_np(predictor(gray, rects[0])) 31 | # center of all eye points 32 | left_eye_center = shape[LEFT_EYE_IDX, :].mean(axis=0).astype('int') 33 | right_eye_center = shape[RIGHT_EYE_IDX, :].mean(axis=0).astype('int') 34 | # x and y offsets between the two eye centers 35 | dy = right_eye_center[1] - left_eye_center[1] 36 | dx = right_eye_center[0] - left_eye_center[0] 37 | # rotation angle between the eyes 38 | angle = np.degrees(np.arctan2(dy, dx)) - 180 39 | # desired eye positions along the x-axis 40 | desired_right_x = 1.0 - desired_left_x 41 | # calculate scale to get to desired size 42 | dist = np.sqrt((dx ** 2) + (dy ** 2)) 43 | desired_dist = desired_right_x - desired_left_x 44 | desired_dist *= round(img_width * 0.7) 45 | scale = desired_dist / dist 46 | # calculate the midpoint between the eyes 47 | eye_center = (left_eye_center[0] + right_eye_center[0]) // 2, \ 48 | (left_eye_center[1] + right_eye_center[1]) // 2 49 | # rotation matrix 50 | M = cv2.getRotationMatrix2D(eye_center, angle, scale) 51 | # update translation component 52 | tx = img_width * 0.45 53 | ty = img_width * desired_left_x + 20 54 | M[0,2] += (tx - eye_center[0]) 55 | M[1,2] += (ty - eye_center[1]) 56 | output = cv2.warpAffine(img_rgb, M, (img_width, img_width)) 57 | return output[:350, :350, :] 58 | 59 | def _shape_to_np(shape, dtype="int"): 60 | ''' 61 | From 62 | https://www.pyimagesearch.com/2017/04/03/facial-landmarks-dlib-opencv-python/ 63 | ''' 64 | coords = np.zeros((68, 2), dtype=dtype) 65 | for i in range(0, 68): 66 | coords[i] = (shape.part(i).x, shape.part(i).y) 67 | return coords 68 | 69 | def _align_imgs(img_files, from_path, to_path): 70 | counter = 0 71 | for img_name in img_files: 72 | img_path = from_path + "/" + img_name 73 | aligned_path = to_path + "/" + img_name 74 | try: 75 | aligned_img = align(img_path) 76 | cv2.imwrite(aligned_path, 77 | cv2.cvtColor(aligned_img, cv2.COLOR_RGB2BGR)) 78 | except Exception: 79 | pass 80 | finally: 81 | counter += 1 82 | return counter 83 | 84 | if __name__ == "__main__": 85 | # from dataset import FaceScrubDataset 86 | # dataset = FaceScrubDataset() 87 | # img_path = dataset.img_paths[4000] 88 | # img = cv2.imread(img_path) 89 | # rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 90 | # output = align(img_path) 91 | # plt.imshow(output) 92 | # plt.show() 93 | 94 | root_path = "./data"
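# assumes the ./data/<person>/ layout produced by utils.py, with per-person val/ and test/ subfolders created by create_set() in dataset.py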
95 | target_path = "./aligned_data" 96 | mkdir(target_path) 97 | names = lsdir(root_path) 98 | counter = 0 99 | for name in names: 100 | from_path = root_path + "/" + name 101 | to_path = target_path + "/" + name 102 | mkdir(to_path) 103 | # training files 104 | img_names = list(filter(lambda x: ".jpg" in x, lsdir(from_path))) 105 | counter += _align_imgs(img_names, from_path, to_path) 106 | # validation files 107 | val_from_path = from_path + "/val" 108 | val_to_path = to_path + "/val" 109 | mkdir(val_to_path) 110 | img_names = lsdir(val_from_path) 111 | counter += _align_imgs(img_names, val_from_path, val_to_path) 112 | # test files 113 | test_from_path = from_path + "/test" 114 | test_to_path = to_path + "/test" 115 | mkdir(test_to_path) 116 | img_names = lsdir(test_from_path) 117 | counter += _align_imgs(img_names, test_from_path, test_to_path) 118 | print("Processed {} images.".format(counter)) 119 | -------------------------------------------------------------------------------- /aligned_dataset.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treble-maker123/deep-face-hashing/0da530f5486685ae093d79994919b411dc4a4ac5/aligned_dataset.pickle -------------------------------------------------------------------------------- /calc_map.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def calc_map(matches, rankings, top_k): 4 | ''' 5 | Calculate the mean average precision over the T test samples, retrieved 6 | from G training/gallery samples. 7 | 8 | Args: 9 | - matches (numpy.ndarray): a ground-truth matrix mapping whether a 10 | pair of gallery data and test data are of the same person. 11 | Shape (G, T). 12 | - rankings (numpy.ndarray): a sorted index of hamming distance ranking. 13 | Shape (G, T). 14 | - top_k (int): the number of shortest distance gallery images to 15 | compare to the test images. 16 | 17 | Returns: 18 | (float): the average precision scores of the T test samples, averaged 19 | into a single mean average precision value. 20 | ''' 21 | num_gallery, num_test = rankings.shape 22 | 23 | # if top_k is greater than the number of gallery images, clamp it to the 24 | # number of gallery images (only relevant for the test below).
25 | if num_gallery < top_k: top_k = num_gallery 26 | 27 | correct_retrievals = np.zeros((top_k, num_test), dtype="int8") 28 | 29 | for idx in range(num_test): 30 | # get the top_k matches for the test data 31 | rank_slice = rankings[:top_k, idx] 32 | correct_retrievals[:, idx] = matches[rank_slice, idx] 33 | 34 | # [[1,2,3,...,top_k], [1,2,3,...,top_k]] 35 | # to facilitate calculating the average precision 36 | idx = np.linspace(1, top_k, top_k, dtype="int").reshape((top_k, 1)) 37 | correct_idx = np.repeat(idx, num_test, axis=1) 38 | # sum up all of the scores across the top_k for each test sample 39 | summed_scores = np.zeros_like(correct_idx) 40 | for idx in range(top_k): 41 | # sum the number of correct retrievals from 1 up to idx 42 | summed_scores[idx, :] = correct_retrievals[:idx+1, :].sum(axis=0) 43 | 44 | # mask out the scores for the incorrect retrievals 45 | summed_scores = summed_scores * correct_retrievals 46 | # count the number of correct retrievals to be divided 47 | num_corrects = correct_retrievals.sum(axis=0) 48 | 49 | # calculate mean average precision 50 | ap = (summed_scores / correct_idx).sum(axis=0) / num_corrects 51 | # set the nan values to 0 52 | ap[ap != ap] = 0 53 | 54 | return ap.mean() 55 | 56 | if __name__ == "__main__": 57 | matches = np.array([ 58 | [1,1,0,1,0], 59 | [1,0,1,0,1], 60 | [1,1,1,1,0], 61 | [0,0,0,0,0], 62 | [0,0,1,0,1] 63 | ], dtype="int8") 64 | ranking = np.array([ 65 | [0,3,2,1,0], 66 | [4,0,4,0,2], 67 | [1,4,1,2,3], 68 | [3,1,0,3,1], 69 | [2,2,3,4,4] 70 | ], dtype="int8") 71 | mean_ap = calc_map(matches, ranking, top_k=3) 72 | target = np.array([5/6, 1/2, 1, 7/12, 0]).mean() 73 | assert np.isclose(mean_ap, target), "Invalid calculation!" 74 | -------------------------------------------------------------------------------- /calc_pre_rec.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics import precision_recall_curve 2 | from pdb import set_trace 3 | 4 | def calc_pre_rec(hamm_dist, gt, radius): 5 | ''' 6 | Calculates the precision-recall curve values. 
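Args (summarizing the code below): hamm_dist is a (G, T) array of Hamming
distances between G gallery codes and T test codes, gt is a (G, T) binary
matrix marking which pairs share a label, and radius is the distance at or
below which a pair is predicted to be the same person.

Returns the mean precision, mean recall, and mean harmonic mean (F1) over
the test columns, plus the micro-averaged precision and recall curves.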
7 | ''' 8 | # distance within radius counts as 0 9 | dist = hamm_dist * (hamm_dist > radius) 10 | # normalize the distance values, so the smaller distance, the closer to 1 11 | max_val = dist.max() 12 | scores = ((max_val - dist) / max_val) ** 2 13 | scores[scores != scores] = 1 14 | # calculate the "micro average" of the curves 15 | pre_curve, rec_curve, _ = precision_recall_curve(gt.ravel(), scores.ravel()) 16 | 17 | # pred == 1 is what the model believes to be the person 18 | pred = (dist == 0).astype("int8") 19 | # true positives 20 | tp = (pred * gt).sum(axis=0) 21 | # recall 22 | rec = tp / gt.sum(axis=0) 23 | rec[rec != rec] = 0 24 | # precision 25 | pre = tp / pred.sum(axis=0) 26 | pre[pre != pre] = 0 27 | # harmonic mean 28 | hmean = 2 * (pre * rec) / (pre + rec) 29 | hmean[hmean != hmean] = 0 30 | 31 | return pre.mean(), rec.mean(), hmean.mean(), pre_curve, rec_curve 32 | -------------------------------------------------------------------------------- /conda_macos.yml: -------------------------------------------------------------------------------- 1 | name: cs670project 2 | channels: 3 | - pytorch 4 | - anaconda-fusion 5 | - defaults 6 | dependencies: 7 | - appnope=0.1.0=py36hf537a9a_0 8 | - backcall=0.1.0=py36_0 9 | - blas=1.0=mkl 10 | - bleach=3.0.2=py36_0 11 | - ca-certificates=2018.03.07=0 12 | - certifi=2018.10.15=py36_0 13 | - cffi=1.11.5=py36h6174b99_1 14 | - cycler=0.10.0=py36hfc81398_0 15 | - dbus=1.13.2=h760590f_1 16 | - decorator=4.3.0=py36_0 17 | - entrypoints=0.2.3=py36_2 18 | - expat=2.2.6=h0a44026_0 19 | - freetype=2.9.1=hb4e5f40_0 20 | - gettext=0.19.8.1=h15daf44_3 21 | - glib=2.56.2=hd9629dc_0 22 | - icu=58.2=h4b95b61_1 23 | - intel-openmp=2019.1=144 24 | - ipykernel=5.1.0=py36h39e3cac_0 25 | - ipython=7.2.0=py36h39e3cac_0 26 | - ipython_genutils=0.2.0=py36h241746c_0 27 | - ipywidgets=7.4.2=py36_0 28 | - jedi=0.13.1=py36_0 29 | - jinja2=2.10=py36_0 30 | - jpeg=9b=he5867d9_2 31 | - jsonschema=2.6.0=py36hb385e00_0 32 | - jupyter=1.0.0=py36_7 33 | - jupyter_client=5.2.3=py36_0 34 | - jupyter_console=6.0.0=py36_0 35 | - jupyter_core=4.4.0=py36_0 36 | - kiwisolver=1.0.1=py36h0a44026_0 37 | - libcxx=4.0.1=hcfea43d_1 38 | - libcxxabi=4.0.1=hcfea43d_1 39 | - libedit=3.1.20170329=hb402a30_2 40 | - libffi=3.2.1=h475c297_4 41 | - libgfortran=3.0.1=h93005f0_2 42 | - libiconv=1.15=hdd342a3_7 43 | - libpng=1.6.35=ha441bb4_0 44 | - libsodium=1.0.16=h3efe00b_0 45 | - libtiff=4.0.9=hcb84e12_2 46 | - markupsafe=1.1.0=py36h1de35cc_0 47 | - matplotlib=3.0.1=py36h54f8f79_0 48 | - mistune=0.8.4=py36h1de35cc_0 49 | - mkl=2018.0.3=1 50 | - mkl_fft=1.0.6=py36hb8a8100_0 51 | - mkl_random=1.0.1=py36h5d10147_1 52 | - nbconvert=5.3.1=py36_0 53 | - nbformat=4.4.0=py36h827af21_0 54 | - ncurses=6.1=h0a44026_0 55 | - ninja=1.8.2=py36h04f5b5a_1 56 | - notebook=5.7.2=py36_0 57 | - numpy=1.15.4=py36h6a91979_0 58 | - numpy-base=1.15.4=py36h8a80b8c_0 59 | - olefile=0.46=py36_0 60 | - openssl=1.1.1a=h1de35cc_0 61 | - pandoc=2.2.3.2=0 62 | - pandocfilters=1.4.2=py36_1 63 | - parso=0.3.1=py36_0 64 | - pcre=8.42=h378b8a2_0 65 | - pexpect=4.6.0=py36_0 66 | - pickleshare=0.7.5=py36_0 67 | - pillow=5.3.0=py36hb68e598_0 68 | - pip=18.1=py36_0 69 | - prometheus_client=0.4.2=py36_0 70 | - prompt_toolkit=2.0.7=py36_0 71 | - ptyprocess=0.6.0=py36_0 72 | - pycparser=2.19=py36_0 73 | - pygments=2.2.0=py36h240cd3f_0 74 | - pyparsing=2.3.0=py36_0 75 | - pyqt=5.9.2=py36h655552a_2 76 | - python=3.6.7=haf84260_0 77 | - python-dateutil=2.7.5=py36_0 78 | - pytz=2018.7=py36_0 79 | - pyzmq=17.1.2=py36h1de35cc_0 80 | - 
qt=5.9.6=h45cd832_2 81 | - qtconsole=4.4.2=py36_0 82 | - readline=7.0=h1de35cc_5 83 | - scipy=1.1.0=py36h28f7352_1 84 | - send2trash=1.5.0=py36_0 85 | - setuptools=40.6.2=py36_0 86 | - sip=4.19.8=py36h0a44026_0 87 | - six=1.11.0=py36_1 88 | - sqlite=3.25.3=ha441bb4_0 89 | - terminado=0.8.1=py36_1 90 | - testpath=0.4.2=py36_0 91 | - tk=8.6.8=ha441bb4_0 92 | - tornado=5.1.1=py36h1de35cc_0 93 | - traitlets=4.3.2=py36h65bd3ce_0 94 | - wcwidth=0.1.7=py36h8c6ec74_0 95 | - webencodings=0.5.1=py36_1 96 | - wheel=0.32.3=py36_0 97 | - widgetsnbextension=3.4.2=py36_0 98 | - xz=5.2.4=h1de35cc_4 99 | - zeromq=4.2.5=h0a44026_1 100 | - zlib=1.2.11=h1de35cc_3 101 | - pytorch=0.4.1=py36_cuda0.0_cudnn0.0_1 102 | - torchvision=0.2.1=py36_1 103 | - pip: 104 | - blessed==1.14.2 105 | - botocore==1.7.42 106 | - cement==2.8.2 107 | - docker-py==1.7.2 108 | - dockerpty==0.4.1 109 | - docopt==0.6.2 110 | - docutils==0.14 111 | - opencv-python==3.4.3.18 112 | - pathspec==0.5.0 113 | - requests==2.9.1 114 | - semantic-version==2.5.0 115 | - tabulate==0.7.5 116 | - termcolor==1.1.0 117 | - torch==0.4.1 118 | - websocket-client==0.44.0 119 | prefix: /Users/qui-gon/Workspace/anaconda3/envs/cs670project 120 | 121 | -------------------------------------------------------------------------------- /conda_ubuntu.yml: -------------------------------------------------------------------------------- 1 | name: cs670project 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - backcall=0.1.0=py36_0 7 | - blas=1.0=mkl 8 | - bleach=3.0.2=py36_0 9 | - ca-certificates=2018.03.07=0 10 | - certifi=2018.10.15=py36_0 11 | - cffi=1.11.5=py36he75722e_1 12 | - cycler=0.10.0=py36h93f1223_0 13 | - dbus=1.13.2=h714fa37_1 14 | - decorator=4.3.0=py36_0 15 | - entrypoints=0.2.3=py36_2 16 | - expat=2.2.6=he6710b0_0 17 | - fontconfig=2.13.0=h9420a91_0 18 | - freetype=2.9.1=h8a8886c_1 19 | - glib=2.56.2=hd408876_0 20 | - gmp=6.1.2=h6c8ec71_1 21 | - gst-plugins-base=1.14.0=hbbd80ab_1 22 | - gstreamer=1.14.0=hb453b48_1 23 | - icu=58.2=h9c2bf20_1 24 | - intel-openmp=2019.1=144 25 | - ipykernel=5.1.0=py36h39e3cac_0 26 | - ipython=7.2.0=py36h39e3cac_0 27 | - ipython_genutils=0.2.0=py36_0 28 | - ipywidgets=7.4.2=py36_0 29 | - jedi=0.13.1=py36_0 30 | - jinja2=2.10=py36_0 31 | - jpeg=9b=h024ee3a_2 32 | - jsonschema=2.6.0=py36_0 33 | - jupyter=1.0.0=py36_7 34 | - jupyter_client=5.2.3=py36_0 35 | - jupyter_console=6.0.0=py36_0 36 | - jupyter_core=4.4.0=py36_0 37 | - kiwisolver=1.0.1=py36hf484d3e_0 38 | - libedit=3.1.20170329=h6b74fdf_2 39 | - libffi=3.2.1=hd88cf55_4 40 | - libgcc-ng=8.2.0=hdf63c60_1 41 | - libgfortran-ng=7.3.0=hdf63c60_0 42 | - libpng=1.6.35=hbc83047_0 43 | - libsodium=1.0.16=h1bed415_0 44 | - libstdcxx-ng=8.2.0=hdf63c60_1 45 | - libtiff=4.0.9=he85c1e1_2 46 | - libuuid=1.0.3=h1bed415_2 47 | - libxcb=1.13=h1bed415_1 48 | - libxml2=2.9.8=h26e45fe_1 49 | - markupsafe=1.1.0=py36h7b6447c_0 50 | - matplotlib=3.0.1=py36h5429711_0 51 | - mistune=0.8.4=py36h7b6447c_0 52 | - mkl=2018.0.3=1 53 | - mkl_fft=1.0.6=py36h7dd41cf_0 54 | - mkl_random=1.0.1=py36h4414c95_1 55 | - nbconvert=5.3.1=py36_0 56 | - nbformat=4.4.0=py36h31c9010_0 57 | - ncurses=6.1=he6710b0_1 58 | - ninja=1.8.2=py36h6bb024c_1 59 | - notebook=5.7.2=py36_0 60 | - numpy=1.15.4=py36h1d66e8a_0 61 | - numpy-base=1.15.4=py36h81de0dd_0 62 | - olefile=0.46=py36_0 63 | - openssl=1.0.2p=h14c3975_0 64 | - pandoc=2.2.3.2=0 65 | - pandocfilters=1.4.2=py36_1 66 | - parso=0.3.1=py36_0 67 | - pcre=8.42=h439df22_0 68 | - pexpect=4.6.0=py36_0 69 | - pickleshare=0.7.5=py36_0 70 | - 
pillow=5.3.0=py36h34e0f95_0 71 | - pip=18.1=py36_0 72 | - prometheus_client=0.4.2=py36_0 73 | - prompt_toolkit=2.0.7=py36_0 74 | - ptyprocess=0.6.0=py36_0 75 | - pycparser=2.19=py36_0 76 | - pygments=2.2.0=py36_0 77 | - pyparsing=2.3.0=py36_0 78 | - pyqt=5.9.2=py36h05f1152_2 79 | - python=3.6.6=h6e4f718_2 80 | - python-dateutil=2.7.5=py36_0 81 | - pytz=2018.7=py36_0 82 | - pyzmq=17.1.2=py36h14c3975_0 83 | - qt=5.9.6=h8703b6f_2 84 | - qtconsole=4.4.2=py36_0 85 | - readline=7.0=h7b6447c_5 86 | - scipy=1.1.0=py36hfa4b5c9_1 87 | - send2trash=1.5.0=py36_0 88 | - setuptools=40.6.2=py36_0 89 | - sip=4.19.8=py36hf484d3e_0 90 | - six=1.11.0=py36_1 91 | - sqlite=3.25.3=h7b6447c_0 92 | - terminado=0.8.1=py36_1 93 | - testpath=0.4.2=py36_0 94 | - tk=8.6.8=hbc83047_0 95 | - tornado=5.1.1=py36h7b6447c_0 96 | - traitlets=4.3.2=py36_0 97 | - wcwidth=0.1.7=py36_0 98 | - webencodings=0.5.1=py36_1 99 | - wheel=0.32.3=py36_0 100 | - widgetsnbextension=3.4.2=py36_0 101 | - xz=5.2.4=h14c3975_4 102 | - zeromq=4.2.5=hf484d3e_1 103 | - zlib=1.2.11=h7b6447c_3 104 | - pytorch=0.4.1=py36_py35_py27__9.0.176_7.1.2_2 105 | - torchvision=0.2.1=py36_1 106 | - pip: 107 | - opencv-python==3.4.4.19 108 | - torch==0.4.1.post2 109 | -------------------------------------------------------------------------------- /confusion.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | from hamming_dist import * 4 | from functools import reduce 5 | from calc_pre_rec import calc_pre_rec 6 | 7 | CODES_PATH = "./codes" 8 | CODES_FILE = "/12-17_20-25-11_25F03D.codes" 9 | 10 | if __name__ == "__main__": 11 | with open(CODES_PATH + CODES_FILE, "rb") as file: 12 | codes = pickle.load(file) 13 | 14 | gallery_codes, gallery_labels, test_codes, test_labels = codes 15 | gallery_codes = np.array(gallery_codes) 16 | test_codes = np.array(test_codes) 17 | 18 | gallery_labels = gallery_labels.numpy() 19 | test_labels = test_labels.numpy() 20 | gt = gallery_labels == test_labels.T 21 | 22 | hamm_dist = hamming_dist(gallery_codes, test_codes) 23 | total = reduce(lambda x,y: x*y, hamm_dist.shape) 24 | pt = hamm_dist <= 2 25 | true_pos = ((pt == gt) * pt).sum() 26 | true_neg = ((~pt == ~gt) * ~pt).sum() 27 | false_pos = ((pt == ~gt) * pt).sum() 28 | false_neg = ((~pt == gt) * ~pt).sum() 29 | print(true_pos, true_neg, false_pos, false_neg, total) 30 | print(true_pos + true_neg) 31 | print(false_pos + false_neg) 32 | print(true_pos + false_pos) 33 | print(true_neg + false_neg) 34 | 35 | avg_pre, avg_rec, _, _, _ = calc_pre_rec(hamm_dist, gt, 2) 36 | -------------------------------------------------------------------------------- /dataset.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treble-maker123/deep-face-hashing/0da530f5486685ae093d79994919b411dc4a4ac5/dataset.pickle -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | from torch.utils.data import Dataset 5 | import torchvision.transforms as T 6 | from utils import DATA_DIR, ALIGNED_DATA_DIR, get_data_path, mkdir, lsdir 7 | from pdb import set_trace 8 | from align import align 9 | from matplotlib import pyplot as plt 10 | import multiprocessing 11 | import torchvision.transforms.functional as tF 12 | from torch.utils.data.dataloader import default_collate 13 | from PIL import Image 14 | 15 | class 
FaceScrubDataset(Dataset): 16 | ''' 17 | The dataset has a total of 63903 images of 530 faces. The person with the most images has 191, and the person with the fewest has 39. 18 | 19 | 20 | Statistics for "comparison" type, 21 | 22 | The combination of datapoints is (63903 choose 2), which is 23 | (63903 * 63902) / 2 = 2,041,764,753 24 | 25 | The permutation of datapoints is 26 | 63903 ** 2 = 4,083,593,409 27 | 28 | Five images per person set aside for validation, giving 29 | (530 * 5) ** 2 = 7,022,500 comparison pairs 30 | 31 | Five images per person set aside for test, giving 32 | (530 * 5) ** 2 = 7,022,500 comparison pairs 33 | ''' 34 | def __init__(self, **kwargs): 35 | hash_dim = kwargs.get("hash_dim", 48) 36 | type = kwargs.get("type", "label") 37 | mode = kwargs.get("mode", "train") 38 | transform = kwargs.get("transform", []) 39 | normalize = kwargs.get("normalize", False) 40 | align = kwargs.get("align", False) 41 | 42 | if mode not in ["train", "val", "test"]: 43 | raise Exception("Invalid dataset mode") 44 | if type not in ["label", "comparison"]: 45 | raise Exception("Invalid dataset type") 46 | 47 | self.data_dir = ALIGNED_DATA_DIR if align else DATA_DIR 48 | self.mode = mode 49 | self.type = type 50 | self.names = lsdir(self.data_dir) 51 | self.img_paths = self._get_all_img_paths() 52 | self.hash_dim = hash_dim 53 | self.transform = T.Compose(transform) 54 | 55 | def __len__(self): 56 | if self.type == "comparison": 57 | return len(self.img_paths) ** 2 58 | elif self.type == "label": 59 | # if self.mode == "train": return 67 60 | # elif self.mode == "val": return 2 61 | # else: return 2 62 | return len(self.img_paths) 63 | 64 | def __getitem__(self, index): 65 | if self.type == "comparison": 66 | return self._get_data_comparison(index) 67 | elif self.type == "label": 68 | return self._get_data_label(index) 69 | else: 70 | raise Exception("Invalid dataset type") 71 | 72 | def _get_data_comparison(self, index): 73 | ''' 74 | For __getitem__() method. Return data at index in the format, 75 | (baseline_image, comparison_image, label) 76 | 77 | Label is an integer specifying whether the baseline and comparison are of the same person. 1 is True and 0 is False. 78 | ''' 79 | baseline, compare = self._get_pair_from_index(index) 80 | label = baseline.split("/")[2] == compare.split("/")[2] 81 | bimg = self._get_img_from_path(baseline) 82 | cimg = self._get_img_from_path(compare) 83 | return (bimg, cimg, int(label)) 84 | 85 | def _get_data_label(self, index): 86 | ''' 87 | For __getitem__() method. Return data at index in the format, 88 | (image, label) 89 | 90 | label is the integer index of the person's name in self.names. Returns 91 | None if the image cannot be loaded, so invalid_collate() can filter it out. 92 | ''' 93 | img_path = self.img_paths[index] 94 | name = img_path.split("/")[2] 95 | try: 96 | output = self._get_img_from_path(img_path), self.names.index(name) 97 | except Exception as error: 98 | # print("Exception encountered ({}): {}".format(index, error)) 99 | output = None 100 | 101 | return output 102 | 103 | def _get_pair_from_index(self, index): 104 | ''' 105 | Return the paths to a pair of images based on the index. 106 | ''' 107 | num_imgs = len(self.img_paths) 108 | x, y = index % num_imgs, index // num_imgs 109 | return self.img_paths[x], self.img_paths[y] 110 | 111 | def _get_folder_paths(self): 112 | ''' 113 | Return a list of folder paths for all of the people. 114 | ''' 115 | return list(map(lambda name: self.data_dir + "/" + name, self.names)) 116 | 117 | def _get_all_img_paths(self): 118 | ''' 119 | Return a list of all image paths.
120 | ''' 121 | paths = list(map(self._get_img_paths, self.names)) 122 | return sum(paths, []) 123 | 124 | def _get_img_paths(self, name): 125 | ''' 126 | Returns a list of image paths for the given person. 127 | ''' 128 | folder = self.data_dir + "/" + name 129 | 130 | if self.mode == "train": 131 | pass 132 | elif self.mode == "val": 133 | folder += "/val" 134 | elif self.mode == "test": 135 | folder += "/test" 136 | else: 137 | raise Exception("Invalid dataset mode") 138 | 139 | files = list(filter(lambda f: f not in ["val", "test"], lsdir(folder))) 140 | return list(map(lambda fp: folder + "/" + fp, files)) 141 | 142 | def _get_img_from_path(self, path): 143 | ''' 144 | Returns an image and applies the transformations defined in self.transform. 145 | ''' 146 | img = Image.open(path) 147 | if self.transform is not None: 148 | img = self.transform(img) 149 | return img 150 | 151 | def invalid_collate(batch): 152 | batch = list(filter(lambda X: X is not None, batch)) 153 | return default_collate(batch) 154 | 155 | def create_set(mode, num_imgs=5): 156 | ''' 157 | This method randomly moves up to num_imgs images per person from the DATA_DIR folder into that person's val/ or test/ subfolder. 158 | ''' 159 | options = ["val", "test"] 160 | if mode not in options: return 161 | # path of all of the people names, "./name" 162 | name_paths = list(map(lambda name: DATA_DIR + "/" + name, lsdir(DATA_DIR))) 163 | for path in name_paths: 164 | # "./name/val" 165 | test_path = path + "/" + mode 166 | mkdir(test_path) 167 | file_names = list(filter(lambda i: i not in options, lsdir(path))) 168 | num_names = len(file_names) 169 | idx = list(set(np.random.randint(0, num_names, num_names)))[:num_imgs] 170 | for i in idx: 171 | os.rename(path+"/"+file_names[i], test_path+"/"+file_names[i]) 172 | 173 | def undo_create_set(mode): 174 | ''' 175 | This method will undo create_set().
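Images previously moved into each person's val/ or test/ subfolder are moved back into that person's top-level folder.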
176 | ''' 177 | options = ["val", "test"] 178 | if mode not in options: return 179 | # path of all of the people names 180 | name_paths = list(map(lambda name: DATA_DIR + "/" + name, lsdir(DATA_DIR))) 181 | for path in name_paths: 182 | test_path = path + "/" + mode 183 | if not os.path.exists(test_path): 184 | continue 185 | test_imgs = lsdir(test_path) 186 | for i in range(len(test_imgs)): 187 | os.rename(test_path+"/"+test_imgs[i], path+"/"+test_imgs[i]) 188 | 189 | def assert_data_split_correct(): 190 | undo_create_set("val") 191 | undo_create_set("test") 192 | total_num = len(FaceScrubDataset(mode="train", type="comparison")) 193 | num_people = len(FaceScrubDataset(mode="train").names) 194 | assert total_num == 4083593409, "INCORRECT NUMBER OF IMAGES" 195 | create_set("val") 196 | create_set("test") 197 | train = len(FaceScrubDataset(mode="train", type="comparison")) 198 | val = len(FaceScrubDataset(mode="val", type="comparison")) 199 | test = len(FaceScrubDataset(mode="test", type="comparison")) 200 | assert val == (num_people * 5) ** 2 201 | assert test == (num_people * 5) ** 2 202 | 203 | def calc_mean(X): 204 | array = np.asarray(X[0]) 205 | R = array[:,:,0].mean() 206 | G = array[:,:,1].mean() 207 | B = array[:,:,2].mean() 208 | return R, G, B 209 | 210 | def calc_std(X): 211 | array = np.asarray(X[0]) 212 | R = array[:,:,0].std() 213 | G = array[:,:,1].std() 214 | B = array[:,:,2].std() 215 | return R, G, B 216 | 217 | def get_mean_std(): 218 | dataset = FaceScrubDataset(type="label") 219 | pool = multiprocessing.Pool(max(1, multiprocessing.cpu_count()-2)) 220 | print("Started calculating mean and stds") 221 | means = pool.map(calc_mean, dataset) 222 | stds = pool.map(calc_std, dataset) 223 | pool.close() 224 | pool.join() 225 | return means, stds 226 | 227 | if __name__ == "__main__": 228 | TRANSFORMS = [ 229 | T.Resize((64, 64)), 230 | T.ToTensor() 231 | ] 232 | dataset = FaceScrubDataset(transform=TRANSFORMS) 233 | img, label = dataset[4000] 234 | # assert_data_split_correct() 235 | 236 | # means, stds = get_mean_std() 237 | # red_mean = 0.6118626050840847 238 | # green_mean = 0.4627732225147951 239 | # blue_mean = 0.39181750819165523 240 | # red_std = 0.24004882860157573 241 | # green_std = 0.20515205679125115 242 | # blue_std = 0.19287499225344598 243 | pass 244 | -------------------------------------------------------------------------------- /ddh.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | import torchvision.transforms as T 6 | import multiprocessing 7 | from time import time 8 | from torch.utils.data import DataLoader 9 | from matplotlib import pyplot as plt 10 | 11 | from dataset import * 12 | 13 | class DDH(nn.Module): 14 | ''' 15 | # ========================================================================== 16 | # Discriminative Deep Hashing for Scalable Face Image Retrieval 17 | # https://www.ijcai.org/proceedings/2017/0315.pdf 18 | # ========================================================================== 19 | 20 | Image resized to 32x32, batch size of 256 21 | 22 | Conv1 = 3x3 kernel, 1 stride, 20 dim (output 31x31) 23 | Batch 24 | Pool1 = 2x2 kernel (output 15x15) 25 | 26 | Conv2 = 2x2 kernel, 1 stride, 40 dim (output 14x14) 27 | Batch 28 | Pool2 = 2x2 kernel (output 7x7) 29 | 30 | Conv3 = 2x2 kernel, 1 stride, 60 dim (output 6x6) 31 | Batch 32 | Pool3 = 2x2 kernel (output 3x3) 33 | 34 | Conv4 = 2x2 kernel, 1 stride, 80 dim (output 2x2) 35 | Batch 36 | 37 | Merge = 60*3*3 + 80*2*2 = 860 38 | 
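Note: the sizes above follow the paper's 32x32 inputs. With the 128x128 inputs used in this file (CUSTOM_PARAMS["img_size"]), conv3 outputs 60x15x15 and conv4 outputs 80x14x14, so the merged vector is 60*15*15 + 80*14*14 = 29180, the fc1 input size below.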
39 | Split into K groups, let K = 48 40 | 41 | 480 face features 42 | 48 groups of 10 features 43 | 48-bits 44 | 45 | # ========================================================================== 46 | # Simultaneous Feature Learning and Hash Coding with Deep Neural Networks 47 | # https://arxiv.org/pdf/1504.03410.pdf 48 | # ========================================================================== 49 | ''' 50 | def __init__(self, hash_dim=48, split_num=10, num_classes=530): 51 | super().__init__() 52 | self.cn1 = nn.Conv2d(3, 20, kernel_size=3) 53 | nn.init.kaiming_normal_(self.cn1.weight) 54 | self.bn1 = nn.BatchNorm2d(20) 55 | self.mp1 = nn.MaxPool2d(2) 56 | 57 | self.cn2 = nn.Conv2d(20, 40, kernel_size=2) 58 | nn.init.kaiming_normal_(self.cn2.weight) 59 | self.bn2 = nn.BatchNorm2d(40) 60 | self.mp2 = nn.MaxPool2d(2) 61 | 62 | self.cn3 = nn.Conv2d(40, 60, kernel_size=2) 63 | nn.init.kaiming_normal_(self.cn3.weight) 64 | self.bn3 = nn.BatchNorm2d(60) 65 | self.mp3 = nn.MaxPool2d(2) 66 | 67 | self.cn4 = nn.Conv2d(60, 80, kernel_size=2) 68 | nn.init.kaiming_normal_(self.cn4.weight) 69 | self.bn4 = nn.BatchNorm2d(80) 70 | 71 | # merge layer 72 | self.mg1 = Merge() 73 | self.fc1 = nn.Linear(29180, hash_dim*split_num) 74 | 75 | # hash layer 76 | self.de1 = DivideEncode(hash_dim*split_num, split_num) 77 | 78 | self.fc2 = nn.Linear(hash_dim, num_classes) 79 | 80 | def forward(self, X): 81 | l1 = self.mp1(F.relu(self.bn1(self.cn1(X)))) 82 | l2 = self.mp2(F.relu(self.bn2(self.cn2(l1)))) 83 | l3 = self.mp3(F.relu(self.bn3(self.cn3(l2)))) 84 | l4 = F.relu(self.bn4(self.cn4(l3))) 85 | # merge of output from layer 3 and 4 86 | l5 = self.mg1(l3, l4) 87 | # face feature layer 88 | l6 = F.relu(self.fc1(l5)) 89 | # divide and encode 90 | codes = self.de1(l6) 91 | scores = self.fc2(codes) 92 | return codes, scores 93 | 94 | class Merge(nn.Module): 95 | ''' 96 | Implementation of the Merged Layer in, 97 | 98 | Discriminative Deep Hashing for Scalable Face Image Retrieval 99 | https://www.ijcai.org/proceedings/2017/0315.pdf 100 | ''' 101 | def __init__(self): 102 | super().__init__() 103 | 104 | def forward(self, X1, X2): 105 | X1, X2 = self._flatten(X1), self._flatten(X2) 106 | return self._merge(X1, X2) 107 | 108 | def _flatten(self, X): 109 | N = X.shape[0] 110 | return X.view(N, -1) 111 | 112 | def _merge(self, X1, X2): 113 | return torch.cat((X1, X2), 1) 114 | 115 | class DivideEncode(nn.Module): 116 | ''' 117 | Implementation of the divide-and-encode module in, 118 | 119 | Simultaneous Feature Learning and Hash Coding with Deep Neural Networks 120 | https://arxiv.org/pdf/1504.03410.pdf 121 | ''' 122 | def __init__(self, num_inputs, num_per_group): 123 | super().__init__() 124 | assert num_inputs % num_per_group == 0, \ 125 | "num_inputs should be divisible by num_per_group."
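# shape sketch: with hash_dim=48 and split_num=10, the 480 incoming
# features form 48 groups of 10, and each group is collapsed to a single
# output by a learned weighted sum, one value per hash bit.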
126 | self.num_groups = num_inputs // num_per_group 127 | self.num_per_group = num_per_group 128 | weights_dim = (self.num_groups, self.num_per_group) 129 | self.weights = nn.Parameter(torch.empty(weights_dim)) 130 | nn.init.xavier_normal_(self.weights) 131 | 132 | def forward(self, X): 133 | X = X.view((-1, self.num_groups, self.num_per_group)) 134 | return X.mul(self.weights).sum(2) 135 | 136 | # ========================== 137 | # Hyperparameters 138 | # ========================== 139 | 140 | # number of epochs to train 141 | NUM_EPOCHS = 60 142 | # the number of hash bits in the output 143 | HASH_DIM = 48 144 | # the distance to use for calculating precision/recall 145 | HAMM_RADIUS = 2 146 | # top_k closest images to score for mean average precision 147 | TOP_K = 50 148 | # optimizer parameters 149 | OPTIM_PARAMS = { 150 | "lr": 1e-2, 151 | "weight_decay": 2e-4 152 | } 153 | CUSTOM_PARAMS = { 154 | "beta": 1.0, # quantization loss regularizer 155 | "img_size": 128 156 | } 157 | BATCH_SIZE = { 158 | "train": 256, 159 | "gallery": 128, 160 | "val": 256, 161 | "test": 256 162 | } 163 | LOADER_PARAMS = { 164 | "num_workers": multiprocessing.cpu_count() - 2, 165 | # "num_workers": 1 166 | } 167 | 168 | # ========================== 169 | # Setup 170 | # ========================== 171 | 172 | # uncomment to reset the data 173 | # undo_create_set("val") 174 | # undo_create_set("test") 175 | # create_set("val") 176 | # create_set("test") 177 | 178 | TRANSFORMS = [ 179 | T.Resize((CUSTOM_PARAMS['img_size'], CUSTOM_PARAMS['img_size'])), 180 | T.ToTensor() 181 | ] 182 | 183 | data_train = FaceScrubDataset(type="label", 184 | mode="train", 185 | transform=TRANSFORMS, 186 | hash_dim=HASH_DIM) 187 | 188 | data_val = FaceScrubDataset(type="label", 189 | mode="val", 190 | transform=TRANSFORMS, 191 | hash_dim=HASH_DIM) 192 | 193 | data_test = FaceScrubDataset(type="label", 194 | mode="test", 195 | transform=TRANSFORMS, 196 | hash_dim=HASH_DIM) 197 | 198 | # for training use, shuffling 199 | loader_train = DataLoader(data_train, 200 | batch_size=BATCH_SIZE["train"], 201 | shuffle=True, 202 | **LOADER_PARAMS) 203 | 204 | # for use as gallery, no shuffling 205 | loader_gallery = DataLoader(data_train, 206 | batch_size=BATCH_SIZE["gallery"], 207 | shuffle=False, 208 | **LOADER_PARAMS) 209 | 210 | loader_val = DataLoader(data_val, 211 | batch_size=BATCH_SIZE["val"], 212 | shuffle=False, 213 | **LOADER_PARAMS) 214 | loader_test = DataLoader(data_test, 215 | batch_size=BATCH_SIZE["test"], 216 | shuffle=False, 217 | **LOADER_PARAMS) 218 | 219 | model_class = DDH 220 | model = model_class(hash_dim=HASH_DIM) 221 | optimizer = optim.Adam(model.parameters(), **OPTIM_PARAMS) 222 | 223 | def train(model, loader, optim, logger, **kwargs): 224 | ''' 225 | Train for one epoch.
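Recognized kwargs (summarizing the code below): device (torch.device,
defaults to CPU) and print_iter (log every n iterations, default 40).
Returns the quantization loss and the classification (score) loss, each
averaged over the epoch's iterations.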
226 | ''' 227 | device = kwargs.get("device", torch.device("cpu")) 228 | print_iter = kwargs.get("print_iter", 40) 229 | 230 | model.to(device=device) 231 | # set model to train mode 232 | model.train() 233 | quant_losses = [] 234 | score_losses = [] 235 | 236 | for num_iter, (X, y) in enumerate(loader): 237 | optim.zero_grad() 238 | 239 | X = X.to(device).float() 240 | y = y.to(device).long() 241 | codes, scores = model(X) 242 | # quantization loss 243 | quant_loss = CUSTOM_PARAMS['beta'] * (codes.abs() - 1).abs().mean() 244 | # score error 245 | score_loss = F.cross_entropy(scores, y) 246 | # total loss 247 | loss = quant_loss + score_loss 248 | loss.backward() 249 | # apply gradient 250 | optim.step() 251 | # save the losses 252 | quant_losses.append(quant_loss.item()) 253 | score_losses.append(score_loss.item()) 254 | 255 | if (num_iter+1) % print_iter == 0: 256 | logger.write( 257 | "iter {} ".format(num_iter+1) + 258 | "- quant loss: {:.8f}, score loss: {:.8f}" 259 | .format(quant_loss.item(), score_loss.item())) 260 | 261 | return sum(quant_losses)/len(quant_losses), \ 262 | sum(score_losses)/len(score_losses) 263 | 264 | 265 | if __name__ == "__main__": 266 | # visualize the images 267 | # img = data_train[100][0].transpose(0, 1).transpose(1, 2) 268 | # plt.imshow(img) 269 | # plt.show() 270 | pass 271 | -------------------------------------------------------------------------------- /ddh2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | import torchvision.transforms as T 6 | import multiprocessing 7 | from time import time 8 | from torch.utils.data import DataLoader 9 | from matplotlib import pyplot as plt 10 | 11 | from dataset import * 12 | 13 | class DDH2(nn.Module): 14 | def __init__(self, hash_dim=48, split_num=40, num_classes=530): 15 | super().__init__() 16 | self.cn1 = nn.Conv2d(3, 32, kernel_size=3) 17 | nn.init.kaiming_normal_(self.cn1.weight) 18 | self.bn1 = nn.BatchNorm2d(32) 19 | self.mp1 = nn.MaxPool2d(2) 20 | 21 | self.cn2 = nn.Conv2d(32, 64, kernel_size=2) 22 | nn.init.kaiming_normal_(self.cn2.weight) 23 | self.bn2 = nn.BatchNorm2d(64) 24 | self.mp2 = nn.MaxPool2d(2) 25 | 26 | self.cn3 = nn.Conv2d(64, 128, kernel_size=2) 27 | nn.init.kaiming_normal_(self.cn3.weight) 28 | self.bn3 = nn.BatchNorm2d(128) 29 | self.mp3 = nn.MaxPool2d(2) 30 | 31 | self.cn4 = nn.Conv2d(128, 256, kernel_size=2) 32 | nn.init.kaiming_normal_(self.cn4.weight) 33 | self.bn4 = nn.BatchNorm2d(256) 34 | 35 | # merge layer 36 | self.mg1 = Merge() 37 | self.fc1 = nn.Linear(78976, hash_dim*split_num) 38 | 39 | # hash layer 40 | self.de1 = DivideEncode(hash_dim*split_num, split_num) 41 | 42 | self.fc2 = nn.Linear(hash_dim, num_classes) 43 | 44 | def forward(self, X): 45 | l1 = self.mp1(F.relu(self.bn1(self.cn1(X)))) 46 | l2 = self.mp2(F.relu(self.bn2(self.cn2(l1)))) 47 | l3 = self.mp3(F.relu(self.bn3(self.cn3(l2)))) 48 | l4 = F.relu(self.bn4(self.cn4(l3))) 49 | # merge of output from layer 3 and 4 50 | l5 = self.mg1(l3, l4) 51 | # face feature layer 52 | l6 = F.relu(self.fc1(l5)) 53 | # divide and encode 54 | codes = self.de1(l6) 55 | scores = self.fc2(codes) 56 | return codes, scores 57 | 58 | class Merge(nn.Module): 59 | ''' 60 | Implementation of the Merged Layer in, 61 | 62 | Discriminative Deep Hashing for Scalable Face Image Retrieval 63 | https://www.ijcai.org/proceedings/2017/0315.pdf 64 | ''' 65 | def __init__(self): 66 | super().__init__() 67 | 68 | def 
forward(self, X1, X2): 69 | X1, X2 = self._flatten(X1), self._flatten(X2) 70 | return self._merge(X1, X2) 71 | 72 | def _flatten(self, X): 73 | N = X.shape[0] 74 | return X.view(N, -1) 75 | 76 | def _merge(self, X1, X2): 77 | return torch.cat((X1, X2), 1) 78 | 79 | class DivideEncode(nn.Module): 80 | ''' 81 | Implementation of the divide-and-encode module in, 82 | 83 | Simultaneous Feature Learning and Hash Coding with Deep Neural Networks 84 | https://arxiv.org/pdf/1504.03410.pdf 85 | ''' 86 | def __init__(self, num_inputs, num_per_group): 87 | super().__init__() 88 | assert num_inputs % num_per_group == 0, \ 89 | "num_inputs should be divisible by num_per_group." 90 | self.num_groups = num_inputs // num_per_group 91 | self.num_per_group = num_per_group 92 | weights_dim = (self.num_groups, self.num_per_group) 93 | self.weights = nn.Parameter(torch.empty(weights_dim)) 94 | nn.init.xavier_normal_(self.weights) 95 | 96 | def forward(self, X): 97 | X = X.view((-1, self.num_groups, self.num_per_group)) 98 | return X.mul(self.weights).sum(2) 99 | 100 | # ========================== 101 | # Hyperparameters 102 | # ========================== 103 | 104 | # number of epochs to train 105 | NUM_EPOCHS = 40 106 | # the number of hash bits in the output 107 | HASH_DIM = 48 108 | # the distance to use for calculating precision/recall 109 | HAMM_RADIUS = 2 110 | # top_k closest images to score for mean average precision 111 | TOP_K = 50 112 | # optimizer parameters 113 | OPTIM_PARAMS = { 114 | "lr": 1e-2, 115 | "weight_decay": 2e-4 116 | } 117 | CUSTOM_PARAMS = { 118 | "beta": 1.0, # quantization loss regularizer 119 | "img_size": 128 120 | } 121 | BATCH_SIZE = { 122 | "train": 64, 123 | "gallery": 64, 124 | "val": 64, 125 | "test": 64 126 | } 127 | LOADER_PARAMS = { 128 | "num_workers": multiprocessing.cpu_count() - 2 129 | } 130 | 131 | # ========================== 132 | # Setup 133 | # ========================== 134 | 135 | # uncomment to reset the data 136 | # undo_create_set("val") 137 | # undo_create_set("test") 138 | # create_set("val") 139 | # create_set("test") 140 | 141 | TRANSFORMS = [ 142 | T.Resize((CUSTOM_PARAMS['img_size'], CUSTOM_PARAMS['img_size'])), 143 | T.ToTensor() 144 | ] 145 | 146 | data_train = FaceScrubDataset(type="label", 147 | mode="train", 148 | transform=TRANSFORMS, 149 | hash_dim=HASH_DIM) 150 | 151 | data_val = FaceScrubDataset(type="label", 152 | mode="val", 153 | transform=TRANSFORMS, 154 | hash_dim=HASH_DIM) 155 | 156 | data_test = FaceScrubDataset(type="label", 157 | mode="test", 158 | transform=TRANSFORMS, 159 | hash_dim=HASH_DIM) 160 | 161 | # for training use, shuffling 162 | loader_train = DataLoader(data_train, 163 | batch_size=BATCH_SIZE["train"], 164 | shuffle=True, 165 | **LOADER_PARAMS) 166 | 167 | # for use as gallery, no shuffling 168 | loader_gallery = DataLoader(data_train, 169 | batch_size=BATCH_SIZE["gallery"], 170 | shuffle=False, 171 | **LOADER_PARAMS) 172 | 173 | loader_val = DataLoader(data_val, 174 | batch_size=BATCH_SIZE["val"], 175 | shuffle=False, 176 | **LOADER_PARAMS) 177 | loader_test = DataLoader(data_test, 178 | batch_size=BATCH_SIZE["test"], 179 | shuffle=False, 180 | **LOADER_PARAMS) 181 | 182 | model_class = DDH2 183 | model = model_class(hash_dim=HASH_DIM) 184 | optimizer = optim.Adam(model.parameters(), **OPTIM_PARAMS) 185 | 186 | 187 | def train(model, loader, optim, logger, **kwargs): 188 | ''' 189 | Train for one epoch.
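This loop is identical to train() in ddh.py; see that file for the
recognized kwargs and return values.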
190 | ''' 191 | device = kwargs.get("device", torch.device("cpu")) 192 | print_iter = kwargs.get("print_iter", 40) 193 | 194 | model.to(device=device) 195 | # set model to train mode 196 | model.train() 197 | quant_losses = [] 198 | score_losses = [] 199 | 200 | for num_iter, (X, y) in enumerate(loader): 201 | optim.zero_grad() 202 | 203 | X = X.to(device).float() 204 | y = y.to(device).long() 205 | codes, scores = model(X) 206 | # quantization loss 207 | quant_loss = CUSTOM_PARAMS['beta'] * (codes.abs() - 1).abs().mean() 208 | # score error 209 | score_loss = F.cross_entropy(scores, y) 210 | # total loss 211 | loss = quant_loss + score_loss 212 | loss.backward() 213 | # apply gradient 214 | optim.step() 215 | # save the losses 216 | quant_losses.append(quant_loss.item()) 217 | score_losses.append(score_loss.item()) 218 | 219 | if (num_iter+1) % print_iter == 0: 220 | logger.write( 221 | "iter {} ".format(num_iter+1) + 222 | "- quant loss: {:.8f}, score loss: {:.8f}" 223 | .format(quant_loss.item(), score_loss.item())) 224 | 225 | return sum(quant_losses)/len(quant_losses), \ 226 | sum(score_losses)/len(score_losses) 227 | -------------------------------------------------------------------------------- /ddh3.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | import torchvision.transforms as T 6 | import multiprocessing 7 | from time import time 8 | from torch.utils.data import DataLoader 9 | from matplotlib import pyplot as plt 10 | 11 | from dataset import * 12 | 13 | class DDH3(nn.Module): 14 | ''' 15 | # ========================================================================== 16 | # Discriminative Deep Hashing for Scalable Face Image Retrieval 17 | # https://www.ijcai.org/proceedings/2017/0315.pdf 18 | # ========================================================================== 19 | 20 | Image resized to 32x32, batch size of 256 21 | 22 | Conv1 = 3x3 kernel, 1 stride, 20 dim (output 31x31) 23 | Batch 24 | Pool1 = 2x2 kernel (output 15x15) 25 | 26 | Conv2 = 2x2 kernel, 1 stride, 40 dim (output 14x14) 27 | Batch 28 | Pool2 = 2x2 kernel (output 7x7) 29 | 30 | Conv3 = 2x2 kernel, 1 stride, 60 dim (output 6x6) 31 | Batch 32 | Pool3 = 2x2 kernel (output 3x3) 33 | 34 | Conv4 = 2x2 kernel, 1 stride, 80 dim (output 2x2) 35 | Batch 36 | 37 | Merge = 60*3*3 + 80*2*2 = 860 38 | 39 | Split into K groups, let K = 48 40 | 41 | 480 face features 42 | 48 groups of 10 features 43 | 48-bits 44 | 45 | # ========================================================================== 46 | # Simultaneous Feature Learning and Hash Coding with Deep Neural Networks 47 | # https://arxiv.org/pdf/1504.03410.pdf 48 | # ========================================================================== 49 | ''' 50 | def __init__(self, hash_dim=48, split_num=10): 51 | super().__init__() 52 | self.cn1 = nn.Conv2d(3, 20, kernel_size=3) 53 | nn.init.kaiming_normal_(self.cn1.weight) 54 | self.bn1 = nn.BatchNorm2d(20) 55 | self.mp1 = nn.MaxPool2d(2) 56 | 57 | self.cn2 = nn.Conv2d(20, 40, kernel_size=2) 58 | nn.init.kaiming_normal_(self.cn2.weight) 59 | self.bn2 = nn.BatchNorm2d(40) 60 | self.mp2 = nn.MaxPool2d(2) 61 | 62 | self.cn3 = nn.Conv2d(40, 60, kernel_size=2) 63 | nn.init.kaiming_normal_(self.cn3.weight) 64 | self.bn3 = nn.BatchNorm2d(60) 65 | self.mp3 = nn.MaxPool2d(2) 66 | 67 | self.cn4 = nn.Conv2d(60, 80, kernel_size=2) 68 | nn.init.kaiming_normal_(self.cn4.weight) 69 | self.bn4 = 
nn.BatchNorm2d(80) 70 | 71 | # merge layer 72 | self.mg1 = Merge() 73 | self.fc1 = nn.Linear(29180, hash_dim*split_num) 74 | nn.init.kaiming_normal_(self.fc1.weight) 75 | self.bn5 = nn.BatchNorm1d(hash_dim*split_num) 76 | 77 | # hash layer 78 | self.fc2 = nn.Linear(hash_dim*split_num, hash_dim) 79 | 80 | def forward(self, X): 81 | l1 = self.mp1(F.relu(self.bn1(self.cn1(X)))) 82 | l2 = self.mp2(F.relu(self.bn2(self.cn2(l1)))) 83 | l3 = self.mp3(F.relu(self.bn3(self.cn3(l2)))) 84 | l4 = F.relu(self.bn4(self.cn4(l3))) 85 | # merge of output from layer 3 and 4 86 | l5 = self.mg1(l3, l4) 87 | # face feature layer 88 | l6 = F.relu(self.fc1(l5)) 89 | # hash layer (this variant uses a single linear layer, no divide-and-encode) 90 | codes = self.fc2(l6) 91 | return torch.tanh(codes), None 92 | 93 | class Merge(nn.Module): 94 | ''' 95 | Implementation of the Merged Layer in, 96 | 97 | Discriminative Deep Hashing for Scalable Face Image Retrieval 98 | https://www.ijcai.org/proceedings/2017/0315.pdf 99 | ''' 100 | def __init__(self): 101 | super(Merge, self).__init__() 102 | 103 | def forward(self, X1, X2): 104 | X1, X2 = self._flatten(X1), self._flatten(X2) 105 | return self._merge(X1, X2) 106 | 107 | def _flatten(self, X): 108 | N = X.shape[0] 109 | return X.view(N, -1) 110 | 111 | def _merge(self, X1, X2): 112 | return torch.cat((X1, X2), 1) 113 | 114 | # ========================== 115 | # Hyperparameters 116 | # ========================== 117 | 118 | # number of epochs to train 119 | NUM_EPOCHS = 40 120 | # the number of hash bits in the output 121 | HASH_DIM = 48 122 | # the distance to use for calculating precision/recall 123 | HAMM_RADIUS = 2 124 | # top_k closest images to score for mean average precision 125 | TOP_K = 50 126 | # optimizer parameters 127 | OPTIM_PARAMS = { 128 | "lr": 1e-2, 129 | "weight_decay": 2e-4 130 | } 131 | CUSTOM_PARAMS = { 132 | "dist_threshold": 6, # distance threshold 133 | "alpha": 1e-10, # quantization error 134 | "print_iter": 1, # print every n iterations 135 | "eps": 1e-8, # term added to l2_distance 136 | "gamma": 1e-3, # negative slope when calculating threshold 137 | "img_size": 128 138 | } 139 | BATCH_SIZE = { 140 | # "train": 512, 141 | "train": 32, 142 | "gallery": 128, 143 | "val": 512, 144 | "test": 512 145 | } 146 | LOADER_PARAMS = { 147 | "num_workers": multiprocessing.cpu_count() - 2, 148 | # "num_workers": 1 149 | } 150 | 151 | # ========================== 152 | # Setup 153 | # ========================== 154 | 155 | # uncomment to reset the data 156 | # undo_create_set("val") 157 | # undo_create_set("test") 158 | # create_set("val") 159 | # create_set("test") 160 | 161 | TRANSFORMS = [ 162 | T.Resize((CUSTOM_PARAMS['img_size'], CUSTOM_PARAMS['img_size'])), 163 | T.ToTensor() 164 | ] 165 | 166 | data_train = FaceScrubDataset(type="label", 167 | mode="train", 168 | transform=TRANSFORMS, 169 | hash_dim=HASH_DIM) 170 | 171 | data_val = FaceScrubDataset(type="label", 172 | mode="val", 173 | transform=TRANSFORMS, 174 | hash_dim=HASH_DIM) 175 | 176 | data_test = FaceScrubDataset(type="label", 177 | mode="test", 178 | transform=TRANSFORMS, 179 | hash_dim=HASH_DIM) 180 | 181 | # for training use, shuffling 182 | loader_train = DataLoader(data_train, 183 | batch_size=BATCH_SIZE["train"], 184 | shuffle=True, 185 | **LOADER_PARAMS) 186 | 187 | # for use as gallery, no shuffling 188 | loader_gallery = DataLoader(data_train, 189 | batch_size=BATCH_SIZE["gallery"], 190 | shuffle=False, 191 | **LOADER_PARAMS) 192 | 193 | loader_val = DataLoader(data_val, 194 | batch_size=BATCH_SIZE["val"], 195 | shuffle=False, 196 | 
**LOADER_PARAMS) 197 | loader_test = DataLoader(data_test, 198 | batch_size=BATCH_SIZE["test"], 199 | shuffle=False, 200 | **LOADER_PARAMS) 201 | 202 | model_class = DDH3 203 | model = model_class(hash_dim=HASH_DIM) 204 | optimizer = optim.Adam(model.parameters(), **OPTIM_PARAMS) 205 | 206 | def train(model, loader, optim, logger, **kwargs): 207 | ''' 208 | Train for one epoch. 209 | ''' 210 | device = kwargs.get("device", torch.device("cpu")) 211 | print_iter = kwargs.get("print_iter", 40) 212 | # the distance threshold above which the dissimilar pairs will contribute 0 213 | # to the loss. 214 | mu = kwargs.get("dist_threshold", 2) 215 | # quantization loss regularizer 216 | alpha = kwargs.get("alpha", 0.01) 217 | 218 | model.to(device=device) 219 | # set model to train mode 220 | model.train() 221 | 222 | for num_iter, (X, y) in enumerate(loader): 223 | optim.zero_grad() 224 | 225 | half_size = BATCH_SIZE["train"] // 2 226 | half_size = len(X) // 2 if len(X) < BATCH_SIZE["train"] else half_size 227 | X1 = X[:half_size].float().to(device=device) 228 | X2 = X[half_size:].float().to(device=device) 229 | y1 = y[:half_size].long().to(device=device) 230 | y2 = y[half_size:].long().to(device=device) 231 | with torch.no_grad(): 232 | if len(X2) > len(X1): 233 | # get rid of the last row 234 | X2 = X2[:-1] 235 | y2 = y2[:-1] 236 | 237 | # figure out the ground truth table 238 | y1_gt = y1[None, :].repeat(half_size, 1) 239 | y2_gt = y2[:, None].repeat(1, half_size) 240 | # 1 for similar pairs, 0 for dissimilar pairs 241 | sim_gt = (y1_gt == y2_gt).float() 242 | dissim_gt = (1 - sim_gt) 243 | 244 | C1, _ = model(X1) 245 | C2, _ = model(X2) 246 | 247 | l2_dist = ((C1[:, None, :] - C2) ** 2 + 1e-8).sum(dim=2).sqrt() 248 | # minimize l2_dist for similar pairs (gt at i, j == 1) 249 | similar_loss = (sim_gt * l2_dist).sum() 250 | similar_loss /= (sim_gt.sum() + 1) 251 | # maximize l2_dist for dissimilar pairs 252 | threshold = F.leaky_relu(mu - l2_dist, 253 | negative_slope=CUSTOM_PARAMS['gamma']) 254 | dissimilar_loss = ((1 - sim_gt) * threshold).sum() 255 | dissimilar_loss /= (dissim_gt.sum() + 1) 256 | # similarity loss 257 | sim_loss = similar_loss + dissimilar_loss 258 | # quantization loss 259 | quant_loss = alpha * \ 260 | ((C1.abs() - 1).abs() + ((C2.abs() - 1)).abs()).sum() 261 | 262 | # total loss 263 | loss = sim_loss + quant_loss 264 | # back-propagate 265 | loss.backward() 266 | # apply gradient 267 | optim.step() 268 | 269 | if (num_iter+1) % print_iter == 0: 270 | logger.write( 271 | "iter {}/{} ".format(num_iter+1, len(loader)) + 272 | "- quant loss: {:.4f}, sim loss: {:.4f}, dissim loss: {:.4f}" 273 | .format(quant_loss.item(), similar_loss.item(), 274 | dissimilar_loss.item())) 275 | -------------------------------------------------------------------------------- /ddh4.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | import torchvision.transforms as T 6 | import multiprocessing 7 | from time import time 8 | from torch.utils.data import DataLoader 9 | from matplotlib import pyplot as plt 10 | 11 | from dataset import * 12 | 13 | class DDH4(nn.Module): 14 | ''' 15 | # ========================================================================== 16 | # Discriminative Deep Hashing for Scalable Face Image Retrieval 17 | # https://www.ijcai.org/proceedings/2017/0315.pdf 18 | # 
========================================================================== 19 | 20 | Introduced distance loss metrics. 21 | 22 | Conv1 = 3x3 kernel, 1 stride, 20 dim (output 31x31) 23 | Batch 24 | Pool1 = 2x2 kernel (output 15x15) 25 | 26 | Conv2 = 2x2 kernel, 1 stride, 40 dim (output 14x14) 27 | Batch 28 | Pool2 = 2x2 kernel (output 7x7) 29 | 30 | Conv3 = 2x2 kernel, 1 stride, 60 dim (output 6x6) 31 | Batch 32 | Pool3 = 2x2 kernel (output 3x3) 33 | 34 | Conv4 = 2x2 kernel, 1 stride, 80 dim (output 2x2) 35 | Batch 36 | 37 | Merge = 60*3*3 + 80*2*2 = 860 38 | 39 | Split into K groups, let K = 48 40 | 41 | 480 face features 42 | 48 groups of 10 features 43 | 48-bits 44 | 45 | # ========================================================================== 46 | # Simultaneous Feature Learning and Hash Coding with Deep Neural Networks 47 | # https://arxiv.org/pdf/1504.03410.pdf 48 | # ========================================================================== 49 | ''' 50 | def __init__(self, hash_dim=48, split_num=10, num_classes=530): 51 | super().__init__() 52 | self.cn1 = nn.Conv2d(3, 20, kernel_size=3) 53 | nn.init.kaiming_normal_(self.cn1.weight) 54 | self.bn1 = nn.BatchNorm2d(20) 55 | self.mp1 = nn.MaxPool2d(2) 56 | 57 | self.cn2 = nn.Conv2d(20, 40, kernel_size=2) 58 | nn.init.kaiming_normal_(self.cn2.weight) 59 | self.bn2 = nn.BatchNorm2d(40) 60 | self.mp2 = nn.MaxPool2d(2) 61 | 62 | self.cn3 = nn.Conv2d(40, 60, kernel_size=2) 63 | nn.init.kaiming_normal_(self.cn3.weight) 64 | self.bn3 = nn.BatchNorm2d(60) 65 | self.mp3 = nn.MaxPool2d(2) 66 | 67 | self.cn4 = nn.Conv2d(60, 80, kernel_size=2) 68 | nn.init.kaiming_normal_(self.cn4.weight) 69 | self.bn4 = nn.BatchNorm2d(80) 70 | 71 | # merge layer 72 | self.mg1 = Merge() 73 | self.fc1 = nn.Linear(29180, hash_dim*split_num) 74 | 75 | # hash layer 76 | self.de1 = DivideEncode(hash_dim*split_num, split_num) 77 | 78 | self.fc2 = nn.Linear(hash_dim, num_classes) 79 | 80 | def forward(self, X): 81 | l1 = self.mp1(F.relu(self.bn1(self.cn1(X)))) 82 | l2 = self.mp2(F.relu(self.bn2(self.cn2(l1)))) 83 | l3 = self.mp3(F.relu(self.bn3(self.cn3(l2)))) 84 | l4 = F.relu(self.bn4(self.cn4(l3))) 85 | # merge of output from layer 3 and 4 86 | l5 = self.mg1(l3, l4) 87 | # face feature layer 88 | l6 = F.relu(self.fc1(l5)) 89 | # divide and encode 90 | codes = self.de1(l6) 91 | scores = self.fc2(codes) 92 | return codes, scores 93 | 94 | class Merge(nn.Module): 95 | ''' 96 | Implementation of the Merged Layer in, 97 | 98 | Discriminative Deep Hashing for Scalable Face Image Retrieval 99 | https://www.ijcai.org/proceedings/2017/0315.pdf 100 | ''' 101 | def __init__(self): 102 | super().__init__() 103 | 104 | def forward(self, X1, X2): 105 | X1, X2 = self._flatten(X1), self._flatten(X2) 106 | return self._merge(X1, X2) 107 | 108 | def _flatten(self, X): 109 | N = X.shape[0] 110 | return X.view(N, -1) 111 | 112 | def _merge(self, X1, X2): 113 | return torch.cat((X1, X2), 1) 114 | 115 | class DivideEncode(nn.Module): 116 | ''' 117 | Implementation of the divide-and-encode module in, 118 | 119 | Simultaneous Feature Learning and Hash Coding with Deep Neural Networks 120 | https://arxiv.org/pdf/1504.03410.pdf 121 | ''' 122 | def __init__(self, num_inputs, num_per_group): 123 | super().__init__() 124 | assert num_inputs % num_per_group == 0, \ 125 | "num_inputs should be divisible by num_per_group."
126 | self.num_groups = num_inputs // num_per_group 127 | self.num_per_group = num_per_group 128 | weights_dim = (self.num_groups, self.num_per_group) 129 | self.weights = nn.Parameter(torch.empty(weights_dim)) 130 | nn.init.xavier_normal_(self.weights) 131 | 132 | def forward(self, X): 133 | X = X.view((-1, self.num_groups, self.num_per_group)) 134 | return X.mul(self.weights).sum(2) 135 | 136 | # ========================== 137 | # Hyperparameters 138 | # ========================== 139 | 140 | # number of epochs to train 141 | NUM_EPOCHS = 60 142 | # the number of hash bits in the output 143 | HASH_DIM = 48 144 | # the distance to use for calculating precision/recall 145 | HAMM_RADIUS = 2 146 | # top_k closest images to score for mean average precision 147 | TOP_K = 50 148 | # optimizer parameters 149 | OPTIM_PARAMS = { 150 | "lr": 1e-2, 151 | "weight_decay": 2e-4 152 | } 153 | CUSTOM_PARAMS = { 154 | "alpha": 1.0, # quantization loss regularizer 155 | "beta": 1.0, # score loss regularizer 156 | "gamma": 1.0, # distance loss regularizer 157 | "mu": 6, # threshold for distance contribution to loss 158 | "print_iter": 40, # print every n iterations 159 | "img_size": 128 160 | } 161 | BATCH_SIZE = { 162 | "train": 256, 163 | "gallery": 256, 164 | "val": 256, 165 | "test": 256 166 | } 167 | LOADER_PARAMS = { 168 | # "num_workers": 4, 169 | "num_workers": multiprocessing.cpu_count() - 1, 170 | "collate_fn": invalid_collate 171 | } 172 | 173 | # ========================== 174 | # Setup 175 | # ========================== 176 | 177 | # uncomment to reset the data 178 | # undo_create_set("val") 179 | # undo_create_set("test") 180 | # create_set("val") 181 | # create_set("test") 182 | TRANSFORMS = [ 183 | T.Resize((CUSTOM_PARAMS['img_size'], CUSTOM_PARAMS['img_size'])), 184 | T.ToTensor() 185 | ] 186 | 187 | DATASET_PARAMS = { 188 | "align": True, 189 | "type": "label", 190 | "transform": TRANSFORMS, 191 | "hash_dim": HASH_DIM 192 | } 193 | 194 | data_train = FaceScrubDataset(mode="train", 195 | **DATASET_PARAMS) 196 | 197 | data_val = FaceScrubDataset(mode="val", 198 | **DATASET_PARAMS) 199 | 200 | data_test = FaceScrubDataset(mode="test", 201 | **DATASET_PARAMS) 202 | 203 | # for training use, shuffling 204 | loader_train = DataLoader(data_train, 205 | batch_size=BATCH_SIZE["train"], 206 | shuffle=True, 207 | **LOADER_PARAMS) 208 | 209 | # for use as gallery, no shuffling 210 | loader_gallery = DataLoader(data_train, 211 | batch_size=BATCH_SIZE["gallery"], 212 | shuffle=False, 213 | **LOADER_PARAMS) 214 | 215 | loader_val = DataLoader(data_val, 216 | batch_size=BATCH_SIZE["val"], 217 | shuffle=False, 218 | **LOADER_PARAMS) 219 | loader_test = DataLoader(data_test, 220 | batch_size=BATCH_SIZE["test"], 221 | shuffle=False, 222 | **LOADER_PARAMS) 223 | 224 | model_class = DDH4 225 | model = model_class(hash_dim=HASH_DIM) 226 | optimizer = optim.Adam(model.parameters(), **OPTIM_PARAMS) 227 | 228 | def train(model, loader, optim, logger, **kwargs): 229 | ''' 230 | Train for one epoch.
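On top of the quantization and classification losses, this variant adds a
pairwise distance loss between the two halves of each batch. Only the
averaged quantization and classification losses are returned; the distance
losses are logged but not returned.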
231 |     '''
232 |     device = kwargs.get("device", torch.device("cpu"))
233 |     print_iter = kwargs.get("print_iter", CUSTOM_PARAMS['print_iter'])
234 | 
235 |     model.to(device=device)
236 |     # set model to train mode
237 |     model.train()
238 |     quant_losses = []
239 |     score_losses = []
240 | 
241 |     for num_iter, (X, y) in enumerate(loader):
242 |         optim.zero_grad()
243 | 
244 |         X = X.to(device).float()
245 |         y = y.to(device).long()
246 |         codes, scores = model(X)
247 | 
248 |         # pair the first half of the batch against the second half;
249 |         # slice the codes outside of no_grad so the distance loss can
250 |         # backpropagate into the network (any odd leftover is dropped)
251 |         half_size = len(X) // 2
252 |         C1, C2 = codes[:half_size], codes[half_size:2*half_size]
253 | 
254 |         with torch.no_grad():
255 |             # pairwise ground truth, aligned with l2_dist below:
256 |             # sim_gt[i, j] = 1 iff C1[i] and C2[j] share an identity
257 |             y1 = y[:half_size][:, None]
258 |             y2 = y[half_size:2*half_size][None, :]
259 |             sim_gt = (y1 == y2).float()
260 |             diff_gt = 1 - sim_gt
261 | 
262 |         # distance loss
263 |         l2_dist = ((C1[:, None, :] - C2) ** 2 + 1e-8).sum(dim=2).sqrt()
264 |         sim_loss = (sim_gt * l2_dist).mean()
265 |         threshold = torch.max(CUSTOM_PARAMS['mu'] - l2_dist,
266 |                               torch.zeros_like(l2_dist))
267 |         diff_loss = (diff_gt * threshold).mean()
268 |         dist_loss = 0.10 * sim_loss + 0.90 * diff_loss
269 |         # quantization loss
270 |         quant_loss = (codes.abs() - 1).abs().mean()
271 |         # score error
272 |         score_loss = F.cross_entropy(scores, y)
273 |         # total loss
274 |         loss = CUSTOM_PARAMS['alpha'] * quant_loss + \
275 |                CUSTOM_PARAMS['beta'] * score_loss + \
276 |                CUSTOM_PARAMS['gamma'] * dist_loss
277 |         loss.backward()
278 |         # apply gradient
279 |         optim.step()
280 |         # save the losses
281 |         quant_losses.append(quant_loss.item())
282 |         score_losses.append(score_loss.item())
283 | 
284 |         if (num_iter+1) % print_iter == 0:
285 |             logger.write(
286 |                 "iter {} ".format(num_iter+1) +
287 |                 "- quant loss: {:.4f}, score loss: {:.4f}, sim loss: {:.4f}, diff loss: {:.4f}"
288 |                 .format(quant_loss.item(), score_loss.item(), sim_loss.item(), diff_loss.item()))
289 | 
-------------------------------------------------------------------------------- /eval_perf.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | from hamming_dist import *
3 | from calc_map import *
4 | from calc_pre_rec import *
5 | 
6 | def eval_perf(gallery_codes, gallery_label, test_codes, test_label, **kwargs):
7 |     top_k = kwargs.get("top_k", 50)
8 |     hamm_radius = kwargs.get("hamm_radius", 2)
9 | 
10 |     gallery_codes = gallery_codes.cpu().numpy()
11 |     gallery_label = gallery_label.cpu().numpy()
12 |     test_codes = test_codes.cpu().numpy()
13 |     test_label = test_label.cpu().numpy()
14 | 
15 |     # how many matches between train and test
16 |     label_match = (gallery_label == test_label.T).astype("int8")
17 | 
18 |     dist = hamming_dist(gallery_codes, test_codes)
19 |     ranked = np.argsort(dist, axis=0)
20 | 
21 |     # mean average precision
22 |     mean_ap = calc_map(label_match, ranked, top_k=top_k)
23 | 
24 |     # calculate precision and recall curve
25 |     avg_pre, avg_rec, avg_hmean, pre_curve, rec_curve = \
26 |         calc_pre_rec(dist, label_match, hamm_radius)
27 | 
28 |     return avg_pre, avg_rec, avg_hmean, pre_curve, rec_curve, mean_ap
29 | 
-------------------------------------------------------------------------------- /gather.py: --------------------------------------------------------------------------------
1 | import torch
2 | import pickle
3 | from ddh2 import *
4 | from logger import *
5 | from predict import *
6 | from utils import *
7 | 
8 | MODELS_PATH = "./saved_models"
9 | MODEL_NAME = "/12-08_16-14-22_BA977C.pt"
10 | 
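# NOTE: MODEL_NAME should point at a checkpoint written by run.py;
# model_class, HASH_DIM and the data loaders come from `from ddh2 import *`.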
CODES_PATH = "./codes" 11 | 12 | if torch.cuda.is_available(): 13 | device = torch.device("cuda") 14 | torch.set_default_tensor_type('torch.cuda.FloatTensor') 15 | else: 16 | device = torch.device("cpu") 17 | 18 | mkdir(CODES_PATH) 19 | 20 | if __name__ == "__main__": 21 | gallery = loader_gallery 22 | test = loader_test 23 | model = model_class(hash_dim=HASH_DIM) 24 | model.load_state_dict(torch.load(MODELS_PATH + MODEL_NAME)) 25 | 26 | with Logger(write_to_file=False) as logger: 27 | gallery_codes, gallery_label, test_codes, test_label = \ 28 | predict(model, gallery, test, logger, device=device) 29 | 30 | logger.write("Finished generating codes, writing to output...") 31 | output = (gallery_codes.cpu(), gallery_label.cpu(), 32 | test_codes.cpu(), test_label.cpu()) 33 | output_fn = MODEL_NAME.split(".")[0] + ".codes" 34 | with open(CODES_PATH + output_fn, "wb") as file: 35 | pickle.dump(output, file) 36 | -------------------------------------------------------------------------------- /hamming_dist.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pdb import set_trace 3 | 4 | def hamming_dist(A, B): 5 | ''' 6 | Calculates the hamming distance between two arrays with binary values. 7 | 8 | Args: 9 | - A (numpy.ndarray): array of shape (N, C) where N is the number of 10 | samples and C is the number of binary codes. 11 | - B (numpy.ndarray): array of shape (M, C) where M is the number of 12 | samples and C is the number of binary codes. 13 | 14 | Returns: 15 | (numpy.ndarray): array of shape (N, M), where the ith N and jth M is 16 | the hamming distance between the ith A and jth B. 17 | ''' 18 | code_len = A.shape[1] 19 | A = (2 * A) - 1 20 | B = (2 * B) - 1 21 | dists = 0.5 * (code_len - A.dot(B.T)) 22 | return dists.astype("int") 23 | 24 | if __name__ == "__main__": 25 | A = np.array([[0,1,0],[1,1,0]]) 26 | B = np.array([[1,0,1],[1,1,1]]) 27 | # [0,1,0] ^ [1,0,1] => 3 28 | # [0,1,0] ^ [1,1,1] => 2 29 | # ... 30 | output = np.array([[3, 2],[2,1]]) 31 | num_correct = (hamming_dist(A, B) == output).sum() 32 | assert num_correct == output.size, "Invalid output!" 33 | -------------------------------------------------------------------------------- /logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | from time import time, strftime 3 | from datetime import datetime 4 | from utils import * 5 | 6 | class Logger(object): 7 | ''' 8 | Convenience class for logging metrics to a file. 
9 | ''' 10 | def __init__(self, **kwargs): 11 | self.print_to_stdout = kwargs.get("print_to_std", True) 12 | self._write_to_file = kwargs.get("write_to_file", False) 13 | current_dir = os.getcwd() 14 | file_name = kwargs.get("file_name", self._get_time()) 15 | self.output_path = kwargs.get("output_path", 16 | current_dir + "/logs/{}.txt" 17 | .format(file_name)) 18 | if self._write_to_file: 19 | mkdir(current_dir + "/logs") 20 | 21 | def __enter__(self): 22 | if self._write_to_file: 23 | self.file = open(self.output_path, "a+") # create and append 24 | self.file.write("{}: Logger initialized\n".format(self._get_time())) 25 | return self 26 | 27 | def __exit__(self, type, value, traceback): 28 | if self._write_to_file: 29 | self.file.write("{}: Logger closing\n\n\n".format(self._get_time())) 30 | self.file.close() 31 | 32 | def write(self, message): 33 | if self.print_to_stdout: print(message) 34 | if self._write_to_file: 35 | self.file.write("{}: {}\n".format(self._get_time(), message)) 36 | 37 | def _get_time(self): 38 | return datetime.now().strftime("%Y-%m-%d-%H:%M:%S") 39 | -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def predict(model, loader_gallery, loader_test, logger, **kwargs): 5 | # moving model to CPU because GPU doesn't have enough memory 6 | device = kwargs.get("device", torch.device("cpu")) 7 | print_iter = kwargs.get("print_iter", 100) 8 | 9 | model.to(device=device) 10 | # set model to evaluation mode 11 | model.eval() 12 | 13 | # [gallery_codes, gallery_label, test_codes, test_label] 14 | data = [None] * 4 15 | 16 | with torch.no_grad(): 17 | # process the gallery images 18 | logger.write("Hashing {} gallery images..." 19 | .format(len(loader_gallery.dataset))) 20 | for idx, (X, y) in enumerate(loader_gallery): 21 | gcodes, _ = model(X.to(device=device)) 22 | 23 | if data[0] is None: 24 | data[0] = gcodes 25 | else: 26 | data[0] = torch.cat((data[0], gcodes)) 27 | 28 | if data[1] is None: 29 | data[1] = y 30 | else: 31 | data[1] = torch.cat((data[1], y)) 32 | 33 | if idx % print_iter == 0: 34 | logger.write("{}/{} gallery batches completed..." \ 35 | .format(idx, len(loader_gallery))) 36 | 37 | assert len(loader_gallery.dataset) == len(data[0]) 38 | assert len(loader_gallery.dataset) == len(data[1]) 39 | 40 | logger.write("Hashing test images and labels...") 41 | # process the test images 42 | for idx, (X, y) in enumerate(loader_test): 43 | tcodes, _ = model(X.to(device=device)) 44 | 45 | if data[2] is None: 46 | data[2] = tcodes 47 | else: 48 | data[2] = torch.cat((data[2], tcodes)) 49 | 50 | if data[3] is None: 51 | data[3] = y 52 | else: 53 | data[3] = torch.cat((data[3], y)) 54 | 55 | if idx % print_iter == 0: 56 | logger.write("{}/{} test batches completed..." 
\ 57 | .format(idx, len(loader_test))) 58 | 59 | gallery_codes, gallery_label, test_codes, test_label = data 60 | # activating with sign function 61 | bin_gallery_codes = gallery_codes > 0 62 | bin_test_codes = test_codes > 0 63 | 64 | # reshape labels so gallery and test match shape 65 | gallery_label = gallery_label.unsqueeze(1) 66 | test_label = test_label.unsqueeze(1) 67 | gallery_label = gallery_label.repeat(1, test_label.shape[0]) 68 | test_label = test_label.repeat(1, gallery_label.shape[0]) 69 | 70 | return bin_gallery_codes, gallery_label, bin_test_codes, test_label 71 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import uuid 4 | import pickle 5 | from functools import reduce 6 | from time import time, strftime 7 | from datetime import datetime 8 | 9 | import torch 10 | import numpy as np 11 | import torchvision.transforms as T 12 | from torch.utils.data import DataLoader, Dataset, sampler 13 | from matplotlib import pyplot as plt 14 | 15 | from pdb import set_trace 16 | from logger import * 17 | from eval_perf import * 18 | from predict import * 19 | 20 | 21 | if torch.cuda.is_available(): 22 | device = torch.device("cuda") 23 | torch.set_default_tensor_type('torch.cuda.FloatTensor') 24 | else: 25 | device = torch.device("cpu") 26 | 27 | # ignore all of the "invalid value encountered in true_divide" errors 28 | np.seterr(divide='ignore', invalid='ignore') 29 | 30 | # ================================== 31 | # PARAMETERS 32 | # ================================== 33 | WRITE_TO_FILE = True 34 | 35 | # whether to load from an intermediate point. 36 | LOAD_SAVED = False 37 | # required 38 | SAVED_MODEL_PATH = "./saved_models/12-15_16-57-52_FADE57.pt" 39 | RUN_ID = None 40 | # optional 41 | SAVED_STATS_PATH = None 42 | 43 | # from ddh import * 44 | # from ddh2 import * 45 | # from ddh3 import * 46 | from ddh4 import * 47 | 48 | if LOAD_SAVED and SAVED_MODEL_PATH: 49 | print("Loading existing model...") 50 | model.load_state_dict(torch.load(SAVED_MODEL_PATH)) 51 | 52 | if SAVED_STATS_PATH: 53 | print("Loading existing stats...") 54 | with open(SAVED_STATS_PATH, "rb") as file: 55 | stats = pickle.load(file) 56 | 57 | if not LOAD_SAVED or not SAVED_STATS_PATH: 58 | print("Creating new stats...") 59 | stats = { 60 | "val_mean_aps": [], 61 | "val_avg_pre": [], 62 | "val_avg_rec": [], 63 | "val_avg_hmean": [], 64 | "highest_map": 0.0, 65 | 66 | "test_avg_pre": 0.0, 67 | "test_avg_rec": 0.0, 68 | "test_avg_hmean": 0.0, 69 | "test_mean_ap": 0.0, 70 | "test_pre_curve": None, 71 | "test_rec_curve": None, 72 | } 73 | 74 | if LOAD_SAVED and RUN_ID: 75 | run_id = RUN_ID 76 | else: 77 | run_id = uuid.uuid4().hex.upper()[0:6] 78 | 79 | now = datetime.now().strftime("%m-%d_%H-%M-%S") 80 | file_name = now + "_" + run_id 81 | # model checkpoint 82 | saved_models_path = os.getcwd() + "/saved_models" 83 | mkdir(saved_models_path) 84 | checkpoint_path = saved_models_path + "/{}.pt" \ 85 | .format(file_name) 86 | 87 | # stats collection 88 | stats_path = os.getcwd() + "/stats" 89 | mkdir(stats_path) 90 | stats_file_path = stats_path + "/{}.pickle".format(file_name) 91 | 92 | with Logger(write_to_file=WRITE_TO_FILE, file_name=file_name) as logger: 93 | logger.write( 94 | "Starting run {} for {} epochs with model {}, and following params" 95 | .format(run_id, NUM_EPOCHS, type(model).__name__)) 96 | logger.write("hash_dim: " + str(HASH_DIM)) 97 | 
logger.write(OPTIM_PARAMS) 98 | logger.write(CUSTOM_PARAMS) 99 | logger.write(BATCH_SIZE) 100 | logger.write(LOADER_PARAMS) 101 | logger.write(DATASET_PARAMS) 102 | logger.write("====== START ======") 103 | logger.write("") 104 | 105 | for epoch in range(NUM_EPOCHS): 106 | # ====================================================================== 107 | # TRAINING 108 | # ====================================================================== 109 | logger.write("Epoch {}/{}".format(epoch+1, NUM_EPOCHS)) 110 | logger.write("--------------") 111 | 112 | start = time() 113 | train(model, loader_train, optimizer, logger, 114 | device=device, 115 | **CUSTOM_PARAMS) 116 | logger.write("Training completed in {:.0f} seconds." 117 | .format(time() - start)) 118 | logger.write("") 119 | 120 | # ====================================================================== 121 | # validation 122 | # ====================================================================== 123 | start = time() 124 | # get all of the codes for gallery and test images 125 | gallery_codes, gallery_label, test_codes, test_label = \ 126 | predict(model, loader_gallery, loader_val, logger, device=device) 127 | # evaluate the performance 128 | avg_pre, avg_rec, avg_hmean, _, _, mean_ap = \ 129 | eval_perf(gallery_codes, gallery_label, test_codes, test_label, 130 | top_k=TOP_K, hamm_radius=HAMM_RADIUS) 131 | stats['val_mean_aps'].append(mean_ap) 132 | stats['val_avg_pre'].append(avg_pre) 133 | stats['val_avg_rec'].append(avg_rec) 134 | stats['val_avg_hmean'].append(avg_hmean) 135 | 136 | if mean_ap > stats["highest_map"]: 137 | logger.write( 138 | "Higher mean avg precision {:.8f}/{:.8f}, saving!" 139 | .format(stats["highest_map"], mean_ap)) 140 | # saves the state of this model 141 | torch.save(model.state_dict(), checkpoint_path) 142 | stats["highest_map"] = mean_ap 143 | 144 | logger.write("Validation completed in {:.0f} seconds." 
145 | .format(time() - start)) 146 | 147 | logger.write("val MAP: {:.8f}, ".format(mean_ap) + 148 | "avg precision: {:.6f}, ".format(avg_pre) + 149 | "avg recall: {:.6f}, ".format(avg_rec) + 150 | "avg harmonic mean: {:0.6f}".format(avg_hmean)) 151 | logger.write("") 152 | 153 | # ========================================================================== 154 | # test 155 | # ========================================================================== 156 | best_model = model_class(hash_dim=HASH_DIM) 157 | best_model.load_state_dict(torch.load(checkpoint_path)) 158 | 159 | start = time() 160 | 161 | # get all of the codes for gallery and test images 162 | gallery_codes, gallery_label, test_codes, test_label= \ 163 | predict(best_model, loader_gallery, loader_test, logger, device=device) 164 | # evaluate the performance 165 | stats['test_avg_pre'], stats['test_avg_rec'], stats['test_avg_hmean'], \ 166 | stats['test_pre_curve'], stats['test_rec_curve'], stats['test_mean_ap'] = \ 167 | eval_perf(gallery_codes, gallery_label, test_codes, test_label, 168 | top_k=TOP_K, hamm_radius=HAMM_RADIUS) 169 | 170 | logger.write("Test completed in {:0.0f} seconds" 171 | .format(time() - start)) 172 | logger.write("test MAP: {:.8f}, ".format(stats['test_mean_ap']) + 173 | "avg precision: {:.6f}, ".format(stats['test_avg_pre']) + 174 | "avg recall: {:.6f}, ".format(stats['test_avg_rec']) + 175 | "avg harmonic mean: {:0.6f}" 176 | .format(stats['test_avg_hmean'])) 177 | 178 | logger.write("====== END ======") 179 | logger.write("Completed run for {}".format(run_id)) 180 | 181 | with open(stats_file_path, 'wb') as file: 182 | pickle.dump(stats, file) 183 | -------------------------------------------------------------------------------- /stats.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | from matplotlib import pyplot as plt 4 | from pdb import set_trace 5 | from scipy.interpolate import make_interp_spline, BSpline 6 | 7 | STATS_PATH = "./stats" 8 | FILE_NAME = "/12-17_21-59-29_E7A1ED.pickle" 9 | 10 | def visualize_val_stats(stats): 11 | val_mean_aps = np.array(stats['val_mean_aps']) 12 | val_avg_pre = np.array(stats['val_avg_pre']) 13 | val_avg_rec = np.array(stats['val_avg_rec']) 14 | val_avg_hmean = np.array(stats['val_avg_hmean']) 15 | ticks = len(val_avg_hmean) 16 | 17 | plt.subplot(2, 1, 1) 18 | 19 | plt.plot(val_mean_aps) 20 | # plt.xticks(np.linspace(1, ticks, ticks, dtype="int8")) 21 | plt.title("TOP 50 MEAN AVERAGE PRECISION") 22 | plt.xlabel("EPOCHS") 23 | plt.ylabel("MAP") 24 | 25 | plt.subplot(2, 1, 2) 26 | plt.plot(val_avg_pre) 27 | plt.plot(val_avg_rec) 28 | plt.plot(val_avg_hmean) 29 | plt.title("STATS OVER TIME") 30 | plt.xlabel("EPOCHS") 31 | plt.ylabel("AVERAGE VALUE") 32 | plt.ylim([0.0, 0.7]) 33 | plt.legend( 34 | ["Average Precision", "Average Recall", "Average Harmonic Mean"], 35 | loc="upper right") 36 | 37 | plt.subplots_adjust(hspace=1.0) 38 | 39 | plt.show() 40 | 41 | def visualize_test_stats(stats): 42 | test_recall = np.array(stats['test_rec_curve']) 43 | test_precision = np.array(stats['test_pre_curve']) 44 | test_ap = np.array(stats['test_avg_pre']) 45 | test_map = np.array(stats['test_mean_ap']) 46 | 47 | plt.step(test_recall, test_precision) 48 | plt.fill_between(test_recall, test_precision, alpha=0.2, color='b') 49 | plt.xlabel("Recall") 50 | plt.xlim([0.0, 1.0]) 51 | plt.ylabel("Precision") 52 | plt.ylim([0.0, 1.05]) 53 | plt.title("Retrieval Performance: AP={:.4f}; TOP50 MAP={:.4f}." 
54 | .format(test_ap, test_map)) 55 | plt.show() 56 | 57 | if __name__ == "__main__": 58 | # stats = None 59 | 60 | # with open(STATS_PATH + FILE_NAME, "rb") as file: 61 | # stats = pickle.load(file) 62 | 63 | # visualize_val_stats(stats) 64 | # visualize_test_stats(stats) 65 | 66 | name = [ 67 | "Low Res Baseline", 68 | "High Res Baseline", 69 | "L2 Constraint", 70 | "Image Alignment" 71 | ] 72 | 73 | stat_paths = [ 74 | "/12-17_20-18-32_99AD3C.pickle", 75 | "/12-17_20-25-11_25F03D.pickle", 76 | "/12-17_21-59-29_E7A1ED.pickle", 77 | "/12-19_14-18-00_CE7872.pickle" 78 | ] 79 | 80 | colors = ['r', 'g', 'b', 'y'] 81 | 82 | for idx, path in enumerate(stat_paths): 83 | with open(STATS_PATH + path, "rb") as file: 84 | stats = pickle.load(file) 85 | print(stats['test_mean_ap']) 86 | 87 | # val_mean_aps = np.array(stats['val_mean_aps']) 88 | # ticks = np.linspace(1, len(val_mean_aps), 89 | # len(val_mean_aps), dtype="uint8") 90 | 91 | # X = np.linspace(ticks.min(), ticks.max(), 100) 92 | # spline = make_interp_spline(ticks, val_mean_aps, k=3) 93 | # smooth = spline(X) 94 | # # plt.plot(val_mean_aps) 95 | # plt.plot(X, smooth, color=colors[idx]) 96 | # # plt.xticks(np.linspace(1, ticks, ticks // 2, dtype="int8")) 97 | # plt.title("TOP 50 MEAN AVERAGE PRECISION") 98 | # plt.xlabel("EPOCHS") 99 | # plt.ylabel("MAP") 100 | 101 | # plt.legend(name, loc="lower right") 102 | # plt.show() 103 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import torch 4 | import numpy as np 5 | from time import time 6 | from multiprocessing import Pool, cpu_count 7 | from functools import reduce 8 | from pdb import set_trace 9 | 10 | # ============================================================================== 11 | # NOTE: Run all of the code from the ./code directory! 12 | # ============================================================================== 13 | 14 | DATA_DIR = "./data" 15 | ALIGNED_DATA_DIR = "./aligned_data" 16 | FACESCRUB_DIR = "../facescrub" 17 | RUN_ASSERTS = True 18 | VERBOSE = False 19 | 20 | def mkdir(path): 21 | ''' 22 | Creates the specified directory if it doesn't exist. 23 | ''' 24 | if not os.path.exists(path): 25 | os.mkdir(path) 26 | 27 | def lsdir(path): 28 | ''' 29 | Lists all of the files in the specified path, excluding files that start 30 | with a "." 31 | ''' 32 | files = os.listdir(path) 33 | return list(filter(lambda name: name[0] != ".", files)) 34 | 35 | def preprocess(): 36 | ''' 37 | Preprocess the data in ../facescrub/download and store them in ./data 38 | folder. 39 | ''' 40 | start = time() 41 | mkdir(DATA_DIR) 42 | names = lsdir(FACESCRUB_DIR + "/download") 43 | with Pool(max(1, cpu_count())) as pool: 44 | pool.map(_process_faces, names) 45 | print("Preprocessed images in {:.2f} seconds.".format(time() - start)) 46 | 47 | def _count_before_imgs(): 48 | ''' 49 | Count the number of images in the ./facescrub/download/*/face folders. 
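    Used together with _count_after_imgs to check how many images survive
    preprocessing (see the commented-out prints under __main__).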
50 | ''' 51 | names = lsdir(FACESCRUB_DIR + "/download") 52 | paths = list(map(lambda name: get_facescrub_path(name), names)) 53 | return sum(list(map(lambda path: len(lsdir(path)), paths))) 54 | 55 | def _count_after_imgs(): 56 | ''' 57 | Count the number of images in the ./code/data/* folders 58 | ''' 59 | names = lsdir(DATA_DIR) 60 | paths = list(map(lambda name: get_data_path(name), names)) 61 | return sum(list(map(lambda path: len(lsdir(path)), paths))) 62 | 63 | def _process_faces(name): 64 | ''' 65 | Process the person's face images and save them in the ./code/data directory. 66 | ''' 67 | if VERBOSE: 68 | print_name = name.replace("_", " ") 69 | print("Processing images for {}...".format(print_name)) 70 | start = time() 71 | 72 | # using cropped faces 73 | faces_dir = get_facescrub_path(name) 74 | # create directory for the person in the ./code/data folder 75 | output_dir = get_data_path(name) 76 | mkdir(output_dir) 77 | # list of names of images 78 | img_names = lsdir(faces_dir) 79 | for img_name in img_names: 80 | output_path = output_dir + "/" + img_name 81 | 82 | if os.path.isfile(output_path): 83 | if VERBOSE: 84 | print("File {} already exists, skipping...".format(img_name)) 85 | continue 86 | 87 | img = cv2.imread(faces_dir + "/" + img_name) 88 | 89 | if img is None: 90 | if VERBOSE: 91 | print("Invalid image, skipping...") 92 | continue 93 | 94 | # eliminate empty images (white for some reason), threshold set at 85% 95 | max_threshold = round(reduce(lambda x,y: x*y, img.shape) * 255 * 0.85) 96 | if img.sum() > max_threshold: 97 | if VERBOSE: 98 | print("Image above pixel value threshold, skipping...") 99 | continue 100 | 101 | # save it 102 | cv2.imwrite(output_path, img) 103 | 104 | output_files = lsdir(output_dir) 105 | if VERBOSE: 106 | print("Processed images for {} in {:.2f} seconds. {} images before, {} images after".format(print_name, time() - start, 107 | len(img_names), len(output_files))) 108 | 109 | def get_facescrub_path(name): 110 | return FACESCRUB_DIR + "/download/{}/face".format(name) 111 | 112 | def get_data_path(name): 113 | return DATA_DIR + "/{}".format(name) 114 | 115 | if __name__ == "__main__": 116 | # preprocess() 117 | # print("There are {} images in ./facescrub/download/*/face folder." 118 | # .format(_count_before_imgs())) 119 | # print("There are {} images in ./code/data/*." 
120 | # .format(_count_after_imgs())) 121 | pass 122 | -------------------------------------------------------------------------------- /viz.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from hamming_dist import * 5 | from ddh2 import * 6 | 7 | CODES_PATH = "./codes" 8 | CODES_FILE = "/12-19_14-18-00_CE7872.codes" 9 | DATASET_PATHS = "./aligned_dataset.pickle" 10 | 11 | # how many test subjects to pick up and examine 12 | NUM_TEST_TO_SHOW = 6 13 | TOP_N_RESULTS = 10 14 | 15 | if __name__ == "__main__": 16 | with open(CODES_PATH + CODES_FILE, "rb") as file: 17 | codes = pickle.load(file) 18 | 19 | gallery_codes, gallery_labels, test_codes, test_labels = codes 20 | gallery_codes = np.array(gallery_codes) 21 | test_codes = np.array(test_codes) 22 | 23 | gallery_labels = gallery_labels.numpy() 24 | test_labels = test_labels.numpy() 25 | truth_table = gallery_labels == test_labels.T 26 | 27 | with open(DATASET_PATHS, "rb") as file: 28 | gallery, test = pickle.load(file) 29 | 30 | num_gallery, num_test = len(gallery_codes), len(test_codes) 31 | 32 | # only looking at a subset of test subjects 33 | test_idx = np.random.randint(0, num_test, num_test)[:NUM_TEST_TO_SHOW] 34 | test_subset = test_codes[test_idx, :] 35 | # calculate the hamming dists 36 | dist = hamming_dist(gallery_codes, test_subset) 37 | # get the sorted idx 38 | sorted_idx = dist.argsort(axis=0) 39 | 40 | fig, ax_arr = plt.subplots(NUM_TEST_TO_SHOW, TOP_N_RESULTS+1, 41 | figsize=(25,25)) 42 | 43 | for i, tidx in enumerate(test_idx): 44 | assert test_labels[tidx, 0] == test[tidx][1], "Mismatched test labels!" 45 | 46 | # display the image 47 | test_img = test[tidx][0].permute(1,2,0) 48 | ax_arr[i, 0].imshow(np.asarray(test_img)) 49 | ax_arr[i, 0].axis("off") 50 | ax_arr[i, 0].set_title("Query") 51 | 52 | # display the top N images 53 | gallery_idx = sorted_idx[:TOP_N_RESULTS, i] 54 | 55 | for j, gidx in enumerate(gallery_idx): 56 | gallery_img = gallery[gidx][0].permute(1,2,0) 57 | ax_arr[i, j+1].imshow(np.asarray(gallery_img)) 58 | ax_arr[i, j+1].axis("off") 59 | 60 | if truth_table[gidx, tidx]: 61 | ax_arr[i, j+1].set_title("MATCH", color="g") 62 | else: 63 | ax_arr[i, j+1].set_title("MISMATCH", color="r") 64 | 65 | # plt.show() 66 | --------------------------------------------------------------------------------