├── .gitignore ├── README.md ├── align.py ├── aligned_dataset.pickle ├── calc_map.py ├── calc_pre_rec.py ├── conda_macos.yml ├── conda_ubuntu.yml ├── confusion.py ├── dataset.pickle ├── dataset.py ├── ddh.py ├── ddh2.py ├── ddh3.py ├── ddh4.py ├── eval_perf.py ├── gather.py ├── hamming_dist.py ├── logger.py ├── predict.py ├── run.py ├── stats.py ├── utils.py └── viz.py /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | __pycache__ 3 | .DS_Store 4 | data/* 5 | aligned_data/* 6 | logs/* 7 | saved_models/* 8 | stats/* 9 | codes/* 10 | *.pyc 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Face Hashing Using Neural Networks 2 | 3 | ## File structure 4 | 5 | ```bash 6 | |--- code/ # contains code for the project 7 | | |--- data/ # contains preprocessed data 8 | |--- facescrub/ # from https://github.com/faceteam/facescrub.git 9 | | |--- download/ # data from the download.py script 10 | | |--- download.py # script to download data 11 | ``` 12 | 13 | ## Packages 14 | Run `source activate cs670project` to activate the project's conda environment. 15 | 16 | ### Package Versions 17 | - python=3.6 18 | - numpy=1.15.4 19 | - scipy=1.1.0 20 | - opencv-python=3.4.3.18 21 | - matplotlib=3.0.1 22 | - jupyter=1.0.0 23 | - sklearn=0.20.1 (pip) 24 | - cmake=3.13.2 (pip) 25 | - dlib=19.16.0 (pip) 26 | 27 | ### Installing PyTorch 28 | `conda install pytorch torchvision -c pytorch` 29 | 30 | ## Miscellaneous Notes 31 | 32 | - The dataset comes from https://github.com/faceteam/facescrub.git. **NOTE:** download.py requires Python 2.7. 33 | 34 | ## Instructions 35 | 36 | 1. Install Miniconda and pip, 37 | 2. Install the packages noted above, 38 | 3. `git clone https://github.com/faceteam/facescrub.git` into the same level as the project's path (see file structure above), 39 | 4. Run `python download.py` with Python 2.7 to download the FaceScrub images, 40 | 5. Run `python utils.py` to preprocess the images and move them into the project's ./data folder. 41 | 42 | - To run on a specific CUDA device, run `CUDA_VISIBLE_DEVICES=# python run.py`, where # is the number of the device in `nvidia-smi` (see the example below).
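For example, to train on the second GPU listed by `nvidia-smi`:

```bash
CUDA_VISIBLE_DEVICES=1 python run.py
```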
43 | -------------------------------------------------------------------------------- /align.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import dlib 4 | import numpy as np 5 | from pdb import set_trace 6 | from matplotlib import pyplot as plt 7 | from utils import lsdir, mkdir 8 | 9 | # From 10 | # https://www.pyimagesearch.com/2017/04/03/facial-landmarks-dlib-opencv-python/ 11 | PREDICTOR_MODEL_PATH = "./saved_models/shape_predictor_68_face_landmarks.dat" 12 | RIGHT_EYE_IDX = np.arange(36, 42) 13 | LEFT_EYE_IDX = np.arange(42, 48) 14 | 15 | def align(img_path, **kwargs): 16 | ''' 17 | https://www.pyimagesearch.com/2017/05/22/face-alignment-with-opencv-and-python/ 18 | ''' 19 | desired_left_x = kwargs.get("desired_left_eye_x", 0.25) 20 | img_width = kwargs.get("img_width", 400) 21 | 22 | detector = dlib.get_frontal_face_detector() 23 | predictor = dlib.shape_predictor(PREDICTOR_MODEL_PATH) 24 | img = cv2.imread(img_path) 25 | img = cv2.resize(img, (img_width, img_width)) 26 | img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 27 | gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) 28 | rects = detector(gray, 1) 29 | # get all of the 68 landmark points 30 | shape = _shape_to_np(predictor(gray, rects[0])) 31 | # center of all eye points 32 | left_eye_center = shape[LEFT_EYE_IDX, :].mean(axis=0).astype('int') 33 | right_eye_center = shape[RIGHT_EYE_IDX, :].mean(axis=0).astype('int') 34 | # x and y offsets between the two eye centers 35 | dy = right_eye_center[1] - left_eye_center[1] 36 | dx = right_eye_center[0] - left_eye_center[0] 37 | # rotation angle between the eyes 38 | angle = np.degrees(np.arctan2(dy, dx)) - 180 39 | # desired eye positions along the x-axis 40 | desired_right_x = 1.0 - desired_left_x 41 | # calculate scale to get to desired size 42 | dist = np.sqrt((dx ** 2) + (dy ** 2)) 43 | desired_dist = desired_right_x - desired_left_x 44 | desired_dist *= round(img_width * 0.7) 45 | scale = desired_dist / dist 46 | # calculate the midpoint between the eyes 47 | eye_center = (left_eye_center[0] + right_eye_center[0]) // 2, \ 48 | (left_eye_center[1] + right_eye_center[1]) // 2 49 | # rotation matrix 50 | M = cv2.getRotationMatrix2D(eye_center, angle, scale) 51 | # update translation component 52 | tx = img_width * 0.45 53 | ty = img_width * desired_left_x + 20 54 | M[0,2] += (tx - eye_center[0]) 55 | M[1,2] += (ty - eye_center[1]) 56 | output = cv2.warpAffine(img_rgb, M, (img_width, img_width)) 57 | return output[:350, :350, :] 58 | 59 | def _shape_to_np(shape, dtype="int"): 60 | ''' 61 | From 62 | https://www.pyimagesearch.com/2017/04/03/facial-landmarks-dlib-opencv-python/ 63 | ''' 64 | coords = np.zeros((68, 2), dtype=dtype) 65 | for i in range(0, 68): 66 | coords[i] = (shape.part(i).x, shape.part(i).y) 67 | return coords 68 | 69 | def _align_imgs(img_files, from_path, to_path): 70 | counter = 0 71 | for img_name in img_files: 72 | img_path = from_path + "/" + img_name 73 | aligned_path = to_path + "/" + img_name 74 | try: 75 | aligned_img = align(img_path) 76 | cv2.imwrite(aligned_path, 77 | cv2.cvtColor(aligned_img, cv2.COLOR_RGB2BGR)) 78 | except Exception: 79 | pass 80 | finally: 81 | counter += 1 82 | return counter 83 | 84 | if __name__ == "__main__": 85 | # from dataset import FaceScrubDataset 86 | # dataset = FaceScrubDataset() 87 | # img_path = dataset.img_paths[4000] 88 | # img = cv2.imread(img_path) 89 | # rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 90 | # output = align(img_path) 91 | # plt.imshow(output) 92 | # plt.show() 93 | 94 | root_path = "./data"
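# assumes the ./data/<person>/ layout produced by utils.py, with per-person val/ and test/ subfolders created by create_set() in dataset.py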
95 | target_path = "./aligned_data" 96 | mkdir(target_path) 97 | names = lsdir(root_path) 98 | counter = 0 99 | for name in names: 100 | from_path = root_path + "/" + name 101 | to_path = target_path + "/" + name 102 | mkdir(to_path) 103 | # training files 104 | img_names = list(filter(lambda x: ".jpg" in x, lsdir(from_path))) 105 | counter += _align_imgs(img_names, from_path, to_path) 106 | # validation files 107 | val_from_path = from_path + "/val" 108 | val_to_path = to_path + "/val" 109 | mkdir(val_to_path) 110 | img_names = lsdir(val_from_path) 111 | counter += _align_imgs(img_names, val_from_path, val_to_path) 112 | # test files 113 | test_from_path = from_path + "/test" 114 | test_to_path = to_path + "/test" 115 | mkdir(test_to_path) 116 | img_names = lsdir(test_from_path) 117 | counter += _align_imgs(img_names, test_from_path, test_to_path) 118 | print("Processed {} images.".format(counter)) 119 | -------------------------------------------------------------------------------- /aligned_dataset.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treble-maker123/deep-face-hashing/0da530f5486685ae093d79994919b411dc4a4ac5/aligned_dataset.pickle -------------------------------------------------------------------------------- /calc_map.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def calc_map(matches, rankings, top_k): 4 | ''' 5 | Calculate the mean average precision over the T test samples, retrieved 6 | from G training/gallery samples. 7 | 8 | Args: 9 | - matches (numpy.ndarray): a ground-truth matrix mapping whether a 10 | pair of gallery data and test data are of the same person. 11 | Shape (G, T). 12 | - rankings (numpy.ndarray): a sorted index of hamming distance ranking. 13 | Shape (G, T). 14 | - top_k (int): the number of shortest distance gallery images to 15 | compare to the test images. 16 | 17 | Returns: 18 | (float): the average precision scores of the T test samples, averaged 19 | into a single mean average precision value. 20 | ''' 21 | num_gallery, num_test = rankings.shape 22 | 23 | # if top_k is greater than the number of gallery images, clamp it to the 24 | # number of gallery images (only relevant for the test below).
25 | if num_gallery < top_k: top_k = num_gallery 26 | 27 | correct_retrievals = np.zeros((top_k, num_test), dtype="int8") 28 | 29 | for idx in range(num_test): 30 | # get the top_k matches for the test data 31 | rank_slice = rankings[:top_k, idx] 32 | correct_retrievals[:, idx] = matches[rank_slice, idx] 33 | 34 | # [[1,2,3,...,top_k], [1,2,3,...,top_k]] 35 | # to facilitate calculating the average precision 36 | idx = np.linspace(1, top_k, top_k, dtype="int").reshape((top_k, 1)) 37 | correct_idx = np.repeat(idx, num_test, axis=1) 38 | # sum up all of the scores across the top_k for each test sample 39 | summed_scores = np.zeros_like(correct_idx) 40 | for idx in range(top_k): 41 | # sum the number of correct retrievals from 1 up to idx 42 | summed_scores[idx, :] = correct_retrievals[:idx+1, :].sum(axis=0) 43 | 44 | # mask out the scores for the incorrect retrievals 45 | summed_scores = summed_scores * correct_retrievals 46 | # count the number of correct retrievals to be divided 47 | num_corrects = correct_retrievals.sum(axis=0) 48 | 49 | # calculate mean average precision 50 | ap = (summed_scores / correct_idx).sum(axis=0) / num_corrects 51 | # set the nan values to 0 52 | ap[ap != ap] = 0 53 | 54 | return ap.mean() 55 | 56 | if __name__ == "__main__": 57 | matches = np.array([ 58 | [1,1,0,1,0], 59 | [1,0,1,0,1], 60 | [1,1,1,1,0], 61 | [0,0,0,0,0], 62 | [0,0,1,0,1] 63 | ], dtype="int8") 64 | ranking = np.array([ 65 | [0,3,2,1,0], 66 | [4,0,4,0,2], 67 | [1,4,1,2,3], 68 | [3,1,0,3,1], 69 | [2,2,3,4,4] 70 | ], dtype="int8") 71 | mean_ap = calc_map(matches, ranking, top_k=3) 72 | target = np.array([5/6, 1/2, 1, 7/12, 0]).mean() 73 | assert np.isclose(mean_ap, target), "Invalid calculation!" 74 | -------------------------------------------------------------------------------- /calc_pre_rec.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics import precision_recall_curve 2 | from pdb import set_trace 3 | 4 | def calc_pre_rec(hamm_dist, gt, radius): 5 | ''' 6 | Calculates the precision-recall curve values. 
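Args (summarizing the code below): hamm_dist is a (G, T) array of Hamming
distances between G gallery codes and T test codes, gt is a (G, T) binary
matrix marking which pairs share a label, and radius is the distance at or
below which a pair is predicted to be the same person.

Returns the mean precision, mean recall, and mean harmonic mean (F1) over
the test columns, plus the micro-averaged precision and recall curves.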
7 | ''' 8 | # distance within radius counts as 0 9 | dist = hamm_dist * (hamm_dist > radius) 10 | # normalize the distance values, so the smaller distance, the closer to 1 11 | max_val = dist.max() 12 | scores = ((max_val - dist) / max_val) ** 2 13 | scores[scores != scores] = 1 14 | # calculate the "micro average" of the curves 15 | pre_curve, rec_curve, _ = precision_recall_curve(gt.ravel(), scores.ravel()) 16 | 17 | # pred == 1 is what the model believes to be the person 18 | pred = (dist == 0).astype("int8") 19 | # true positives 20 | tp = (pred * gt).sum(axis=0) 21 | # recall 22 | rec = tp / gt.sum(axis=0) 23 | rec[rec != rec] = 0 24 | # precision 25 | pre = tp / pred.sum(axis=0) 26 | pre[pre != pre] = 0 27 | # harmonic mean 28 | hmean = 2 * (pre * rec) / (pre + rec) 29 | hmean[hmean != hmean] = 0 30 | 31 | return pre.mean(), rec.mean(), hmean.mean(), pre_curve, rec_curve 32 | -------------------------------------------------------------------------------- /conda_macos.yml: -------------------------------------------------------------------------------- 1 | name: cs670project 2 | channels: 3 | - pytorch 4 | - anaconda-fusion 5 | - defaults 6 | dependencies: 7 | - appnope=0.1.0=py36hf537a9a_0 8 | - backcall=0.1.0=py36_0 9 | - blas=1.0=mkl 10 | - bleach=3.0.2=py36_0 11 | - ca-certificates=2018.03.07=0 12 | - certifi=2018.10.15=py36_0 13 | - cffi=1.11.5=py36h6174b99_1 14 | - cycler=0.10.0=py36hfc81398_0 15 | - dbus=1.13.2=h760590f_1 16 | - decorator=4.3.0=py36_0 17 | - entrypoints=0.2.3=py36_2 18 | - expat=2.2.6=h0a44026_0 19 | - freetype=2.9.1=hb4e5f40_0 20 | - gettext=0.19.8.1=h15daf44_3 21 | - glib=2.56.2=hd9629dc_0 22 | - icu=58.2=h4b95b61_1 23 | - intel-openmp=2019.1=144 24 | - ipykernel=5.1.0=py36h39e3cac_0 25 | - ipython=7.2.0=py36h39e3cac_0 26 | - ipython_genutils=0.2.0=py36h241746c_0 27 | - ipywidgets=7.4.2=py36_0 28 | - jedi=0.13.1=py36_0 29 | - jinja2=2.10=py36_0 30 | - jpeg=9b=he5867d9_2 31 | - jsonschema=2.6.0=py36hb385e00_0 32 | - jupyter=1.0.0=py36_7 33 | - jupyter_client=5.2.3=py36_0 34 | - jupyter_console=6.0.0=py36_0 35 | - jupyter_core=4.4.0=py36_0 36 | - kiwisolver=1.0.1=py36h0a44026_0 37 | - libcxx=4.0.1=hcfea43d_1 38 | - libcxxabi=4.0.1=hcfea43d_1 39 | - libedit=3.1.20170329=hb402a30_2 40 | - libffi=3.2.1=h475c297_4 41 | - libgfortran=3.0.1=h93005f0_2 42 | - libiconv=1.15=hdd342a3_7 43 | - libpng=1.6.35=ha441bb4_0 44 | - libsodium=1.0.16=h3efe00b_0 45 | - libtiff=4.0.9=hcb84e12_2 46 | - markupsafe=1.1.0=py36h1de35cc_0 47 | - matplotlib=3.0.1=py36h54f8f79_0 48 | - mistune=0.8.4=py36h1de35cc_0 49 | - mkl=2018.0.3=1 50 | - mkl_fft=1.0.6=py36hb8a8100_0 51 | - mkl_random=1.0.1=py36h5d10147_1 52 | - nbconvert=5.3.1=py36_0 53 | - nbformat=4.4.0=py36h827af21_0 54 | - ncurses=6.1=h0a44026_0 55 | - ninja=1.8.2=py36h04f5b5a_1 56 | - notebook=5.7.2=py36_0 57 | - numpy=1.15.4=py36h6a91979_0 58 | - numpy-base=1.15.4=py36h8a80b8c_0 59 | - olefile=0.46=py36_0 60 | - openssl=1.1.1a=h1de35cc_0 61 | - pandoc=2.2.3.2=0 62 | - pandocfilters=1.4.2=py36_1 63 | - parso=0.3.1=py36_0 64 | - pcre=8.42=h378b8a2_0 65 | - pexpect=4.6.0=py36_0 66 | - pickleshare=0.7.5=py36_0 67 | - pillow=5.3.0=py36hb68e598_0 68 | - pip=18.1=py36_0 69 | - prometheus_client=0.4.2=py36_0 70 | - prompt_toolkit=2.0.7=py36_0 71 | - ptyprocess=0.6.0=py36_0 72 | - pycparser=2.19=py36_0 73 | - pygments=2.2.0=py36h240cd3f_0 74 | - pyparsing=2.3.0=py36_0 75 | - pyqt=5.9.2=py36h655552a_2 76 | - python=3.6.7=haf84260_0 77 | - python-dateutil=2.7.5=py36_0 78 | - pytz=2018.7=py36_0 79 | - pyzmq=17.1.2=py36h1de35cc_0 80 | - 
qt=5.9.6=h45cd832_2 81 | - qtconsole=4.4.2=py36_0 82 | - readline=7.0=h1de35cc_5 83 | - scipy=1.1.0=py36h28f7352_1 84 | - send2trash=1.5.0=py36_0 85 | - setuptools=40.6.2=py36_0 86 | - sip=4.19.8=py36h0a44026_0 87 | - six=1.11.0=py36_1 88 | - sqlite=3.25.3=ha441bb4_0 89 | - terminado=0.8.1=py36_1 90 | - testpath=0.4.2=py36_0 91 | - tk=8.6.8=ha441bb4_0 92 | - tornado=5.1.1=py36h1de35cc_0 93 | - traitlets=4.3.2=py36h65bd3ce_0 94 | - wcwidth=0.1.7=py36h8c6ec74_0 95 | - webencodings=0.5.1=py36_1 96 | - wheel=0.32.3=py36_0 97 | - widgetsnbextension=3.4.2=py36_0 98 | - xz=5.2.4=h1de35cc_4 99 | - zeromq=4.2.5=h0a44026_1 100 | - zlib=1.2.11=h1de35cc_3 101 | - pytorch=0.4.1=py36_cuda0.0_cudnn0.0_1 102 | - torchvision=0.2.1=py36_1 103 | - pip: 104 | - blessed==1.14.2 105 | - botocore==1.7.42 106 | - cement==2.8.2 107 | - docker-py==1.7.2 108 | - dockerpty==0.4.1 109 | - docopt==0.6.2 110 | - docutils==0.14 111 | - opencv-python==3.4.3.18 112 | - pathspec==0.5.0 113 | - requests==2.9.1 114 | - semantic-version==2.5.0 115 | - tabulate==0.7.5 116 | - termcolor==1.1.0 117 | - torch==0.4.1 118 | - websocket-client==0.44.0 119 | prefix: /Users/qui-gon/Workspace/anaconda3/envs/cs670project 120 | 121 | -------------------------------------------------------------------------------- /conda_ubuntu.yml: -------------------------------------------------------------------------------- 1 | name: cs670project 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - backcall=0.1.0=py36_0 7 | - blas=1.0=mkl 8 | - bleach=3.0.2=py36_0 9 | - ca-certificates=2018.03.07=0 10 | - certifi=2018.10.15=py36_0 11 | - cffi=1.11.5=py36he75722e_1 12 | - cycler=0.10.0=py36h93f1223_0 13 | - dbus=1.13.2=h714fa37_1 14 | - decorator=4.3.0=py36_0 15 | - entrypoints=0.2.3=py36_2 16 | - expat=2.2.6=he6710b0_0 17 | - fontconfig=2.13.0=h9420a91_0 18 | - freetype=2.9.1=h8a8886c_1 19 | - glib=2.56.2=hd408876_0 20 | - gmp=6.1.2=h6c8ec71_1 21 | - gst-plugins-base=1.14.0=hbbd80ab_1 22 | - gstreamer=1.14.0=hb453b48_1 23 | - icu=58.2=h9c2bf20_1 24 | - intel-openmp=2019.1=144 25 | - ipykernel=5.1.0=py36h39e3cac_0 26 | - ipython=7.2.0=py36h39e3cac_0 27 | - ipython_genutils=0.2.0=py36_0 28 | - ipywidgets=7.4.2=py36_0 29 | - jedi=0.13.1=py36_0 30 | - jinja2=2.10=py36_0 31 | - jpeg=9b=h024ee3a_2 32 | - jsonschema=2.6.0=py36_0 33 | - jupyter=1.0.0=py36_7 34 | - jupyter_client=5.2.3=py36_0 35 | - jupyter_console=6.0.0=py36_0 36 | - jupyter_core=4.4.0=py36_0 37 | - kiwisolver=1.0.1=py36hf484d3e_0 38 | - libedit=3.1.20170329=h6b74fdf_2 39 | - libffi=3.2.1=hd88cf55_4 40 | - libgcc-ng=8.2.0=hdf63c60_1 41 | - libgfortran-ng=7.3.0=hdf63c60_0 42 | - libpng=1.6.35=hbc83047_0 43 | - libsodium=1.0.16=h1bed415_0 44 | - libstdcxx-ng=8.2.0=hdf63c60_1 45 | - libtiff=4.0.9=he85c1e1_2 46 | - libuuid=1.0.3=h1bed415_2 47 | - libxcb=1.13=h1bed415_1 48 | - libxml2=2.9.8=h26e45fe_1 49 | - markupsafe=1.1.0=py36h7b6447c_0 50 | - matplotlib=3.0.1=py36h5429711_0 51 | - mistune=0.8.4=py36h7b6447c_0 52 | - mkl=2018.0.3=1 53 | - mkl_fft=1.0.6=py36h7dd41cf_0 54 | - mkl_random=1.0.1=py36h4414c95_1 55 | - nbconvert=5.3.1=py36_0 56 | - nbformat=4.4.0=py36h31c9010_0 57 | - ncurses=6.1=he6710b0_1 58 | - ninja=1.8.2=py36h6bb024c_1 59 | - notebook=5.7.2=py36_0 60 | - numpy=1.15.4=py36h1d66e8a_0 61 | - numpy-base=1.15.4=py36h81de0dd_0 62 | - olefile=0.46=py36_0 63 | - openssl=1.0.2p=h14c3975_0 64 | - pandoc=2.2.3.2=0 65 | - pandocfilters=1.4.2=py36_1 66 | - parso=0.3.1=py36_0 67 | - pcre=8.42=h439df22_0 68 | - pexpect=4.6.0=py36_0 69 | - pickleshare=0.7.5=py36_0 70 | - 
pillow=5.3.0=py36h34e0f95_0 71 | - pip=18.1=py36_0 72 | - prometheus_client=0.4.2=py36_0 73 | - prompt_toolkit=2.0.7=py36_0 74 | - ptyprocess=0.6.0=py36_0 75 | - pycparser=2.19=py36_0 76 | - pygments=2.2.0=py36_0 77 | - pyparsing=2.3.0=py36_0 78 | - pyqt=5.9.2=py36h05f1152_2 79 | - python=3.6.6=h6e4f718_2 80 | - python-dateutil=2.7.5=py36_0 81 | - pytz=2018.7=py36_0 82 | - pyzmq=17.1.2=py36h14c3975_0 83 | - qt=5.9.6=h8703b6f_2 84 | - qtconsole=4.4.2=py36_0 85 | - readline=7.0=h7b6447c_5 86 | - scipy=1.1.0=py36hfa4b5c9_1 87 | - send2trash=1.5.0=py36_0 88 | - setuptools=40.6.2=py36_0 89 | - sip=4.19.8=py36hf484d3e_0 90 | - six=1.11.0=py36_1 91 | - sqlite=3.25.3=h7b6447c_0 92 | - terminado=0.8.1=py36_1 93 | - testpath=0.4.2=py36_0 94 | - tk=8.6.8=hbc83047_0 95 | - tornado=5.1.1=py36h7b6447c_0 96 | - traitlets=4.3.2=py36_0 97 | - wcwidth=0.1.7=py36_0 98 | - webencodings=0.5.1=py36_1 99 | - wheel=0.32.3=py36_0 100 | - widgetsnbextension=3.4.2=py36_0 101 | - xz=5.2.4=h14c3975_4 102 | - zeromq=4.2.5=hf484d3e_1 103 | - zlib=1.2.11=h7b6447c_3 104 | - pytorch=0.4.1=py36_py35_py27__9.0.176_7.1.2_2 105 | - torchvision=0.2.1=py36_1 106 | - pip: 107 | - opencv-python==3.4.4.19 108 | - torch==0.4.1.post2 109 | -------------------------------------------------------------------------------- /confusion.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | from hamming_dist import * 4 | from functools import reduce 5 | from calc_pre_rec import calc_pre_rec 6 | 7 | CODES_PATH = "./codes" 8 | CODES_FILE = "/12-17_20-25-11_25F03D.codes" 9 | 10 | if __name__ == "__main__": 11 | with open(CODES_PATH + CODES_FILE, "rb") as file: 12 | codes = pickle.load(file) 13 | 14 | gallery_codes, gallery_labels, test_codes, test_labels = codes 15 | gallery_codes = np.array(gallery_codes) 16 | test_codes = np.array(test_codes) 17 | 18 | gallery_labels = gallery_labels.numpy() 19 | test_labels = test_labels.numpy() 20 | gt = gallery_labels == test_labels.T 21 | 22 | hamm_dist = hamming_dist(gallery_codes, test_codes) 23 | total = reduce(lambda x,y: x*y, hamm_dist.shape) 24 | pt = hamm_dist <= 2 25 | true_pos = ((pt == gt) * pt).sum() 26 | true_neg = ((~pt == ~gt) * ~pt).sum() 27 | false_pos = ((pt == ~gt) * pt).sum() 28 | false_neg = ((~pt == gt) * ~pt).sum() 29 | print(true_pos, true_neg, false_pos, false_neg, total) 30 | print(true_pos + true_neg) 31 | print(false_pos + false_neg) 32 | print(true_pos + false_pos) 33 | print(true_neg + false_neg) 34 | 35 | avg_pre, avg_rec, _, _, _ = calc_pre_rec(hamm_dist, gt, 2) 36 | -------------------------------------------------------------------------------- /dataset.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treble-maker123/deep-face-hashing/0da530f5486685ae093d79994919b411dc4a4ac5/dataset.pickle -------------------------------------------------------------------------------- /dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import numpy as np 4 | from torch.utils.data import Dataset 5 | import torchvision.transforms as T 6 | from utils import DATA_DIR, ALIGNED_DATA_DIR, get_data_path, mkdir, lsdir 7 | from pdb import set_trace 8 | from align import align 9 | from matplotlib import pyplot as plt 10 | import multiprocessing 11 | import torchvision.transforms.functional as tF 12 | from torch.utils.data.dataloader import default_collate 13 | from PIL import Image 14 | 15 | class 
FaceScrubDataset(Dataset): 16 | ''' 17 | The dataset has a total of 63903 images of 530 faces. The person with the most images has 191, and the person with the fewest has 39. 18 | 19 | 20 | Statistics for "comparison" type, 21 | 22 | The combination of datapoints is (63903 choose 2), which is 23 | (63903 * 63902) / 2 = 2,041,764,753 24 | 25 | The permutation of datapoints is 26 | 63903 ** 2 = 4,083,593,409 27 | 28 | Five images per person set aside for validation, giving 29 | (530 * 5) ** 2 = 7,022,500 comparison pairs 30 | 31 | Five images per person set aside for test, giving 32 | (530 * 5) ** 2 = 7,022,500 comparison pairs 33 | ''' 34 | def __init__(self, **kwargs): 35 | hash_dim = kwargs.get("hash_dim", 48) 36 | type = kwargs.get("type", "label") 37 | mode = kwargs.get("mode", "train") 38 | transform = kwargs.get("transform", []) 39 | normalize = kwargs.get("normalize", False) 40 | align = kwargs.get("align", False) 41 | 42 | if mode not in ["train", "val", "test"]: 43 | raise Exception("Invalid dataset mode") 44 | if type not in ["label", "comparison"]: 45 | raise Exception("Invalid dataset type") 46 | 47 | self.data_dir = ALIGNED_DATA_DIR if align else DATA_DIR 48 | self.mode = mode 49 | self.type = type 50 | self.names = lsdir(self.data_dir) 51 | self.img_paths = self._get_all_img_paths() 52 | self.hash_dim = hash_dim 53 | self.transform = T.Compose(transform) 54 | 55 | def __len__(self): 56 | if self.type == "comparison": 57 | return len(self.img_paths) ** 2 58 | elif self.type == "label": 59 | # if self.mode == "train": return 67 60 | # elif self.mode == "val": return 2 61 | # else: return 2 62 | return len(self.img_paths) 63 | 64 | def __getitem__(self, index): 65 | if self.type == "comparison": 66 | return self._get_data_comparison(index) 67 | elif self.type == "label": 68 | return self._get_data_label(index) 69 | else: 70 | raise Exception("Invalid dataset type") 71 | 72 | def _get_data_comparison(self, index): 73 | ''' 74 | For __getitem__() method. Return data at index in the format, 75 | (baseline_image, comparison_image, label) 76 | 77 | Label is an integer specifying whether the baseline and comparison are of the same person. 1 is True and 0 is False. 78 | ''' 79 | baseline, compare = self._get_pair_from_index(index) 80 | label = baseline.split("/")[2] == compare.split("/")[2] 81 | bimg = self._get_img_from_path(baseline) 82 | cimg = self._get_img_from_path(compare) 83 | return (bimg, cimg, int(label)) 84 | 85 | def _get_data_label(self, index): 86 | ''' 87 | For __getitem__() method. Return data at index in the format, 88 | (image, label) 89 | 90 | label is the integer index of the person's name in self.names. Returns 91 | None if the image cannot be loaded, so invalid_collate() can filter it out. 92 | ''' 93 | img_path = self.img_paths[index] 94 | name = img_path.split("/")[2] 95 | try: 96 | output = self._get_img_from_path(img_path), self.names.index(name) 97 | except Exception as error: 98 | # print("Exception encountered ({}): {}".format(index, error)) 99 | output = None 100 | 101 | return output 102 | 103 | def _get_pair_from_index(self, index): 104 | ''' 105 | Return the paths to a pair of images based on the index. 106 | ''' 107 | num_imgs = len(self.img_paths) 108 | x, y = index % num_imgs, index // num_imgs 109 | return self.img_paths[x], self.img_paths[y] 110 | 111 | def _get_folder_paths(self): 112 | ''' 113 | Return a list of folder paths for all of the people. 114 | ''' 115 | return list(map(lambda name: self.data_dir + "/" + name, self.names)) 116 | 117 | def _get_all_img_paths(self): 118 | ''' 119 | Return a list of all image paths.
120 | ''' 121 | paths = list(map(self._get_img_paths, self.names)) 122 | return sum(paths, []) 123 | 124 | def _get_img_paths(self, name): 125 | ''' 126 | Returns a list of image paths for the given person. 127 | ''' 128 | folder = self.data_dir + "/" + name 129 | 130 | if self.mode == "train": 131 | pass 132 | elif self.mode == "val": 133 | folder += "/val" 134 | elif self.mode == "test": 135 | folder += "/test" 136 | else: 137 | raise Exception("Invalid dataset mode") 138 | 139 | files = list(filter(lambda f: f not in ["val", "test"], lsdir(folder))) 140 | return list(map(lambda fp: folder + "/" + fp, files)) 141 | 142 | def _get_img_from_path(self, path): 143 | ''' 144 | Returns an image and applies the transformations defined in self.transform. 145 | ''' 146 | img = Image.open(path) 147 | if self.transform is not None: 148 | img = self.transform(img) 149 | return img 150 | 151 | def invalid_collate(batch): 152 | batch = list(filter(lambda X: X is not None, batch)) 153 | return default_collate(batch) 154 | 155 | def create_set(mode, num_imgs=5): 156 | ''' 157 | This method randomly moves up to num_imgs images per person from the DATA_DIR folder into that person's val/ or test/ subfolder. 158 | ''' 159 | options = ["val", "test"] 160 | if mode not in options: return 161 | # path of all of the people names, "./name" 162 | name_paths = list(map(lambda name: DATA_DIR + "/" + name, lsdir(DATA_DIR))) 163 | for path in name_paths: 164 | # "./name/val" 165 | test_path = path + "/" + mode 166 | mkdir(test_path) 167 | file_names = list(filter(lambda i: i not in options, lsdir(path))) 168 | num_names = len(file_names) 169 | idx = list(set(np.random.randint(0, num_names, num_names)))[:num_imgs] 170 | for i in idx: 171 | os.rename(path+"/"+file_names[i], test_path+"/"+file_names[i]) 172 | 173 | def undo_create_set(mode): 174 | ''' 175 | This method will undo create_set().
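Images previously moved into each person's val/ or test/ subfolder are moved back into that person's top-level folder.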
176 | ''' 177 | options = ["val", "test"] 178 | if mode not in options: return 179 | # path of all of the people names 180 | name_paths = list(map(lambda name: DATA_DIR + "/" + name, lsdir(DATA_DIR))) 181 | for path in name_paths: 182 | test_path = path + "/" + mode 183 | if not os.path.exists(test_path): 184 | continue 185 | test_imgs = lsdir(test_path) 186 | for i in range(len(test_imgs)): 187 | os.rename(test_path+"/"+test_imgs[i], path+"/"+test_imgs[i]) 188 | 189 | def assert_data_split_correct(): 190 | undo_create_set("val") 191 | undo_create_set("test") 192 | total_num = len(FaceScrubDataset(mode="train", type="comparison")) 193 | num_people = len(FaceScrubDataset(mode="train").names) 194 | assert total_num == 4083593409, "INCORRECT NUMBER OF IMAGES" 195 | create_set("val") 196 | create_set("test") 197 | train = len(FaceScrubDataset(mode="train", type="comparison")) 198 | val = len(FaceScrubDataset(mode="val", type="comparison")) 199 | test = len(FaceScrubDataset(mode="test", type="comparison")) 200 | assert val == (num_people * 5) ** 2 201 | assert test == (num_people * 5) ** 2 202 | 203 | def calc_mean(X): 204 | array = np.asarray(X[0]) 205 | R = array[:,:,0].mean() 206 | G = array[:,:,1].mean() 207 | B = array[:,:,2].mean() 208 | return R, G, B 209 | 210 | def calc_std(X): 211 | array = np.asarray(X[0]) 212 | R = array[:,:,0].std() 213 | G = array[:,:,1].std() 214 | B = array[:,:,2].std() 215 | return R, G, B 216 | 217 | def get_mean_std(): 218 | dataset = FaceScrubDataset(type="label") 219 | pool = multiprocessing.Pool(max(1, multiprocessing.cpu_count()-2)) 220 | print("Started calculating mean and stds") 221 | means = pool.map(calc_mean, dataset) 222 | stds = pool.map(calc_std, dataset) 223 | pool.close() 224 | pool.join() 225 | return means, stds 226 | 227 | if __name__ == "__main__": 228 | TRANSFORMS = [ 229 | T.Resize((64, 64)), 230 | T.ToTensor() 231 | ] 232 | dataset = FaceScrubDataset(transform=TRANSFORMS) 233 | img, label = dataset[4000] 234 | # assert_data_split_correct() 235 | 236 | # means, stds = get_mean_std() 237 | # red_mean = 0.6118626050840847 238 | # green_mean = 0.4627732225147951 239 | # blue_mean = 0.39181750819165523 240 | # red_std = 0.24004882860157573 241 | # green_std = 0.20515205679125115 242 | # blue_std = 0.19287499225344598 243 | pass 244 | -------------------------------------------------------------------------------- /ddh.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | import torchvision.transforms as T 6 | import multiprocessing 7 | from time import time 8 | from torch.utils.data import DataLoader 9 | from matplotlib import pyplot as plt 10 | 11 | from dataset import * 12 | 13 | class DDH(nn.Module): 14 | ''' 15 | # ========================================================================== 16 | # Discriminative Deep Hashing for Scalable Face Image Retrieval 17 | # https://www.ijcai.org/proceedings/2017/0315.pdf 18 | # ========================================================================== 19 | 20 | Image resized to 32x32, batch size of 256 21 | 22 | Conv1 = 3x3 kernel, 1 stride, 20 dim (output 31x31) 23 | Batch 24 | Pool1 = 2x2 kernel (output 15x15) 25 | 26 | Conv2 = 2x2 kernel, 1 stride, 40 dim (output 14x14) 27 | Batch 28 | Pool2 = 2x2 kernel (output 7x7) 29 | 30 | Conv3 = 2x2 kernel, 1 stride, 60 dim (output 6x6) 31 | Batch 32 | Pool3 = 2x2 kernel (output 3x3) 33 | 34 | Conv4 = 2x2 kernel, 1 stride, 80 dim (output 2x2) 35 | Batch 36 | 37 | Merge = 60*3*3 + 80*2*2 = 860 38 | 
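Note: the sizes above follow the paper's 32x32 inputs. With the 128x128 inputs used in this file (CUSTOM_PARAMS["img_size"]), conv3 outputs 60x15x15 and conv4 outputs 80x14x14, so the merged vector is 60*15*15 + 80*14*14 = 29180, the fc1 input size below.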
39 | Split into K groups, let K = 48 40 | 41 | 480 face features 42 | 48 groups of 10 features 43 | 48-bits 44 | 45 | # ========================================================================== 46 | # Simultaneous Feature Learning and Hash Coding with Deep Neural Networks 47 | # https://arxiv.org/pdf/1504.03410.pdf 48 | # ========================================================================== 49 | ''' 50 | def __init__(self, hash_dim=48, split_num=10, num_classes=530): 51 | super().__init__() 52 | self.cn1 = nn.Conv2d(3, 20, kernel_size=3) 53 | nn.init.kaiming_normal_(self.cn1.weight) 54 | self.bn1 = nn.BatchNorm2d(20) 55 | self.mp1 = nn.MaxPool2d(2) 56 | 57 | self.cn2 = nn.Conv2d(20, 40, kernel_size=2) 58 | nn.init.kaiming_normal_(self.cn2.weight) 59 | self.bn2 = nn.BatchNorm2d(40) 60 | self.mp2 = nn.MaxPool2d(2) 61 | 62 | self.cn3 = nn.Conv2d(40, 60, kernel_size=2) 63 | nn.init.kaiming_normal_(self.cn3.weight) 64 | self.bn3 = nn.BatchNorm2d(60) 65 | self.mp3 = nn.MaxPool2d(2) 66 | 67 | self.cn4 = nn.Conv2d(60, 80, kernel_size=2) 68 | nn.init.kaiming_normal_(self.cn4.weight) 69 | self.bn4 = nn.BatchNorm2d(80) 70 | 71 | # merge layer 72 | self.mg1 = Merge() 73 | self.fc1 = nn.Linear(29180, hash_dim*split_num) 74 | 75 | # hash layer 76 | self.de1 = DivideEncode(hash_dim*split_num, split_num) 77 | 78 | self.fc2 = nn.Linear(hash_dim, num_classes) 79 | 80 | def forward(self, X): 81 | l1 = self.mp1(F.relu(self.bn1(self.cn1(X)))) 82 | l2 = self.mp2(F.relu(self.bn2(self.cn2(l1)))) 83 | l3 = self.mp3(F.relu(self.bn3(self.cn3(l2)))) 84 | l4 = F.relu(self.bn4(self.cn4(l3))) 85 | # merge of output from layer 3 and 4 86 | l5 = self.mg1(l3, l4) 87 | # face feature layer 88 | l6 = F.relu(self.fc1(l5)) 89 | # divide and encode 90 | codes = self.de1(l6) 91 | scores = self.fc2(codes) 92 | return codes, scores 93 | 94 | class Merge(nn.Module): 95 | ''' 96 | Implementation of the Merged Layer in, 97 | 98 | Discriminative Deep Hashing for Scalable Face Image Retrieval 99 | https://www.ijcai.org/proceedings/2017/0315.pdf 100 | ''' 101 | def __init__(self): 102 | super().__init__() 103 | 104 | def forward(self, X1, X2): 105 | X1, X2 = self._flatten(X1), self._flatten(X2) 106 | return self._merge(X1, X2) 107 | 108 | def _flatten(self, X): 109 | N = X.shape[0] 110 | return X.view(N, -1) 111 | 112 | def _merge(self, X1, X2): 113 | return torch.cat((X1, X2), 1) 114 | 115 | class DivideEncode(nn.Module): 116 | ''' 117 | Implementation of the divide-and-encode module in, 118 | 119 | Simultaneous Feature Learning and Hash Coding with Deep Neural Networks 120 | https://arxiv.org/pdf/1504.03410.pdf 121 | ''' 122 | def __init__(self, num_inputs, num_per_group): 123 | super().__init__() 124 | assert num_inputs % num_per_group == 0, \ 125 | "num_inputs should be divisible by num_per_group."
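# shape sketch: with hash_dim=48 and split_num=10, the 480 incoming
# features form 48 groups of 10, and each group is collapsed to a single
# output by a learned weighted sum, one value per hash bit.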
126 | self.num_groups = num_inputs // num_per_group 127 | self.num_per_group = num_per_group 128 | weights_dim = (self.num_groups, self.num_per_group) 129 | self.weights = nn.Parameter(torch.empty(weights_dim)) 130 | nn.init.xavier_normal_(self.weights) 131 | 132 | def forward(self, X): 133 | X = X.view((-1, self.num_groups, self.num_per_group)) 134 | return X.mul(self.weights).sum(2) 135 | 136 | # ========================== 137 | # Hyperparameters 138 | # ========================== 139 | 140 | # number of epochs to train 141 | NUM_EPOCHS = 60 142 | # the number of hash bits in the output 143 | HASH_DIM = 48 144 | # the distance to use for calculating precision/recall 145 | HAMM_RADIUS = 2 146 | # top_k closest images to score for mean average precision 147 | TOP_K = 50 148 | # optimizer parameters 149 | OPTIM_PARAMS = { 150 | "lr": 1e-2, 151 | "weight_decay": 2e-4 152 | } 153 | CUSTOM_PARAMS = { 154 | "beta": 1.0, # quantization loss regularizer 155 | "img_size": 128 156 | } 157 | BATCH_SIZE = { 158 | "train": 256, 159 | "gallery": 128, 160 | "val": 256, 161 | "test": 256 162 | } 163 | LOADER_PARAMS = { 164 | "num_workers": multiprocessing.cpu_count() - 2, 165 | # "num_workers": 1 166 | } 167 | 168 | # ========================== 169 | # Setup 170 | # ========================== 171 | 172 | # uncomment to reset the data 173 | # undo_create_set("val") 174 | # undo_create_set("test") 175 | # create_set("val") 176 | # create_set("test") 177 | 178 | TRANSFORMS = [ 179 | T.Resize((CUSTOM_PARAMS['img_size'], CUSTOM_PARAMS['img_size'])), 180 | T.ToTensor() 181 | ] 182 | 183 | data_train = FaceScrubDataset(type="label", 184 | mode="train", 185 | transform=TRANSFORMS, 186 | hash_dim=HASH_DIM) 187 | 188 | data_val = FaceScrubDataset(type="label", 189 | mode="val", 190 | transform=TRANSFORMS, 191 | hash_dim=HASH_DIM) 192 | 193 | data_test = FaceScrubDataset(type="label", 194 | mode="test", 195 | transform=TRANSFORMS, 196 | hash_dim=HASH_DIM) 197 | 198 | # for training use, shuffling 199 | loader_train = DataLoader(data_train, 200 | batch_size=BATCH_SIZE["train"], 201 | shuffle=True, 202 | **LOADER_PARAMS) 203 | 204 | # for use as gallery, no shuffling 205 | loader_gallery = DataLoader(data_train, 206 | batch_size=BATCH_SIZE["gallery"], 207 | shuffle=False, 208 | **LOADER_PARAMS) 209 | 210 | loader_val = DataLoader(data_val, 211 | batch_size=BATCH_SIZE["val"], 212 | shuffle=False, 213 | **LOADER_PARAMS) 214 | loader_test = DataLoader(data_test, 215 | batch_size=BATCH_SIZE["test"], 216 | shuffle=False, 217 | **LOADER_PARAMS) 218 | 219 | model_class = DDH 220 | model = model_class(hash_dim=HASH_DIM) 221 | optimizer = optim.Adam(model.parameters(), **OPTIM_PARAMS) 222 | 223 | def train(model, loader, optim, logger, **kwargs): 224 | ''' 225 | Train for one epoch.
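Recognized kwargs (summarizing the code below): device (torch.device,
defaults to CPU) and print_iter (log every n iterations, default 40).
Returns the quantization loss and the classification (score) loss, each
averaged over the epoch's iterations.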
226 | ''' 227 | device = kwargs.get("device", torch.device("cpu")) 228 | print_iter = kwargs.get("print_iter", 40) 229 | 230 | model.to(device=device) 231 | # set model to train mode 232 | model.train() 233 | quant_losses = [] 234 | score_losses = [] 235 | 236 | for num_iter, (X, y) in enumerate(loader): 237 | optim.zero_grad() 238 | 239 | X = X.to(device).float() 240 | y = y.to(device).long() 241 | codes, scores = model(X) 242 | # quantization loss 243 | quant_loss = CUSTOM_PARAMS['beta'] * (codes.abs() - 1).abs().mean() 244 | # score error 245 | score_loss = F.cross_entropy(scores, y) 246 | # total loss 247 | loss = quant_loss + score_loss 248 | loss.backward() 249 | # apply gradient 250 | optim.step() 251 | # save the losses 252 | quant_losses.append(quant_loss.item()) 253 | score_losses.append(score_loss.item()) 254 | 255 | if (num_iter+1) % print_iter == 0: 256 | logger.write( 257 | "iter {} ".format(num_iter+1) + 258 | "- quant loss: {:.8f}, score loss: {:.8f}" 259 | .format(quant_loss.item(), score_loss.item())) 260 | 261 | return sum(quant_losses)/len(quant_losses), \ 262 | sum(score_losses)/len(score_losses) 263 | 264 | 265 | if __name__ == "__main__": 266 | # visualize the images 267 | # img = data_train[100][0].transpose(0, 1).transpose(1, 2) 268 | # plt.imshow(img) 269 | # plt.show() 270 | pass 271 | -------------------------------------------------------------------------------- /ddh2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | import torchvision.transforms as T 6 | import multiprocessing 7 | from time import time 8 | from torch.utils.data import DataLoader 9 | from matplotlib import pyplot as plt 10 | 11 | from dataset import * 12 | 13 | class DDH2(nn.Module): 14 | def __init__(self, hash_dim=48, split_num=40, num_classes=530): 15 | super().__init__() 16 | self.cn1 = nn.Conv2d(3, 32, kernel_size=3) 17 | nn.init.kaiming_normal_(self.cn1.weight) 18 | self.bn1 = nn.BatchNorm2d(32) 19 | self.mp1 = nn.MaxPool2d(2) 20 | 21 | self.cn2 = nn.Conv2d(32, 64, kernel_size=2) 22 | nn.init.kaiming_normal_(self.cn2.weight) 23 | self.bn2 = nn.BatchNorm2d(64) 24 | self.mp2 = nn.MaxPool2d(2) 25 | 26 | self.cn3 = nn.Conv2d(64, 128, kernel_size=2) 27 | nn.init.kaiming_normal_(self.cn3.weight) 28 | self.bn3 = nn.BatchNorm2d(128) 29 | self.mp3 = nn.MaxPool2d(2) 30 | 31 | self.cn4 = nn.Conv2d(128, 256, kernel_size=2) 32 | nn.init.kaiming_normal_(self.cn4.weight) 33 | self.bn4 = nn.BatchNorm2d(256) 34 | 35 | # merge layer 36 | self.mg1 = Merge() 37 | self.fc1 = nn.Linear(78976, hash_dim*split_num) 38 | 39 | # hash layer 40 | self.de1 = DivideEncode(hash_dim*split_num, split_num) 41 | 42 | self.fc2 = nn.Linear(hash_dim, num_classes) 43 | 44 | def forward(self, X): 45 | l1 = self.mp1(F.relu(self.bn1(self.cn1(X)))) 46 | l2 = self.mp2(F.relu(self.bn2(self.cn2(l1)))) 47 | l3 = self.mp3(F.relu(self.bn3(self.cn3(l2)))) 48 | l4 = F.relu(self.bn4(self.cn4(l3))) 49 | # merge of output from layer 3 and 4 50 | l5 = self.mg1(l3, l4) 51 | # face feature layer 52 | l6 = F.relu(self.fc1(l5)) 53 | # divide and encode 54 | codes = self.de1(l6) 55 | scores = self.fc2(codes) 56 | return codes, scores 57 | 58 | class Merge(nn.Module): 59 | ''' 60 | Implementation of the Merged Layer in, 61 | 62 | Discriminative Deep Hashing for Scalable Face Image Retrieval 63 | https://www.ijcai.org/proceedings/2017/0315.pdf 64 | ''' 65 | def __init__(self): 66 | super().__init__() 67 | 68 | def 
forward(self, X1, X2): 69 | X1, X2 = self._flatten(X1), self._flatten(X2) 70 | return self._merge(X1, X2) 71 | 72 | def _flatten(self, X): 73 | N = X.shape[0] 74 | return X.view(N, -1) 75 | 76 | def _merge(self, X1, X2): 77 | return torch.cat((X1, X2), 1) 78 | 79 | class DivideEncode(nn.Module): 80 | ''' 81 | Implementation of the divide-and-encode module in, 82 | 83 | Simultaneous Feature Learning and Hash Coding with Deep Neural Networks 84 | https://arxiv.org/pdf/1504.03410.pdf 85 | ''' 86 | def __init__(self, num_inputs, num_per_group): 87 | super().__init__() 88 | assert num_inputs % num_per_group == 0, \ 89 | "num_inputs should be divisible by num_per_group." 90 | self.num_groups = num_inputs // num_per_group 91 | self.num_per_group = num_per_group 92 | weights_dim = (self.num_groups, self.num_per_group) 93 | self.weights = nn.Parameter(torch.empty(weights_dim)) 94 | nn.init.xavier_normal_(self.weights) 95 | 96 | def forward(self, X): 97 | X = X.view((-1, self.num_groups, self.num_per_group)) 98 | return X.mul(self.weights).sum(2) 99 | 100 | # ========================== 101 | # Hyperparameters 102 | # ========================== 103 | 104 | # number of epochs to train 105 | NUM_EPOCHS = 40 106 | # the number of hash bits in the output 107 | HASH_DIM = 48 108 | # the distance to use for calculating precision/recall 109 | HAMM_RADIUS = 2 110 | # top_k closest images to score for mean average precision 111 | TOP_K = 50 112 | # optimizer parameters 113 | OPTIM_PARAMS = { 114 | "lr": 1e-2, 115 | "weight_decay": 2e-4 116 | } 117 | CUSTOM_PARAMS = { 118 | "beta": 1.0, # quantization loss regularizer 119 | "img_size": 128 120 | } 121 | BATCH_SIZE = { 122 | "train": 64, 123 | "gallery": 64, 124 | "val": 64, 125 | "test": 64 126 | } 127 | LOADER_PARAMS = { 128 | "num_workers": multiprocessing.cpu_count() - 2 129 | } 130 | 131 | # ========================== 132 | # Setup 133 | # ========================== 134 | 135 | # uncomment to reset the data 136 | # undo_create_set("val") 137 | # undo_create_set("test") 138 | # create_set("val") 139 | # create_set("test") 140 | 141 | TRANSFORMS = [ 142 | T.Resize((CUSTOM_PARAMS['img_size'], CUSTOM_PARAMS['img_size'])), 143 | T.ToTensor() 144 | ] 145 | 146 | data_train = FaceScrubDataset(type="label", 147 | mode="train", 148 | transform=TRANSFORMS, 149 | hash_dim=HASH_DIM) 150 | 151 | data_val = FaceScrubDataset(type="label", 152 | mode="val", 153 | transform=TRANSFORMS, 154 | hash_dim=HASH_DIM) 155 | 156 | data_test = FaceScrubDataset(type="label", 157 | mode="test", 158 | transform=TRANSFORMS, 159 | hash_dim=HASH_DIM) 160 | 161 | # for training use, shuffling 162 | loader_train = DataLoader(data_train, 163 | batch_size=BATCH_SIZE["train"], 164 | shuffle=True, 165 | **LOADER_PARAMS) 166 | 167 | # for use as gallery, no shuffling 168 | loader_gallery = DataLoader(data_train, 169 | batch_size=BATCH_SIZE["gallery"], 170 | shuffle=False, 171 | **LOADER_PARAMS) 172 | 173 | loader_val = DataLoader(data_val, 174 | batch_size=BATCH_SIZE["val"], 175 | shuffle=False, 176 | **LOADER_PARAMS) 177 | loader_test = DataLoader(data_test, 178 | batch_size=BATCH_SIZE["test"], 179 | shuffle=False, 180 | **LOADER_PARAMS) 181 | 182 | model_class = DDH2 183 | model = model_class(hash_dim=HASH_DIM) 184 | optimizer = optim.Adam(model.parameters(), **OPTIM_PARAMS) 185 | 186 | 187 | def train(model, loader, optim, logger, **kwargs): 188 | ''' 189 | Train for one epoch.
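This loop is identical to train() in ddh.py; see that file for the
recognized kwargs and return values.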
190 | ''' 191 | device = kwargs.get("device", torch.device("cpu")) 192 | print_iter = kwargs.get("print_iter", 40) 193 | 194 | model.to(device=device) 195 | # set model to train mode 196 | model.train() 197 | quant_losses = [] 198 | score_losses = [] 199 | 200 | for num_iter, (X, y) in enumerate(loader): 201 | optim.zero_grad() 202 | 203 | X = X.to(device).float() 204 | y = y.to(device).long() 205 | codes, scores = model(X) 206 | # quantization loss 207 | quant_loss = CUSTOM_PARAMS['beta'] * (codes.abs() - 1).abs().mean() 208 | # score error 209 | score_loss = F.cross_entropy(scores, y) 210 | # total loss 211 | loss = quant_loss + score_loss 212 | loss.backward() 213 | # apply gradient 214 | optim.step() 215 | # save the losses 216 | quant_losses.append(quant_loss.item()) 217 | score_losses.append(score_loss.item()) 218 | 219 | if (num_iter+1) % print_iter == 0: 220 | logger.write( 221 | "iter {} ".format(num_iter+1) + 222 | "- quant loss: {:.8f}, score loss: {:.8f}" 223 | .format(quant_loss.item(), score_loss.item())) 224 | 225 | return sum(quant_losses)/len(quant_losses), \ 226 | sum(score_losses)/len(score_losses) 227 | -------------------------------------------------------------------------------- /ddh3.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | import torchvision.transforms as T 6 | import multiprocessing 7 | from time import time 8 | from torch.utils.data import DataLoader 9 | from matplotlib import pyplot as plt 10 | 11 | from dataset import * 12 | 13 | class DDH3(nn.Module): 14 | ''' 15 | # ========================================================================== 16 | # Discriminative Deep Hashing for Scalable Face Image Retrieval 17 | # https://www.ijcai.org/proceedings/2017/0315.pdf 18 | # ========================================================================== 19 | 20 | Image resized to 32x32, batch size of 256 21 | 22 | Conv1 = 3x3 kernel, 1 stride, 20 dim (output 31x31) 23 | Batch 24 | Pool1 = 2x2 kernel (output 15x15) 25 | 26 | Conv2 = 2x2 kernel, 1 stride, 40 dim (output 14x14) 27 | Batch 28 | Pool2 = 2x2 kernel (output 7x7) 29 | 30 | Conv3 = 2x2 kernel, 1 stride, 60 dim (output 6x6) 31 | Batch 32 | Pool3 = 2x2 kernel (output 3x3) 33 | 34 | Conv4 = 2x2 kernel, 1 stride, 80 dim (output 2x2) 35 | Batch 36 | 37 | Merge = 60*3*3 + 80*2*2 = 860 38 | 39 | Split into K groups, let K = 48 40 | 41 | 480 face features 42 | 48 groups of 10 features 43 | 48-bits 44 | 45 | # ========================================================================== 46 | # Simultaneous Feature Learning and Hash Coding with Deep Neural Networks 47 | # https://arxiv.org/pdf/1504.03410.pdf 48 | # ========================================================================== 49 | ''' 50 | def __init__(self, hash_dim=48, split_num=10): 51 | super().__init__() 52 | self.cn1 = nn.Conv2d(3, 20, kernel_size=3) 53 | nn.init.kaiming_normal_(self.cn1.weight) 54 | self.bn1 = nn.BatchNorm2d(20) 55 | self.mp1 = nn.MaxPool2d(2) 56 | 57 | self.cn2 = nn.Conv2d(20, 40, kernel_size=2) 58 | nn.init.kaiming_normal_(self.cn2.weight) 59 | self.bn2 = nn.BatchNorm2d(40) 60 | self.mp2 = nn.MaxPool2d(2) 61 | 62 | self.cn3 = nn.Conv2d(40, 60, kernel_size=2) 63 | nn.init.kaiming_normal_(self.cn3.weight) 64 | self.bn3 = nn.BatchNorm2d(60) 65 | self.mp3 = nn.MaxPool2d(2) 66 | 67 | self.cn4 = nn.Conv2d(60, 80, kernel_size=2) 68 | nn.init.kaiming_normal_(self.cn4.weight) 69 | self.bn4 = 
nn.BatchNorm2d(80) 70 | 71 | # merge layer 72 | self.mg1 = Merge() 73 | self.fc1 = nn.Linear(29180, hash_dim*split_num) 74 | nn.init.kaiming_normal_(self.fc1.weight) 75 | self.bn5 = nn.BatchNorm1d(hash_dim*split_num) 76 | 77 | # hash layer 78 | self.fc2 = nn.Linear(hash_dim*split_num, hash_dim) 79 | 80 | def forward(self, X): 81 | l1 = self.mp1(F.relu(self.bn1(self.cn1(X)))) 82 | l2 = self.mp2(F.relu(self.bn2(self.cn2(l1)))) 83 | l3 = self.mp3(F.relu(self.bn3(self.cn3(l2)))) 84 | l4 = F.relu(self.bn4(self.cn4(l3))) 85 | # merge of output from layer 3 and 4 86 | l5 = self.mg1(l3, l4) 87 | # face feature layer 88 | l6 = F.relu(self.fc1(l5)) 89 | # hash layer (this variant uses a single linear layer, no divide-and-encode) 90 | codes = self.fc2(l6) 91 | return torch.tanh(codes), None 92 | 93 | class Merge(nn.Module): 94 | ''' 95 | Implementation of the Merged Layer in, 96 | 97 | Discriminative Deep Hashing for Scalable Face Image Retrieval 98 | https://www.ijcai.org/proceedings/2017/0315.pdf 99 | ''' 100 | def __init__(self): 101 | super(Merge, self).__init__() 102 | 103 | def forward(self, X1, X2): 104 | X1, X2 = self._flatten(X1), self._flatten(X2) 105 | return self._merge(X1, X2) 106 | 107 | def _flatten(self, X): 108 | N = X.shape[0] 109 | return X.view(N, -1) 110 | 111 | def _merge(self, X1, X2): 112 | return torch.cat((X1, X2), 1) 113 | 114 | # ========================== 115 | # Hyperparameters 116 | # ========================== 117 | 118 | # number of epochs to train 119 | NUM_EPOCHS = 40 120 | # the number of hash bits in the output 121 | HASH_DIM = 48 122 | # the distance to use for calculating precision/recall 123 | HAMM_RADIUS = 2 124 | # top_k closest images to score for mean average precision 125 | TOP_K = 50 126 | # optimizer parameters 127 | OPTIM_PARAMS = { 128 | "lr": 1e-2, 129 | "weight_decay": 2e-4 130 | } 131 | CUSTOM_PARAMS = { 132 | "dist_threshold": 6, # distance threshold 133 | "alpha": 1e-10, # quantization error 134 | "print_iter": 1, # print every n iterations 135 | "eps": 1e-8, # term added to l2_distance 136 | "gamma": 1e-3, # negative slope when calculating threshold 137 | "img_size": 128 138 | } 139 | BATCH_SIZE = { 140 | # "train": 512, 141 | "train": 32, 142 | "gallery": 128, 143 | "val": 512, 144 | "test": 512 145 | } 146 | LOADER_PARAMS = { 147 | "num_workers": multiprocessing.cpu_count() - 2, 148 | # "num_workers": 1 149 | } 150 | 151 | # ========================== 152 | # Setup 153 | # ========================== 154 | 155 | # uncomment to reset the data 156 | # undo_create_set("val") 157 | # undo_create_set("test") 158 | # create_set("val") 159 | # create_set("test") 160 | 161 | TRANSFORMS = [ 162 | T.Resize((CUSTOM_PARAMS['img_size'], CUSTOM_PARAMS['img_size'])), 163 | T.ToTensor() 164 | ] 165 | 166 | data_train = FaceScrubDataset(type="label", 167 | mode="train", 168 | transform=TRANSFORMS, 169 | hash_dim=HASH_DIM) 170 | 171 | data_val = FaceScrubDataset(type="label", 172 | mode="val", 173 | transform=TRANSFORMS, 174 | hash_dim=HASH_DIM) 175 | 176 | data_test = FaceScrubDataset(type="label", 177 | mode="test", 178 | transform=TRANSFORMS, 179 | hash_dim=HASH_DIM) 180 | 181 | # for training use, shuffling 182 | loader_train = DataLoader(data_train, 183 | batch_size=BATCH_SIZE["train"], 184 | shuffle=True, 185 | **LOADER_PARAMS) 186 | 187 | # for use as gallery, no shuffling 188 | loader_gallery = DataLoader(data_train, 189 | batch_size=BATCH_SIZE["gallery"], 190 | shuffle=False, 191 | **LOADER_PARAMS) 192 | 193 | loader_val = DataLoader(data_val, 194 | batch_size=BATCH_SIZE["val"], 195 | shuffle=False, 196 | 
**LOADER_PARAMS) 197 | loader_test = DataLoader(data_test, 198 | batch_size=BATCH_SIZE["test"], 199 | shuffle=False, 200 | **LOADER_PARAMS) 201 | 202 | model_class = DDH3 203 | model = model_class(hash_dim=HASH_DIM) 204 | optimizer = optim.Adam(model.parameters(), **OPTIM_PARAMS) 205 | 206 | def train(model, loader, optim, logger, **kwargs): 207 | ''' 208 | Train for one epoch. 209 | ''' 210 | device = kwargs.get("device", torch.device("cpu")) 211 | print_iter = kwargs.get("print_iter", 40) 212 | # the distance threshold above which the dissimilar pairs will contribute 0 213 | # to the loss. 214 | mu = kwargs.get("dist_threshold", 2) 215 | # quantization loss regularizer 216 | alpha = kwargs.get("alpha", 0.01) 217 | 218 | model.to(device=device) 219 | # set model to train mode 220 | model.train() 221 | 222 | for num_iter, (X, y) in enumerate(loader): 223 | optim.zero_grad() 224 | 225 | half_size = BATCH_SIZE["train"] // 2 226 | half_size = len(X) // 2 if len(X) < BATCH_SIZE["train"] else half_size 227 | X1 = X[:half_size].float().to(device=device) 228 | X2 = X[half_size:].float().to(device=device) 229 | y1 = y[:half_size].long().to(device=device) 230 | y2 = y[half_size:].long().to(device=device) 231 | with torch.no_grad(): 232 | if len(X2) > len(X1): 233 | # get rid of the last row 234 | X2 = X2[:-1] 235 | y2 = y2[:-1] 236 | 237 | # figure out the ground truth table 238 | y1_gt = y1[None, :].repeat(half_size, 1) 239 | y2_gt = y2[:, None].repeat(1, half_size) 240 | # 1 for similar pairs, 0 for dissimilar pairs 241 | sim_gt = (y1_gt == y2_gt).float() 242 | dissim_gt = (1 - sim_gt) 243 | 244 | C1, _ = model(X1) 245 | C2, _ = model(X2) 246 | 247 | l2_dist = ((C1[:, None, :] - C2) ** 2 + 1e-8).sum(dim=2).sqrt() 248 | # minimize l2_dist for similar pairs (gt at i, j == 1) 249 | similar_loss = (sim_gt * l2_dist).sum() 250 | similar_loss /= (sim_gt.sum() + 1) 251 | # maximize l2_dist for dissimilar pairs 252 | threshold = F.leaky_relu(mu - l2_dist, 253 | negative_slope=CUSTOM_PARAMS['gamma']) 254 | dissimilar_loss = ((1 - sim_gt) * threshold).sum() 255 | dissimilar_loss /= (dissim_gt.sum() + 1) 256 | # similarity loss 257 | sim_loss = similar_loss + dissimilar_loss 258 | # quantization loss 259 | quant_loss = alpha * \ 260 | ((C1.abs() - 1).abs() + ((C2.abs() - 1)).abs()).sum() 261 | 262 | # total loss 263 | loss = sim_loss + quant_loss 264 | # back-propagate 265 | loss.backward() 266 | # apply gradient 267 | optim.step() 268 | 269 | if (num_iter+1) % print_iter == 0: 270 | logger.write( 271 | "iter {}/{} ".format(num_iter+1, len(loader)) + 272 | "- quant loss: {:.4f}, sim loss: {:.4f}, dissim loss: {:.4f}" 273 | .format(quant_loss.item(), similar_loss.item(), 274 | dissimilar_loss.item())) 275 | -------------------------------------------------------------------------------- /ddh4.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | import torch.nn.functional as F 5 | import torchvision.transforms as T 6 | import multiprocessing 7 | from time import time 8 | from torch.utils.data import DataLoader 9 | from matplotlib import pyplot as plt 10 | 11 | from dataset import * 12 | 13 | class DDH4(nn.Module): 14 | ''' 15 | # ========================================================================== 16 | # Discriminative Deep Hashing for Scalable Face Image Retrieval 17 | # https://www.ijcai.org/proceedings/2017/0315.pdf 18 | # 
========================================================================== 19 | 20 | Introduced distance loss metrics. 21 | 22 | Conv1 = 3x3 kernel, 1 stride, 20 dim (output 31x31) 23 | Batch 24 | Pool1 = 2x2 kernel (output 15x15) 25 | 26 | Conv2 = 2x2 kernel, 1 stride, 40 dim (output 14x14) 27 | Batch 28 | Pool2 = 2x2 kernel (output 7x7) 29 | 30 | Conv3 = 2x2 kernel, 1 stride, 60 dim (output 6x6) 31 | Batch 32 | Pool3 = 2x2 kernel (output 3x3) 33 | 34 | Conv4 = 2x2 kernel, 1 stride, 80 dim (output 2x2) 35 | Batch 36 | 37 | Merge = 60*3*3 + 80*2*2 = 860 38 | 39 | Split into K groups, let K = 48 40 | 41 | 480 face features 42 | 48 groups of 10 features 43 | 48-bits 44 | 45 | # ========================================================================== 46 | # Simultaneous Feature Learning and Hash Coding with Deep Neural Networks 47 | # https://arxiv.org/pdf/1504.03410.pdf 48 | # ========================================================================== 49 | ''' 50 | def __init__(self, hash_dim=48, split_num=10, num_classes=530): 51 | super().__init__() 52 | self.cn1 = nn.Conv2d(3, 20, kernel_size=3) 53 | nn.init.kaiming_normal_(self.cn1.weight) 54 | self.bn1 = nn.BatchNorm2d(20) 55 | self.mp1 = nn.MaxPool2d(2) 56 | 57 | self.cn2 = nn.Conv2d(20, 40, kernel_size=2) 58 | nn.init.kaiming_normal_(self.cn2.weight) 59 | self.bn2 = nn.BatchNorm2d(40) 60 | self.mp2 = nn.MaxPool2d(2) 61 | 62 | self.cn3 = nn.Conv2d(40, 60, kernel_size=2) 63 | nn.init.kaiming_normal_(self.cn3.weight) 64 | self.bn3 = nn.BatchNorm2d(60) 65 | self.mp3 = nn.MaxPool2d(2) 66 | 67 | self.cn4 = nn.Conv2d(60, 80, kernel_size=2) 68 | nn.init.kaiming_normal_(self.cn4.weight) 69 | self.bn4 = nn.BatchNorm2d(80) 70 | 71 | # merge layer 72 | self.mg1 = Merge() 73 | self.fc1 = nn.Linear(29180, hash_dim*split_num) 74 | 75 | # hash layer 76 | self.de1 = DivideEncode(hash_dim*split_num, split_num) 77 | 78 | self.fc2 = nn.Linear(hash_dim, num_classes) 79 | 80 | def forward(self, X): 81 | l1 = self.mp1(F.relu(self.bn1(self.cn1(X)))) 82 | l2 = self.mp2(F.relu(self.bn2(self.cn2(l1)))) 83 | l3 = self.mp3(F.relu(self.bn3(self.cn3(l2)))) 84 | l4 = F.relu(self.bn4(self.cn4(l3))) 85 | # merge of output from layer 3 and 4 86 | l5 = self.mg1(l3, l4) 87 | # face feature layer 88 | l6 = F.relu(self.fc1(l5)) 89 | # divide and encode 90 | codes = self.de1(l6) 91 | scores = self.fc2(codes) 92 | return codes, scores 93 | 94 | class Merge(nn.Module): 95 | ''' 96 | Implementation of the Merged Layer in, 97 | 98 | Discriminative Deep Hashing for Scalable Face Image Retrieval 99 | https://www.ijcai.org/proceedings/2017/0315.pdf 100 | ''' 101 | def __init__(self): 102 | super().__init__() 103 | 104 | def forward(self, X1, X2): 105 | X1, X2 = self._flatten(X1), self._flatten(X2) 106 | return self._merge(X1, X2) 107 | 108 | def _flatten(self, X): 109 | N = X.shape[0] 110 | return X.view(N, -1) 111 | 112 | def _merge(self, X1, X2): 113 | return torch.cat((X1, X2), 1) 114 | 115 | class DivideEncode(nn.Module): 116 | ''' 117 | Implementation of the divide-and-encode module in, 118 | 119 | Simultaneous Feature Learning and Hash Coding with Deep Neural Networks 120 | https://arxiv.org/pdf/1504.03410.pdf 121 | ''' 122 | def __init__(self, num_inputs, num_per_group): 123 | super().__init__() 124 | assert num_inputs % num_per_group == 0, \ 125 | "num_inputs should be divisible by num_per_group."
126 | self.num_groups = num_inputs // num_per_group 127 | self.num_per_group = num_per_group 128 | weights_dim = (self.num_groups, self.num_per_group) 129 | self.weights = nn.Parameter(torch.empty(weights_dim)) 130 | nn.init.xavier_normal_(self.weights) 131 | 132 | def forward(self, X): 133 | X = X.view((-1, self.num_groups, self.num_per_group)) 134 | return X.mul(self.weights).sum(2) 135 | 136 | # ========================== 137 | # Hyperparameters 138 | # ========================== 139 | 140 | # number of epochs to train 141 | NUM_EPOCHS = 60 142 | # the number of hash bits in the output 143 | HASH_DIM = 48 144 | # the distance to use for calculating precision/recall 145 | HAMM_RADIUS = 2 146 | # top_k closest images to score for mean average precision 147 | TOP_K = 50 148 | # optimizer parameters 149 | OPTIM_PARAMS = { 150 | "lr": 1e-2, 151 | "weight_decay": 2e-4 152 | } 153 | CUSTOM_PARAMS = { 154 | "alpha": 1.0, # quantization loss regularizer 155 | "beta": 1.0, # score loss regularizer 156 | "gamma": 1.0, # distance loss regularizer 157 | "mu": 6, # threshold for distance contribution to loss 158 | "print_iter": 40, # print every n iterations 159 | "img_size": 128 160 | } 161 | BATCH_SIZE = { 162 | "train": 256, 163 | "gallery": 256, 164 | "val": 256, 165 | "test": 256 166 | } 167 | LOADER_PARAMS = { 168 | # "num_workers": 4, 169 | "num_workers": multiprocessing.cpu_count() - 1, 170 | "collate_fn": invalid_collate 171 | } 172 | 173 | # ========================== 174 | # Setup 175 | # ========================== 176 | 177 | # uncomment to reset the data 178 | # undo_create_set("val") 179 | # undo_create_set("test") 180 | # create_set("val") 181 | # create_set("test") 182 | TRANSFORMS = [ 183 | T.Resize((CUSTOM_PARAMS['img_size'], CUSTOM_PARAMS['img_size'])), 184 | T.ToTensor() 185 | ] 186 | 187 | DATASET_PARAMS = { 188 | "align": True, 189 | "type": "label", 190 | "transform": TRANSFORMS, 191 | "hash_dim": HASH_DIM 192 | } 193 | 194 | data_train = FaceScrubDataset(mode="train", 195 | **DATASET_PARAMS) 196 | 197 | data_val = FaceScrubDataset(mode="val", 198 | **DATASET_PARAMS) 199 | 200 | data_test = FaceScrubDataset(mode="test", 201 | **DATASET_PARAMS) 202 | 203 | # for training use, shuffling 204 | loader_train = DataLoader(data_train, 205 | batch_size=BATCH_SIZE["train"], 206 | shuffle=True, 207 | **LOADER_PARAMS) 208 | 209 | # for use as gallery, no shuffling 210 | loader_gallery = DataLoader(data_train, 211 | batch_size=BATCH_SIZE["gallery"], 212 | shuffle=False, 213 | **LOADER_PARAMS) 214 | 215 | loader_val = DataLoader(data_val, 216 | batch_size=BATCH_SIZE["val"], 217 | shuffle=False, 218 | **LOADER_PARAMS) 219 | loader_test = DataLoader(data_test, 220 | batch_size=BATCH_SIZE["test"], 221 | shuffle=False, 222 | **LOADER_PARAMS) 223 | 224 | model_class = DDH4 225 | model = model_class(hash_dim=HASH_DIM) 226 | optimizer = optim.Adam(model.parameters(), **OPTIM_PARAMS) 227 | 228 | def train(model, loader, optim, logger, **kwargs): 229 | ''' 230 | Train for one epoch.
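On top of the quantization and classification losses, this variant adds a
pairwise distance loss between the two halves of each batch. Only the
averaged quantization and classification losses are returned; the distance
losses are logged but not returned.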
231 |     '''
232 |     device = kwargs.get("device", torch.device("cpu"))
233 |     print_iter = kwargs.get("print_iter", CUSTOM_PARAMS['print_iter'])
234 | 
235 |     model.to(device=device)
236 |     # set model to train mode
237 |     model.train()
238 |     quant_losses = []
239 |     score_losses = []
240 | 
241 |     for num_iter, (X, y) in enumerate(loader):
242 |         optim.zero_grad()
243 | 
244 |         X = X.to(device).float()
245 |         y = y.to(device).long()
246 |         codes, scores = model(X)
247 | 
248 |         # pair the first half of the batch against the second half;
249 |         # slice the codes outside of no_grad so the distance loss can
250 |         # backpropagate into the network (any odd leftover is dropped)
251 |         half_size = len(X) // 2
252 |         C1, C2 = codes[:half_size], codes[half_size:2*half_size]
253 | 
254 |         with torch.no_grad():
255 |             # pairwise ground truth, aligned with l2_dist below:
256 |             # sim_gt[i, j] = 1 iff C1[i] and C2[j] share an identity
257 |             y1 = y[:half_size][:, None]
258 |             y2 = y[half_size:2*half_size][None, :]
259 |             sim_gt = (y1 == y2).float()
260 |             diff_gt = 1 - sim_gt
261 | 
262 |         # distance loss
263 |         l2_dist = ((C1[:, None, :] - C2) ** 2 + 1e-8).sum(dim=2).sqrt()
264 |         sim_loss = (sim_gt * l2_dist).mean()
265 |         threshold = torch.max(CUSTOM_PARAMS['mu'] - l2_dist,
266 |                               torch.zeros_like(l2_dist))
267 |         diff_loss = (diff_gt * threshold).mean()
268 |         dist_loss = 0.10 * sim_loss + 0.90 * diff_loss
269 |         # quantization loss
270 |         quant_loss = (codes.abs() - 1).abs().mean()
271 |         # score error
272 |         score_loss = F.cross_entropy(scores, y)
273 |         # total loss
274 |         loss = CUSTOM_PARAMS['alpha'] * quant_loss + \
275 |                CUSTOM_PARAMS['beta'] * score_loss + \
276 |                CUSTOM_PARAMS['gamma'] * dist_loss
277 |         loss.backward()
278 |         # apply gradient
279 |         optim.step()
280 |         # save the losses
281 |         quant_losses.append(quant_loss.item())
282 |         score_losses.append(score_loss.item())
283 | 
284 |         if (num_iter+1) % print_iter == 0:
285 |             logger.write(
286 |                 "iter {} ".format(num_iter+1) +
287 |                 "- quant loss: {:.4f}, score loss: {:.4f}, sim loss: {:.4f}, diff loss: {:.4f}"
288 |                 .format(quant_loss.item(), score_loss.item(), sim_loss.item(), diff_loss.item()))
289 | 
-------------------------------------------------------------------------------- /eval_perf.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | from hamming_dist import *
3 | from calc_map import *
4 | from calc_pre_rec import *
5 | 
6 | def eval_perf(gallery_codes, gallery_label, test_codes, test_label, **kwargs):
7 |     top_k = kwargs.get("top_k", 50)
8 |     hamm_radius = kwargs.get("hamm_radius", 2)
9 | 
10 |     gallery_codes = gallery_codes.cpu().numpy()
11 |     gallery_label = gallery_label.cpu().numpy()
12 |     test_codes = test_codes.cpu().numpy()
13 |     test_label = test_label.cpu().numpy()
14 | 
15 |     # how many matches between train and test
16 |     label_match = (gallery_label == test_label.T).astype("int8")
17 | 
18 |     dist = hamming_dist(gallery_codes, test_codes)
19 |     ranked = np.argsort(dist, axis=0)
20 | 
21 |     # mean average precision
22 |     mean_ap = calc_map(label_match, ranked, top_k=top_k)
23 | 
24 |     # calculate precision and recall curve
25 |     avg_pre, avg_rec, avg_hmean, pre_curve, rec_curve = \
26 |         calc_pre_rec(dist, label_match, hamm_radius)
27 | 
28 |     return avg_pre, avg_rec, avg_hmean, pre_curve, rec_curve, mean_ap
29 | 
-------------------------------------------------------------------------------- /gather.py: --------------------------------------------------------------------------------
1 | import torch
2 | import pickle
3 | from ddh2 import *
4 | from logger import *
5 | from predict import *
6 | from utils import *
7 | 
8 | MODELS_PATH = "./saved_models"
9 | MODEL_NAME = "/12-08_16-14-22_BA977C.pt"
10 | 
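# NOTE: MODEL_NAME should point at a checkpoint written by run.py;
# model_class, HASH_DIM and the data loaders come from `from ddh2 import *`.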
CODES_PATH = "./codes" 11 | 12 | if torch.cuda.is_available(): 13 | device = torch.device("cuda") 14 | torch.set_default_tensor_type('torch.cuda.FloatTensor') 15 | else: 16 | device = torch.device("cpu") 17 | 18 | mkdir(CODES_PATH) 19 | 20 | if __name__ == "__main__": 21 | gallery = loader_gallery 22 | test = loader_test 23 | model = model_class(hash_dim=HASH_DIM) 24 | model.load_state_dict(torch.load(MODELS_PATH + MODEL_NAME)) 25 | 26 | with Logger(write_to_file=False) as logger: 27 | gallery_codes, gallery_label, test_codes, test_label = \ 28 | predict(model, gallery, test, logger, device=device) 29 | 30 | logger.write("Finished generating codes, writing to output...") 31 | output = (gallery_codes.cpu(), gallery_label.cpu(), 32 | test_codes.cpu(), test_label.cpu()) 33 | output_fn = MODEL_NAME.split(".")[0] + ".codes" 34 | with open(CODES_PATH + output_fn, "wb") as file: 35 | pickle.dump(output, file) 36 | -------------------------------------------------------------------------------- /hamming_dist.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pdb import set_trace 3 | 4 | def hamming_dist(A, B): 5 | ''' 6 | Calculates the hamming distance between two arrays with binary values. 7 | 8 | Args: 9 | - A (numpy.ndarray): array of shape (N, C) where N is the number of 10 | samples and C is the number of binary codes. 11 | - B (numpy.ndarray): array of shape (M, C) where M is the number of 12 | samples and C is the number of binary codes. 13 | 14 | Returns: 15 | (numpy.ndarray): array of shape (N, M), where the ith N and jth M is 16 | the hamming distance between the ith A and jth B. 17 | ''' 18 | code_len = A.shape[1] 19 | A = (2 * A) - 1 20 | B = (2 * B) - 1 21 | dists = 0.5 * (code_len - A.dot(B.T)) 22 | return dists.astype("int") 23 | 24 | if __name__ == "__main__": 25 | A = np.array([[0,1,0],[1,1,0]]) 26 | B = np.array([[1,0,1],[1,1,1]]) 27 | # [0,1,0] ^ [1,0,1] => 3 28 | # [0,1,0] ^ [1,1,1] => 2 29 | # ... 30 | output = np.array([[3, 2],[2,1]]) 31 | num_correct = (hamming_dist(A, B) == output).sum() 32 | assert num_correct == output.size, "Invalid output!" 33 | -------------------------------------------------------------------------------- /logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | from time import time, strftime 3 | from datetime import datetime 4 | from utils import * 5 | 6 | class Logger(object): 7 | ''' 8 | Convenience class for logging metrics to a file. 
9 | ''' 10 | def __init__(self, **kwargs): 11 | self.print_to_stdout = kwargs.get("print_to_std", True) 12 | self._write_to_file = kwargs.get("write_to_file", False) 13 | current_dir = os.getcwd() 14 | file_name = kwargs.get("file_name", self._get_time()) 15 | self.output_path = kwargs.get("output_path", 16 | current_dir + "/logs/{}.txt" 17 | .format(file_name)) 18 | if self._write_to_file: 19 | mkdir(current_dir + "/logs") 20 | 21 | def __enter__(self): 22 | if self._write_to_file: 23 | self.file = open(self.output_path, "a+") # create and append 24 | self.file.write("{}: Logger initialized\n".format(self._get_time())) 25 | return self 26 | 27 | def __exit__(self, type, value, traceback): 28 | if self._write_to_file: 29 | self.file.write("{}: Logger closing\n\n\n".format(self._get_time())) 30 | self.file.close() 31 | 32 | def write(self, message): 33 | if self.print_to_stdout: print(message) 34 | if self._write_to_file: 35 | self.file.write("{}: {}\n".format(self._get_time(), message)) 36 | 37 | def _get_time(self): 38 | return datetime.now().strftime("%Y-%m-%d-%H:%M:%S") 39 | -------------------------------------------------------------------------------- /predict.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def predict(model, loader_gallery, loader_test, logger, **kwargs): 5 | # moving model to CPU because GPU doesn't have enough memory 6 | device = kwargs.get("device", torch.device("cpu")) 7 | print_iter = kwargs.get("print_iter", 100) 8 | 9 | model.to(device=device) 10 | # set model to evaluation mode 11 | model.eval() 12 | 13 | # [gallery_codes, gallery_label, test_codes, test_label] 14 | data = [None] * 4 15 | 16 | with torch.no_grad(): 17 | # process the gallery images 18 | logger.write("Hashing {} gallery images..." 19 | .format(len(loader_gallery.dataset))) 20 | for idx, (X, y) in enumerate(loader_gallery): 21 | gcodes, _ = model(X.to(device=device)) 22 | 23 | if data[0] is None: 24 | data[0] = gcodes 25 | else: 26 | data[0] = torch.cat((data[0], gcodes)) 27 | 28 | if data[1] is None: 29 | data[1] = y 30 | else: 31 | data[1] = torch.cat((data[1], y)) 32 | 33 | if idx % print_iter == 0: 34 | logger.write("{}/{} gallery batches completed..." \ 35 | .format(idx, len(loader_gallery))) 36 | 37 | assert len(loader_gallery.dataset) == len(data[0]) 38 | assert len(loader_gallery.dataset) == len(data[1]) 39 | 40 | logger.write("Hashing test images and labels...") 41 | # process the test images 42 | for idx, (X, y) in enumerate(loader_test): 43 | tcodes, _ = model(X.to(device=device)) 44 | 45 | if data[2] is None: 46 | data[2] = tcodes 47 | else: 48 | data[2] = torch.cat((data[2], tcodes)) 49 | 50 | if data[3] is None: 51 | data[3] = y 52 | else: 53 | data[3] = torch.cat((data[3], y)) 54 | 55 | if idx % print_iter == 0: 56 | logger.write("{}/{} test batches completed..." 
\ 57 | .format(idx, len(loader_test))) 58 | 59 | gallery_codes, gallery_label, test_codes, test_label = data 60 | # activating with sign function 61 | bin_gallery_codes = gallery_codes > 0 62 | bin_test_codes = test_codes > 0 63 | 64 | # reshape labels so gallery and test match shape 65 | gallery_label = gallery_label.unsqueeze(1) 66 | test_label = test_label.unsqueeze(1) 67 | gallery_label = gallery_label.repeat(1, test_label.shape[0]) 68 | test_label = test_label.repeat(1, gallery_label.shape[0]) 69 | 70 | return bin_gallery_codes, gallery_label, bin_test_codes, test_label 71 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import uuid 4 | import pickle 5 | from functools import reduce 6 | from time import time, strftime 7 | from datetime import datetime 8 | 9 | import torch 10 | import numpy as np 11 | import torchvision.transforms as T 12 | from torch.utils.data import DataLoader, Dataset, sampler 13 | from matplotlib import pyplot as plt 14 | 15 | from pdb import set_trace 16 | from logger import * 17 | from eval_perf import * 18 | from predict import * 19 | 20 | 21 | if torch.cuda.is_available(): 22 | device = torch.device("cuda") 23 | torch.set_default_tensor_type('torch.cuda.FloatTensor') 24 | else: 25 | device = torch.device("cpu") 26 | 27 | # ignore all of the "invalid value encountered in true_divide" errors 28 | np.seterr(divide='ignore', invalid='ignore') 29 | 30 | # ================================== 31 | # PARAMETERS 32 | # ================================== 33 | WRITE_TO_FILE = True 34 | 35 | # whether to load from an intermediate point. 36 | LOAD_SAVED = False 37 | # required 38 | SAVED_MODEL_PATH = "./saved_models/12-15_16-57-52_FADE57.pt" 39 | RUN_ID = None 40 | # optional 41 | SAVED_STATS_PATH = None 42 | 43 | # from ddh import * 44 | # from ddh2 import * 45 | # from ddh3 import * 46 | from ddh4 import * 47 | 48 | if LOAD_SAVED and SAVED_MODEL_PATH: 49 | print("Loading existing model...") 50 | model.load_state_dict(torch.load(SAVED_MODEL_PATH)) 51 | 52 | if SAVED_STATS_PATH: 53 | print("Loading existing stats...") 54 | with open(SAVED_STATS_PATH, "rb") as file: 55 | stats = pickle.load(file) 56 | 57 | if not LOAD_SAVED or not SAVED_STATS_PATH: 58 | print("Creating new stats...") 59 | stats = { 60 | "val_mean_aps": [], 61 | "val_avg_pre": [], 62 | "val_avg_rec": [], 63 | "val_avg_hmean": [], 64 | "highest_map": 0.0, 65 | 66 | "test_avg_pre": 0.0, 67 | "test_avg_rec": 0.0, 68 | "test_avg_hmean": 0.0, 69 | "test_mean_ap": 0.0, 70 | "test_pre_curve": None, 71 | "test_rec_curve": None, 72 | } 73 | 74 | if LOAD_SAVED and RUN_ID: 75 | run_id = RUN_ID 76 | else: 77 | run_id = uuid.uuid4().hex.upper()[0:6] 78 | 79 | now = datetime.now().strftime("%m-%d_%H-%M-%S") 80 | file_name = now + "_" + run_id 81 | # model checkpoint 82 | saved_models_path = os.getcwd() + "/saved_models" 83 | mkdir(saved_models_path) 84 | checkpoint_path = saved_models_path + "/{}.pt" \ 85 | .format(file_name) 86 | 87 | # stats collection 88 | stats_path = os.getcwd() + "/stats" 89 | mkdir(stats_path) 90 | stats_file_path = stats_path + "/{}.pickle".format(file_name) 91 | 92 | with Logger(write_to_file=WRITE_TO_FILE, file_name=file_name) as logger: 93 | logger.write( 94 | "Starting run {} for {} epochs with model {}, and following params" 95 | .format(run_id, NUM_EPOCHS, type(model).__name__)) 96 | logger.write("hash_dim: " + str(HASH_DIM)) 97 | 
logger.write(OPTIM_PARAMS) 98 | logger.write(CUSTOM_PARAMS) 99 | logger.write(BATCH_SIZE) 100 | logger.write(LOADER_PARAMS) 101 | logger.write(DATASET_PARAMS) 102 | logger.write("====== START ======") 103 | logger.write("") 104 | 105 | for epoch in range(NUM_EPOCHS): 106 | # ====================================================================== 107 | # TRAINING 108 | # ====================================================================== 109 | logger.write("Epoch {}/{}".format(epoch+1, NUM_EPOCHS)) 110 | logger.write("--------------") 111 | 112 | start = time() 113 | train(model, loader_train, optimizer, logger, 114 | device=device, 115 | **CUSTOM_PARAMS) 116 | logger.write("Training completed in {:.0f} seconds." 117 | .format(time() - start)) 118 | logger.write("") 119 | 120 | # ====================================================================== 121 | # validation 122 | # ====================================================================== 123 | start = time() 124 | # get all of the codes for gallery and test images 125 | gallery_codes, gallery_label, test_codes, test_label = \ 126 | predict(model, loader_gallery, loader_val, logger, device=device) 127 | # evaluate the performance 128 | avg_pre, avg_rec, avg_hmean, _, _, mean_ap = \ 129 | eval_perf(gallery_codes, gallery_label, test_codes, test_label, 130 | top_k=TOP_K, hamm_radius=HAMM_RADIUS) 131 | stats['val_mean_aps'].append(mean_ap) 132 | stats['val_avg_pre'].append(avg_pre) 133 | stats['val_avg_rec'].append(avg_rec) 134 | stats['val_avg_hmean'].append(avg_hmean) 135 | 136 | if mean_ap > stats["highest_map"]: 137 | logger.write( 138 | "Higher mean avg precision {:.8f}/{:.8f}, saving!" 139 | .format(stats["highest_map"], mean_ap)) 140 | # saves the state of this model 141 | torch.save(model.state_dict(), checkpoint_path) 142 | stats["highest_map"] = mean_ap 143 | 144 | logger.write("Validation completed in {:.0f} seconds." 
145 | .format(time() - start)) 146 | 147 | logger.write("val MAP: {:.8f}, ".format(mean_ap) + 148 | "avg precision: {:.6f}, ".format(avg_pre) + 149 | "avg recall: {:.6f}, ".format(avg_rec) + 150 | "avg harmonic mean: {:0.6f}".format(avg_hmean)) 151 | logger.write("") 152 | 153 | # ========================================================================== 154 | # test 155 | # ========================================================================== 156 | best_model = model_class(hash_dim=HASH_DIM) 157 | best_model.load_state_dict(torch.load(checkpoint_path)) 158 | 159 | start = time() 160 | 161 | # get all of the codes for gallery and test images 162 | gallery_codes, gallery_label, test_codes, test_label= \ 163 | predict(best_model, loader_gallery, loader_test, logger, device=device) 164 | # evaluate the performance 165 | stats['test_avg_pre'], stats['test_avg_rec'], stats['test_avg_hmean'], \ 166 | stats['test_pre_curve'], stats['test_rec_curve'], stats['test_mean_ap'] = \ 167 | eval_perf(gallery_codes, gallery_label, test_codes, test_label, 168 | top_k=TOP_K, hamm_radius=HAMM_RADIUS) 169 | 170 | logger.write("Test completed in {:0.0f} seconds" 171 | .format(time() - start)) 172 | logger.write("test MAP: {:.8f}, ".format(stats['test_mean_ap']) + 173 | "avg precision: {:.6f}, ".format(stats['test_avg_pre']) + 174 | "avg recall: {:.6f}, ".format(stats['test_avg_rec']) + 175 | "avg harmonic mean: {:0.6f}" 176 | .format(stats['test_avg_hmean'])) 177 | 178 | logger.write("====== END ======") 179 | logger.write("Completed run for {}".format(run_id)) 180 | 181 | with open(stats_file_path, 'wb') as file: 182 | pickle.dump(stats, file) 183 | -------------------------------------------------------------------------------- /stats.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | from matplotlib import pyplot as plt 4 | from pdb import set_trace 5 | from scipy.interpolate import make_interp_spline, BSpline 6 | 7 | STATS_PATH = "./stats" 8 | FILE_NAME = "/12-17_21-59-29_E7A1ED.pickle" 9 | 10 | def visualize_val_stats(stats): 11 | val_mean_aps = np.array(stats['val_mean_aps']) 12 | val_avg_pre = np.array(stats['val_avg_pre']) 13 | val_avg_rec = np.array(stats['val_avg_rec']) 14 | val_avg_hmean = np.array(stats['val_avg_hmean']) 15 | ticks = len(val_avg_hmean) 16 | 17 | plt.subplot(2, 1, 1) 18 | 19 | plt.plot(val_mean_aps) 20 | # plt.xticks(np.linspace(1, ticks, ticks, dtype="int8")) 21 | plt.title("TOP 50 MEAN AVERAGE PRECISION") 22 | plt.xlabel("EPOCHS") 23 | plt.ylabel("MAP") 24 | 25 | plt.subplot(2, 1, 2) 26 | plt.plot(val_avg_pre) 27 | plt.plot(val_avg_rec) 28 | plt.plot(val_avg_hmean) 29 | plt.title("STATS OVER TIME") 30 | plt.xlabel("EPOCHS") 31 | plt.ylabel("AVERAGE VALUE") 32 | plt.ylim([0.0, 0.7]) 33 | plt.legend( 34 | ["Average Precision", "Average Recall", "Average Harmonic Mean"], 35 | loc="upper right") 36 | 37 | plt.subplots_adjust(hspace=1.0) 38 | 39 | plt.show() 40 | 41 | def visualize_test_stats(stats): 42 | test_recall = np.array(stats['test_rec_curve']) 43 | test_precision = np.array(stats['test_pre_curve']) 44 | test_ap = np.array(stats['test_avg_pre']) 45 | test_map = np.array(stats['test_mean_ap']) 46 | 47 | plt.step(test_recall, test_precision) 48 | plt.fill_between(test_recall, test_precision, alpha=0.2, color='b') 49 | plt.xlabel("Recall") 50 | plt.xlim([0.0, 1.0]) 51 | plt.ylabel("Precision") 52 | plt.ylim([0.0, 1.05]) 53 | plt.title("Retrieval Performance: AP={:.4f}; TOP50 MAP={:.4f}." 
54 | .format(test_ap, test_map)) 55 | plt.show() 56 | 57 | if __name__ == "__main__": 58 | # stats = None 59 | 60 | # with open(STATS_PATH + FILE_NAME, "rb") as file: 61 | # stats = pickle.load(file) 62 | 63 | # visualize_val_stats(stats) 64 | # visualize_test_stats(stats) 65 | 66 | name = [ 67 | "Low Res Baseline", 68 | "High Res Baseline", 69 | "L2 Constraint", 70 | "Image Alignment" 71 | ] 72 | 73 | stat_paths = [ 74 | "/12-17_20-18-32_99AD3C.pickle", 75 | "/12-17_20-25-11_25F03D.pickle", 76 | "/12-17_21-59-29_E7A1ED.pickle", 77 | "/12-19_14-18-00_CE7872.pickle" 78 | ] 79 | 80 | colors = ['r', 'g', 'b', 'y'] 81 | 82 | for idx, path in enumerate(stat_paths): 83 | with open(STATS_PATH + path, "rb") as file: 84 | stats = pickle.load(file) 85 | print(stats['test_mean_ap']) 86 | 87 | # val_mean_aps = np.array(stats['val_mean_aps']) 88 | # ticks = np.linspace(1, len(val_mean_aps), 89 | # len(val_mean_aps), dtype="uint8") 90 | 91 | # X = np.linspace(ticks.min(), ticks.max(), 100) 92 | # spline = make_interp_spline(ticks, val_mean_aps, k=3) 93 | # smooth = spline(X) 94 | # # plt.plot(val_mean_aps) 95 | # plt.plot(X, smooth, color=colors[idx]) 96 | # # plt.xticks(np.linspace(1, ticks, ticks // 2, dtype="int8")) 97 | # plt.title("TOP 50 MEAN AVERAGE PRECISION") 98 | # plt.xlabel("EPOCHS") 99 | # plt.ylabel("MAP") 100 | 101 | # plt.legend(name, loc="lower right") 102 | # plt.show() 103 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import torch 4 | import numpy as np 5 | from time import time 6 | from multiprocessing import Pool, cpu_count 7 | from functools import reduce 8 | from pdb import set_trace 9 | 10 | # ============================================================================== 11 | # NOTE: Run all of the code from the ./code directory! 12 | # ============================================================================== 13 | 14 | DATA_DIR = "./data" 15 | ALIGNED_DATA_DIR = "./aligned_data" 16 | FACESCRUB_DIR = "../facescrub" 17 | RUN_ASSERTS = True 18 | VERBOSE = False 19 | 20 | def mkdir(path): 21 | ''' 22 | Creates the specified directory if it doesn't exist. 23 | ''' 24 | if not os.path.exists(path): 25 | os.mkdir(path) 26 | 27 | def lsdir(path): 28 | ''' 29 | Lists all of the files in the specified path, excluding files that start 30 | with a "." 31 | ''' 32 | files = os.listdir(path) 33 | return list(filter(lambda name: name[0] != ".", files)) 34 | 35 | def preprocess(): 36 | ''' 37 | Preprocess the data in ../facescrub/download and store them in ./data 38 | folder. 39 | ''' 40 | start = time() 41 | mkdir(DATA_DIR) 42 | names = lsdir(FACESCRUB_DIR + "/download") 43 | with Pool(max(1, cpu_count())) as pool: 44 | pool.map(_process_faces, names) 45 | print("Preprocessed images in {:.2f} seconds.".format(time() - start)) 46 | 47 | def _count_before_imgs(): 48 | ''' 49 | Count the number of images in the ./facescrub/download/*/face folders. 
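    Used together with _count_after_imgs to check how many images survive
    preprocessing (see the commented-out prints under __main__).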
50 | ''' 51 | names = lsdir(FACESCRUB_DIR + "/download") 52 | paths = list(map(lambda name: get_facescrub_path(name), names)) 53 | return sum(list(map(lambda path: len(lsdir(path)), paths))) 54 | 55 | def _count_after_imgs(): 56 | ''' 57 | Count the number of images in the ./code/data/* folders 58 | ''' 59 | names = lsdir(DATA_DIR) 60 | paths = list(map(lambda name: get_data_path(name), names)) 61 | return sum(list(map(lambda path: len(lsdir(path)), paths))) 62 | 63 | def _process_faces(name): 64 | ''' 65 | Process the person's face images and save them in the ./code/data directory. 66 | ''' 67 | if VERBOSE: 68 | print_name = name.replace("_", " ") 69 | print("Processing images for {}...".format(print_name)) 70 | start = time() 71 | 72 | # using cropped faces 73 | faces_dir = get_facescrub_path(name) 74 | # create directory for the person in the ./code/data folder 75 | output_dir = get_data_path(name) 76 | mkdir(output_dir) 77 | # list of names of images 78 | img_names = lsdir(faces_dir) 79 | for img_name in img_names: 80 | output_path = output_dir + "/" + img_name 81 | 82 | if os.path.isfile(output_path): 83 | if VERBOSE: 84 | print("File {} already exists, skipping...".format(img_name)) 85 | continue 86 | 87 | img = cv2.imread(faces_dir + "/" + img_name) 88 | 89 | if img is None: 90 | if VERBOSE: 91 | print("Invalid image, skipping...") 92 | continue 93 | 94 | # eliminate empty images (white for some reason), threshold set at 85% 95 | max_threshold = round(reduce(lambda x,y: x*y, img.shape) * 255 * 0.85) 96 | if img.sum() > max_threshold: 97 | if VERBOSE: 98 | print("Image above pixel value threshold, skipping...") 99 | continue 100 | 101 | # save it 102 | cv2.imwrite(output_path, img) 103 | 104 | output_files = lsdir(output_dir) 105 | if VERBOSE: 106 | print("Processed images for {} in {:.2f} seconds. {} images before, {} images after".format(print_name, time() - start, 107 | len(img_names), len(output_files))) 108 | 109 | def get_facescrub_path(name): 110 | return FACESCRUB_DIR + "/download/{}/face".format(name) 111 | 112 | def get_data_path(name): 113 | return DATA_DIR + "/{}".format(name) 114 | 115 | if __name__ == "__main__": 116 | # preprocess() 117 | # print("There are {} images in ./facescrub/download/*/face folder." 118 | # .format(_count_before_imgs())) 119 | # print("There are {} images in ./code/data/*." 
120 | # .format(_count_after_imgs())) 121 | pass 122 | -------------------------------------------------------------------------------- /viz.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | from hamming_dist import * 5 | from ddh2 import * 6 | 7 | CODES_PATH = "./codes" 8 | CODES_FILE = "/12-19_14-18-00_CE7872.codes" 9 | DATASET_PATHS = "./aligned_dataset.pickle" 10 | 11 | # how many test subjects to pick up and examine 12 | NUM_TEST_TO_SHOW = 6 13 | TOP_N_RESULTS = 10 14 | 15 | if __name__ == "__main__": 16 | with open(CODES_PATH + CODES_FILE, "rb") as file: 17 | codes = pickle.load(file) 18 | 19 | gallery_codes, gallery_labels, test_codes, test_labels = codes 20 | gallery_codes = np.array(gallery_codes) 21 | test_codes = np.array(test_codes) 22 | 23 | gallery_labels = gallery_labels.numpy() 24 | test_labels = test_labels.numpy() 25 | truth_table = gallery_labels == test_labels.T 26 | 27 | with open(DATASET_PATHS, "rb") as file: 28 | gallery, test = pickle.load(file) 29 | 30 | num_gallery, num_test = len(gallery_codes), len(test_codes) 31 | 32 | # only looking at a subset of test subjects 33 | test_idx = np.random.randint(0, num_test, num_test)[:NUM_TEST_TO_SHOW] 34 | test_subset = test_codes[test_idx, :] 35 | # calculate the hamming dists 36 | dist = hamming_dist(gallery_codes, test_subset) 37 | # get the sorted idx 38 | sorted_idx = dist.argsort(axis=0) 39 | 40 | fig, ax_arr = plt.subplots(NUM_TEST_TO_SHOW, TOP_N_RESULTS+1, 41 | figsize=(25,25)) 42 | 43 | for i, tidx in enumerate(test_idx): 44 | assert test_labels[tidx, 0] == test[tidx][1], "Mismatched test labels!" 45 | 46 | # display the image 47 | test_img = test[tidx][0].permute(1,2,0) 48 | ax_arr[i, 0].imshow(np.asarray(test_img)) 49 | ax_arr[i, 0].axis("off") 50 | ax_arr[i, 0].set_title("Query") 51 | 52 | # display the top N images 53 | gallery_idx = sorted_idx[:TOP_N_RESULTS, i] 54 | 55 | for j, gidx in enumerate(gallery_idx): 56 | gallery_img = gallery[gidx][0].permute(1,2,0) 57 | ax_arr[i, j+1].imshow(np.asarray(gallery_img)) 58 | ax_arr[i, j+1].axis("off") 59 | 60 | if truth_table[gidx, tidx]: 61 | ax_arr[i, j+1].set_title("MATCH", color="g") 62 | else: 63 | ax_arr[i, j+1].set_title("MISMATCH", color="r") 64 | 65 | # plt.show() 66 | --------------------------------------------------------------------------------