├── LICENSE ├── PASSify ├── README.md ├── face │ ├── __init__.py │ ├── convert_to_onnx.py │ ├── data │ │ ├── __init__.py │ │ ├── config.py │ │ ├── data_augment.py │ │ └── wider_face.py │ ├── detect.py │ ├── layers │ │ ├── __init__.py │ │ ├── functions │ │ │ ├── __init__.py │ │ │ └── prior_box.py │ │ └── modules │ │ │ ├── __init__.py │ │ │ └── multibox_loss.py │ ├── main_face_detector.py │ ├── models │ │ ├── __init__.py │ │ ├── net.py │ │ └── retinaface.py │ ├── sbatch_face_example.sh │ ├── train.py │ ├── utils │ │ ├── __init__.py │ │ ├── box_utils.py │ │ ├── nms │ │ │ ├── __init__.py │ │ │ └── py_cpu_nms.py │ │ └── timer.py │ └── weights │ │ └── mobilenet0.25_Final.pth ├── passify.py └── person │ ├── __init__.py │ ├── cascade_rcnn.yaml │ ├── main_person_detector.py │ └── sbatch_person_example.sh ├── README.md ├── download.sh ├── hubconf.py ├── img.png ├── pass.gif ├── version_history.txt └── vision_transformer.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Yuki M. Asano 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /PASSify/README.md: -------------------------------------------------------------------------------- 1 | # PASSify your dataset 2 | Here we provide the automated scripts that remove humans from your dataset using a face detector and a person detector. 3 | This automated procedure does **not** guarantee full exclusion of humans or personally identifiable information (e.g. licence plates will still be present, some humans might slip through). 4 | However, with this you can find out: 5 | * roughly how much of your dataset includes humans and human faces 6 | * how much your model performance changes when trained on the PASSified version of your dataset. 7 | 8 | ## Running instructions 9 | For all of the following, we provide instructions for running commands on a SLURM-managed cluster, but you can tailor them to run on a single machine too. 10 | You will very likely need to adapt the SLURM headers slightly to fit your cluster; you can find them in `passify.py`. 11 | Have your dataset of images in a structure such as `/path/to/dataset/{folders}/{imagename}`. 12 | 13 | 1. Face detector 14 | We start with the face detector, as it is cheaper and can be run on CPUs.
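Note: the SLURM array job in `face/sbatch_face_example.sh` reads a list of all image paths from an `all_files.txt` file and works through it in chunks of 80,000 images per array task, so the `--array=0-X` upper bound in its header should be set to roughly the number of images divided by 80,000.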
15 | ```sh 16 | DATA_DIRECTORY=/path/to/dataset/ 17 | python passify.py 0 $DATA_DIRECTORY 18 | sbatch face/sbatch_face.sh 19 | ``` 20 | 21 | 2. Person detector 22 | Next we run the person detector on GPUs. For this you need to have installed the [detectron2 repo](https://github.com/facebookresearch/detectron2). 23 | ```sh 24 | 25 | python passify.py 1 26 | sbatch person/sbatch_person.sh 27 | ``` 28 | 29 | 3. Final list 30 | Finally, count the files that you're left with. 31 | ```sh 32 | python passify.py 2 33 | ``` 34 | 35 | ## References 36 | This work relies on two excellent repos: 37 | 38 | The face detector is from [Retinaface](https://github.com/biubug6/Pytorch_Retinaface) (MIT Licence) 39 | ``` 40 | @inproceedings{deng2019retinaface, 41 | title={RetinaFace: Single-stage Dense Face Localisation in the Wild}, 42 | author={Deng, Jiankang and Guo, Jia and Yuxiang, Zhou and Jinke Yu and Irene Kotsia and Zafeiriou, Stefanos}, 43 | booktitle={arxiv}, 44 | year={2019} } 45 | ``` 46 | The person detector is from the [detectron2 repo](https://github.com/facebookresearch/detectron2) (Apache Licence), specifically the Cascade-RCNN model trained with the 3x schedule. 47 | ``` 48 | @misc{wu2019detectron2, 49 | author = {Yuxin Wu and Alexander Kirillov and Francisco Massa and 50 | Wan-Yen Lo and Ross Girshick}, 51 | title = {Detectron2}, 52 | howpublished = {\url{https://github.com/facebookresearch/detectron2}}, 53 | year = {2019} 54 | } 55 | ``` 56 | ## Citation 57 | If you find this useful, please consider citing: 58 | ``` 59 | @Article{asano21pass, 60 | author = "Yuki M. Asano and Christian Rupprecht and Andrew Zisserman and Andrea Vedaldi", 61 | title = "PASS: An ImageNet replacement for self-supervised pretraining without humans", 62 | journal = "NeurIPS Track on Datasets and Benchmarks", 63 | year = "2021" 64 | } 65 | ``` 66 | -------------------------------------------------------------------------------- /PASSify/face/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yukimasano/PASS/e91b4fc9daf219c765ec5816610e76306de0150c/PASSify/face/__init__.py -------------------------------------------------------------------------------- /PASSify/face/convert_to_onnx.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import argparse 4 | import torch 5 | from data import cfg_mnet, cfg_re50 6 | from layers.functions.prior_box import PriorBox 7 | from models.retinaface import RetinaFace 8 | 9 | parser = argparse.ArgumentParser(description='Test') 10 | parser.add_argument('-m', '--trained_model', default='./weights/mobilenet0.25_Final.pth', 11 | type=str, help='Trained state_dict file path to open') 12 | parser.add_argument('--network', default='mobile0.25', help='Backbone network mobile0.25 or resnet50') 13 | parser.add_argument('--long_side', default=640, help='when origin_size is false, long_side is scaled size(320 or 640 for long side)') 14 | parser.add_argument('--cpu', action="store_true", default=True, help='Use cpu inference') 15 | 16 | args = parser.parse_args() 17 | 18 | 19 | def check_keys(model, pretrained_state_dict): 20 | ckpt_keys = set(pretrained_state_dict.keys()) 21 | model_keys = set(model.state_dict().keys()) 22 | used_pretrained_keys = model_keys & ckpt_keys 23 | unused_pretrained_keys = ckpt_keys - model_keys 24 | missing_keys = model_keys - ckpt_keys 25 | print('Missing keys:{}'.format(len(missing_keys))) 26 | print('Unused checkpoint 
keys:{}'.format(len(unused_pretrained_keys))) 27 | print('Used keys:{}'.format(len(used_pretrained_keys))) 28 | assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint' 29 | return True 30 | 31 | 32 | def remove_prefix(state_dict, prefix): 33 | ''' Old style model is stored with all names of parameters sharing common prefix 'module.' ''' 34 | print('remove prefix \'{}\''.format(prefix)) 35 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x 36 | return {f(key): value for key, value in state_dict.items()} 37 | 38 | 39 | def load_model(model, pretrained_path, load_to_cpu): 40 | print('Loading pretrained model from {}'.format(pretrained_path)) 41 | if load_to_cpu: 42 | pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage) 43 | else: 44 | device = torch.cuda.current_device() 45 | pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device)) 46 | if "state_dict" in pretrained_dict.keys(): 47 | pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.') 48 | else: 49 | pretrained_dict = remove_prefix(pretrained_dict, 'module.') 50 | check_keys(model, pretrained_dict) 51 | model.load_state_dict(pretrained_dict, strict=False) 52 | return model 53 | 54 | 55 | if __name__ == '__main__': 56 | torch.set_grad_enabled(False) 57 | cfg = None 58 | if args.network == "mobile0.25": 59 | cfg = cfg_mnet 60 | elif args.network == "resnet50": 61 | cfg = cfg_re50 62 | # net and model 63 | net = RetinaFace(cfg=cfg, phase = 'test') 64 | net = load_model(net, args.trained_model, args.cpu) 65 | net.eval() 66 | print('Finished loading model!') 67 | print(net) 68 | device = torch.device("cpu" if args.cpu else "cuda") 69 | net = net.to(device) 70 | 71 | # ------------------------ export ----------------------------- 72 | output_onnx = 'FaceDetector.onnx' 73 | print("==> Exporting model to ONNX format at '{}'".format(output_onnx)) 74 | input_names = ["input0"] 75 | output_names = ["output0"] 76 | inputs = torch.randn(1, 3, args.long_side, args.long_side).to(device) 77 | 78 | torch_out = torch.onnx._export(net, inputs, output_onnx, export_params=True, verbose=False, 79 | input_names=input_names, output_names=output_names) 80 | 81 | 82 | -------------------------------------------------------------------------------- /PASSify/face/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .wider_face import WiderFaceDetection, detection_collate 2 | from .data_augment import * 3 | from .config import * 4 | -------------------------------------------------------------------------------- /PASSify/face/data/config.py: -------------------------------------------------------------------------------- 1 | # config.py 2 | 3 | cfg_mnet = { 4 | 'name': 'mobilenet0.25', 5 | 'min_sizes': [[16, 32], [64, 128], [256, 512]], 6 | 'steps': [8, 16, 32], 7 | 'variance': [0.1, 0.2], 8 | 'clip': False, 9 | 'loc_weight': 2.0, 10 | 'gpu_train': True, 11 | 'batch_size': 32, 12 | 'ngpu': 1, 13 | 'epoch': 250, 14 | 'decay1': 190, 15 | 'decay2': 220, 16 | 'image_size': 640, 17 | 'pretrain': True, 18 | 'return_layers': {'stage1': 1, 'stage2': 2, 'stage3': 3}, 19 | 'in_channel': 32, 20 | 'out_channel': 64 21 | } 22 | 23 | cfg_re50 = { 24 | 'name': 'Resnet50', 25 | 'min_sizes': [[16, 32], [64, 128], [256, 512]], 26 | 'steps': [8, 16, 32], 27 | 'variance': [0.1, 0.2], 28 | 'clip': False, 29 | 'loc_weight': 2.0, 30 | 'gpu_train': True, 31 | 'batch_size': 24, 32 | 'ngpu': 4, 33 | 
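    # Note: 'decay1' and 'decay2' below are the epochs at which train.py lowers the learning
    # rate (stepvalues = (decay1 * epoch_size, decay2 * epoch_size)), and 'image_size' is the
    # square crop side used by the training-time preproc; none of these affect inference.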
'epoch': 100, 34 | 'decay1': 70, 35 | 'decay2': 90, 36 | 'image_size': 840, 37 | 'pretrain': True, 38 | 'return_layers': {'layer2': 1, 'layer3': 2, 'layer4': 3}, 39 | 'in_channel': 256, 40 | 'out_channel': 256 41 | } 42 | 43 | -------------------------------------------------------------------------------- /PASSify/face/data/data_augment.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import random 4 | from utils.box_utils import matrix_iof 5 | 6 | 7 | def _crop(image, boxes, labels, landm, img_dim): 8 | height, width, _ = image.shape 9 | pad_image_flag = True 10 | 11 | for _ in range(250): 12 | """ 13 | if random.uniform(0, 1) <= 0.2: 14 | scale = 1.0 15 | else: 16 | scale = random.uniform(0.3, 1.0) 17 | """ 18 | PRE_SCALES = [0.3, 0.45, 0.6, 0.8, 1.0] 19 | scale = random.choice(PRE_SCALES) 20 | short_side = min(width, height) 21 | w = int(scale * short_side) 22 | h = w 23 | 24 | if width == w: 25 | l = 0 26 | else: 27 | l = random.randrange(width - w) 28 | if height == h: 29 | t = 0 30 | else: 31 | t = random.randrange(height - h) 32 | roi = np.array((l, t, l + w, t + h)) 33 | 34 | value = matrix_iof(boxes, roi[np.newaxis]) 35 | flag = (value >= 1) 36 | if not flag.any(): 37 | continue 38 | 39 | centers = (boxes[:, :2] + boxes[:, 2:]) / 2 40 | mask_a = np.logical_and(roi[:2] < centers, centers < roi[2:]).all(axis=1) 41 | boxes_t = boxes[mask_a].copy() 42 | labels_t = labels[mask_a].copy() 43 | landms_t = landm[mask_a].copy() 44 | landms_t = landms_t.reshape([-1, 5, 2]) 45 | 46 | if boxes_t.shape[0] == 0: 47 | continue 48 | 49 | image_t = image[roi[1]:roi[3], roi[0]:roi[2]] 50 | 51 | boxes_t[:, :2] = np.maximum(boxes_t[:, :2], roi[:2]) 52 | boxes_t[:, :2] -= roi[:2] 53 | boxes_t[:, 2:] = np.minimum(boxes_t[:, 2:], roi[2:]) 54 | boxes_t[:, 2:] -= roi[:2] 55 | 56 | # landm 57 | landms_t[:, :, :2] = landms_t[:, :, :2] - roi[:2] 58 | landms_t[:, :, :2] = np.maximum(landms_t[:, :, :2], np.array([0, 0])) 59 | landms_t[:, :, :2] = np.minimum(landms_t[:, :, :2], roi[2:] - roi[:2]) 60 | landms_t = landms_t.reshape([-1, 10]) 61 | 62 | 63 | # make sure that the cropped image contains at least one face > 16 pixel at training image scale 64 | b_w_t = (boxes_t[:, 2] - boxes_t[:, 0] + 1) / w * img_dim 65 | b_h_t = (boxes_t[:, 3] - boxes_t[:, 1] + 1) / h * img_dim 66 | mask_b = np.minimum(b_w_t, b_h_t) > 0.0 67 | boxes_t = boxes_t[mask_b] 68 | labels_t = labels_t[mask_b] 69 | landms_t = landms_t[mask_b] 70 | 71 | if boxes_t.shape[0] == 0: 72 | continue 73 | 74 | pad_image_flag = False 75 | 76 | return image_t, boxes_t, labels_t, landms_t, pad_image_flag 77 | return image, boxes, labels, landm, pad_image_flag 78 | 79 | 80 | def _distort(image): 81 | 82 | def _convert(image, alpha=1, beta=0): 83 | tmp = image.astype(float) * alpha + beta 84 | tmp[tmp < 0] = 0 85 | tmp[tmp > 255] = 255 86 | image[:] = tmp 87 | 88 | image = image.copy() 89 | 90 | if random.randrange(2): 91 | 92 | #brightness distortion 93 | if random.randrange(2): 94 | _convert(image, beta=random.uniform(-32, 32)) 95 | 96 | #contrast distortion 97 | if random.randrange(2): 98 | _convert(image, alpha=random.uniform(0.5, 1.5)) 99 | 100 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 101 | 102 | #saturation distortion 103 | if random.randrange(2): 104 | _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5)) 105 | 106 | #hue distortion 107 | if random.randrange(2): 108 | tmp = image[:, :, 0].astype(int) + random.randint(-18, 18) 109 | tmp %= 180 110 | image[:, :, 
0] = tmp 111 | 112 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) 113 | 114 | else: 115 | 116 | #brightness distortion 117 | if random.randrange(2): 118 | _convert(image, beta=random.uniform(-32, 32)) 119 | 120 | image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 121 | 122 | #saturation distortion 123 | if random.randrange(2): 124 | _convert(image[:, :, 1], alpha=random.uniform(0.5, 1.5)) 125 | 126 | #hue distortion 127 | if random.randrange(2): 128 | tmp = image[:, :, 0].astype(int) + random.randint(-18, 18) 129 | tmp %= 180 130 | image[:, :, 0] = tmp 131 | 132 | image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR) 133 | 134 | #contrast distortion 135 | if random.randrange(2): 136 | _convert(image, alpha=random.uniform(0.5, 1.5)) 137 | 138 | return image 139 | 140 | 141 | def _expand(image, boxes, fill, p): 142 | if random.randrange(2): 143 | return image, boxes 144 | 145 | height, width, depth = image.shape 146 | 147 | scale = random.uniform(1, p) 148 | w = int(scale * width) 149 | h = int(scale * height) 150 | 151 | left = random.randint(0, w - width) 152 | top = random.randint(0, h - height) 153 | 154 | boxes_t = boxes.copy() 155 | boxes_t[:, :2] += (left, top) 156 | boxes_t[:, 2:] += (left, top) 157 | expand_image = np.empty( 158 | (h, w, depth), 159 | dtype=image.dtype) 160 | expand_image[:, :] = fill 161 | expand_image[top:top + height, left:left + width] = image 162 | image = expand_image 163 | 164 | return image, boxes_t 165 | 166 | 167 | def _mirror(image, boxes, landms): 168 | _, width, _ = image.shape 169 | if random.randrange(2): 170 | image = image[:, ::-1] 171 | boxes = boxes.copy() 172 | boxes[:, 0::2] = width - boxes[:, 2::-2] 173 | 174 | # landm 175 | landms = landms.copy() 176 | landms = landms.reshape([-1, 5, 2]) 177 | landms[:, :, 0] = width - landms[:, :, 0] 178 | tmp = landms[:, 1, :].copy() 179 | landms[:, 1, :] = landms[:, 0, :] 180 | landms[:, 0, :] = tmp 181 | tmp1 = landms[:, 4, :].copy() 182 | landms[:, 4, :] = landms[:, 3, :] 183 | landms[:, 3, :] = tmp1 184 | landms = landms.reshape([-1, 10]) 185 | 186 | return image, boxes, landms 187 | 188 | 189 | def _pad_to_square(image, rgb_mean, pad_image_flag): 190 | if not pad_image_flag: 191 | return image 192 | height, width, _ = image.shape 193 | long_side = max(width, height) 194 | image_t = np.empty((long_side, long_side, 3), dtype=image.dtype) 195 | image_t[:, :] = rgb_mean 196 | image_t[0:0 + height, 0:0 + width] = image 197 | return image_t 198 | 199 | 200 | def _resize_subtract_mean(image, insize, rgb_mean): 201 | interp_methods = [cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_AREA, cv2.INTER_NEAREST, cv2.INTER_LANCZOS4] 202 | interp_method = interp_methods[random.randrange(5)] 203 | image = cv2.resize(image, (insize, insize), interpolation=interp_method) 204 | image = image.astype(np.float32) 205 | image -= rgb_mean 206 | return image.transpose(2, 0, 1) 207 | 208 | 209 | class preproc(object): 210 | 211 | def __init__(self, img_dim, rgb_means): 212 | self.img_dim = img_dim 213 | self.rgb_means = rgb_means 214 | 215 | def __call__(self, image, targets): 216 | assert targets.shape[0] > 0, "this image does not have gt" 217 | 218 | boxes = targets[:, :4].copy() 219 | labels = targets[:, -1].copy() 220 | landm = targets[:, 4:-1].copy() 221 | 222 | image_t, boxes_t, labels_t, landm_t, pad_image_flag = _crop(image, boxes, labels, landm, self.img_dim) 223 | image_t = _distort(image_t) 224 | image_t = _pad_to_square(image_t,self.rgb_means, pad_image_flag) 225 | image_t, boxes_t, landm_t = _mirror(image_t, boxes_t, landm_t) 
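        # What follows resizes the crop to img_dim x img_dim (with per-channel mean subtraction)
        # and normalises boxes and landmarks by the pre-resize crop width/height, so the
        # returned targets are in [0, 1] coordinates.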
226 | height, width, _ = image_t.shape 227 | image_t = _resize_subtract_mean(image_t, self.img_dim, self.rgb_means) 228 | boxes_t[:, 0::2] /= width 229 | boxes_t[:, 1::2] /= height 230 | 231 | landm_t[:, 0::2] /= width 232 | landm_t[:, 1::2] /= height 233 | 234 | labels_t = np.expand_dims(labels_t, 1) 235 | targets_t = np.hstack((boxes_t, landm_t, labels_t)) 236 | 237 | return image_t, targets_t 238 | -------------------------------------------------------------------------------- /PASSify/face/data/wider_face.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import sys 4 | import torch 5 | import torch.utils.data as data 6 | import cv2 7 | import numpy as np 8 | 9 | class WiderFaceDetection(data.Dataset): 10 | def __init__(self, txt_path, preproc=None): 11 | self.preproc = preproc 12 | self.imgs_path = [] 13 | self.words = [] 14 | f = open(txt_path,'r') 15 | lines = f.readlines() 16 | isFirst = True 17 | labels = [] 18 | for line in lines: 19 | line = line.rstrip() 20 | if line.startswith('#'): 21 | if isFirst is True: 22 | isFirst = False 23 | else: 24 | labels_copy = labels.copy() 25 | self.words.append(labels_copy) 26 | labels.clear() 27 | path = line[2:] 28 | path = txt_path.replace('label.txt','images/') + path 29 | self.imgs_path.append(path) 30 | else: 31 | line = line.split(' ') 32 | label = [float(x) for x in line] 33 | labels.append(label) 34 | 35 | self.words.append(labels) 36 | 37 | def __len__(self): 38 | return len(self.imgs_path) 39 | 40 | def __getitem__(self, index): 41 | img = cv2.imread(self.imgs_path[index]) 42 | height, width, _ = img.shape 43 | 44 | labels = self.words[index] 45 | annotations = np.zeros((0, 15)) 46 | if len(labels) == 0: 47 | return annotations 48 | for idx, label in enumerate(labels): 49 | annotation = np.zeros((1, 15)) 50 | # bbox 51 | annotation[0, 0] = label[0] # x1 52 | annotation[0, 1] = label[1] # y1 53 | annotation[0, 2] = label[0] + label[2] # x2 54 | annotation[0, 3] = label[1] + label[3] # y2 55 | 56 | # landmarks 57 | annotation[0, 4] = label[4] # l0_x 58 | annotation[0, 5] = label[5] # l0_y 59 | annotation[0, 6] = label[7] # l1_x 60 | annotation[0, 7] = label[8] # l1_y 61 | annotation[0, 8] = label[10] # l2_x 62 | annotation[0, 9] = label[11] # l2_y 63 | annotation[0, 10] = label[13] # l3_x 64 | annotation[0, 11] = label[14] # l3_y 65 | annotation[0, 12] = label[16] # l4_x 66 | annotation[0, 13] = label[17] # l4_y 67 | if (annotation[0, 4]<0): 68 | annotation[0, 14] = -1 69 | else: 70 | annotation[0, 14] = 1 71 | 72 | annotations = np.append(annotations, annotation, axis=0) 73 | target = np.array(annotations) 74 | if self.preproc is not None: 75 | img, target = self.preproc(img, target) 76 | 77 | return torch.from_numpy(img), target 78 | 79 | def detection_collate(batch): 80 | """Custom collate fn for dealing with batches of images that have a different 81 | number of associated object annotations (bounding boxes). 
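    (Used as the collate_fn of the training DataLoader in train.py.)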
82 | 83 | Arguments: 84 | batch: (tuple) A tuple of tensor images and lists of annotations 85 | 86 | Return: 87 | A tuple containing: 88 | 1) (tensor) batch of images stacked on their 0 dim 89 | 2) (list of tensors) annotations for a given image are stacked on 0 dim 90 | """ 91 | targets = [] 92 | imgs = [] 93 | for _, sample in enumerate(batch): 94 | for _, tup in enumerate(sample): 95 | if torch.is_tensor(tup): 96 | imgs.append(tup) 97 | elif isinstance(tup, type(np.empty(0))): 98 | annos = torch.from_numpy(tup).float() 99 | targets.append(annos) 100 | 101 | return (torch.stack(imgs, 0), targets) 102 | -------------------------------------------------------------------------------- /PASSify/face/detect.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import torch 4 | import torch.backends.cudnn as cudnn 5 | import numpy as np 6 | from data import cfg_mnet, cfg_re50 7 | from layers.functions.prior_box import PriorBox 8 | from experts.face.utils import py_cpu_nms 9 | import cv2 10 | from models.retinaface import RetinaFace 11 | from experts.face.utils import decode, decode_landm 12 | import time 13 | 14 | parser = argparse.ArgumentParser(description='Retinaface') 15 | 16 | parser.add_argument('-m', '--trained_model', default='./weights/Resnet50_Final.pth', 17 | type=str, help='Trained state_dict file path to open') 18 | parser.add_argument('--network', default='resnet50', help='Backbone network mobile0.25 or resnet50') 19 | parser.add_argument('--cpu', action="store_true", default=False, help='Use cpu inference') 20 | parser.add_argument('--confidence_threshold', default=0.02, type=float, help='confidence_threshold') 21 | parser.add_argument('--top_k', default=5000, type=int, help='top_k') 22 | parser.add_argument('--nms_threshold', default=0.4, type=float, help='nms_threshold') 23 | parser.add_argument('--keep_top_k', default=750, type=int, help='keep_top_k') 24 | parser.add_argument('-s', '--save_image', action="store_true", default=True, help='show detection results') 25 | parser.add_argument('--vis_thres', default=0.6, type=float, help='visualization_threshold') 26 | args = parser.parse_args() 27 | 28 | 29 | def check_keys(model, pretrained_state_dict): 30 | ckpt_keys = set(pretrained_state_dict.keys()) 31 | model_keys = set(model.state_dict().keys()) 32 | used_pretrained_keys = model_keys & ckpt_keys 33 | unused_pretrained_keys = ckpt_keys - model_keys 34 | missing_keys = model_keys - ckpt_keys 35 | print('Missing keys:{}'.format(len(missing_keys))) 36 | print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys))) 37 | print('Used keys:{}'.format(len(used_pretrained_keys))) 38 | assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint' 39 | return True 40 | 41 | 42 | def remove_prefix(state_dict, prefix): 43 | ''' Old style model is stored with all names of parameters sharing common prefix 'module.' 
''' 44 | print('remove prefix \'{}\''.format(prefix)) 45 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x 46 | return {f(key): value for key, value in state_dict.items()} 47 | 48 | 49 | def load_model(model, pretrained_path, load_to_cpu): 50 | print('Loading pretrained model from {}'.format(pretrained_path)) 51 | if load_to_cpu: 52 | pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage) 53 | else: 54 | device = torch.cuda.current_device() 55 | pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device)) 56 | if "state_dict" in pretrained_dict.keys(): 57 | pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.') 58 | else: 59 | pretrained_dict = remove_prefix(pretrained_dict, 'module.') 60 | check_keys(model, pretrained_dict) 61 | model.load_state_dict(pretrained_dict, strict=False) 62 | return model 63 | 64 | 65 | if __name__ == '__main__': 66 | torch.set_grad_enabled(False) 67 | cfg = None 68 | if args.network == "mobile0.25": 69 | cfg = cfg_mnet 70 | elif args.network == "resnet50": 71 | cfg = cfg_re50 72 | # net and model 73 | net = RetinaFace(cfg=cfg, phase = 'test') 74 | net = load_model(net, args.trained_model, args.cpu) 75 | net.eval() 76 | print('Finished loading model!') 77 | print(net) 78 | cudnn.benchmark = True 79 | device = torch.device("cpu" if args.cpu else "cuda") 80 | net = net.to(device) 81 | 82 | resize = 1 83 | 84 | # testing begin 85 | for i in range(100): 86 | image_path = "curve/test.jpg" 87 | img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR) 88 | 89 | img = np.float32(img_raw) 90 | 91 | im_height, im_width, _ = img.shape 92 | scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) 93 | img -= (104, 117, 123) 94 | img = img.transpose(2, 0, 1) 95 | img = torch.from_numpy(img).unsqueeze(0) 96 | img = img.to(device) 97 | scale = scale.to(device) 98 | 99 | tic = time.time() 100 | loc, conf, landms = net(img) # forward pass 101 | print('net forward time: {:.4f}'.format(time.time() - tic)) 102 | 103 | priorbox = PriorBox(cfg, image_size=(im_height, im_width)) 104 | priors = priorbox.forward() 105 | priors = priors.to(device) 106 | prior_data = priors.data 107 | boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) 108 | boxes = boxes * scale / resize 109 | boxes = boxes.cpu().numpy() 110 | scores = conf.squeeze(0).data.cpu().numpy()[:, 1] 111 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) 112 | scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2], 113 | img.shape[3], img.shape[2], img.shape[3], img.shape[2], 114 | img.shape[3], img.shape[2]]) 115 | scale1 = scale1.to(device) 116 | landms = landms * scale1 / resize 117 | landms = landms.cpu().numpy() 118 | 119 | # ignore low scores 120 | inds = np.where(scores > args.confidence_threshold)[0] 121 | boxes = boxes[inds] 122 | landms = landms[inds] 123 | scores = scores[inds] 124 | 125 | # keep top-K before NMS 126 | order = scores.argsort()[::-1][:args.top_k] 127 | boxes = boxes[order] 128 | landms = landms[order] 129 | scores = scores[order] 130 | 131 | # do NMS 132 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) 133 | keep = py_cpu_nms(dets, args.nms_threshold) 134 | # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu) 135 | dets = dets[keep, :] 136 | landms = landms[keep] 137 | 138 | # keep top-K faster NMS 139 | dets = dets[:args.keep_top_k, :] 140 | landms = landms[:args.keep_top_k, 
:] 141 | 142 | dets = np.concatenate((dets, landms), axis=1) 143 | 144 | # show image 145 | if args.save_image: 146 | for b in dets: 147 | if b[4] < args.vis_thres: 148 | continue 149 | text = "{:.4f}".format(b[4]) 150 | b = list(map(int, b)) 151 | cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2) 152 | cx = b[0] 153 | cy = b[1] + 12 154 | cv2.putText(img_raw, text, (cx, cy), 155 | cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) 156 | 157 | # landms 158 | cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4) 159 | cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4) 160 | cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4) 161 | cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4) 162 | cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4) 163 | # save image 164 | 165 | name = "test.jpg" 166 | cv2.imwrite(name, img_raw) 167 | 168 | -------------------------------------------------------------------------------- /PASSify/face/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import * 2 | from .modules import * 3 | -------------------------------------------------------------------------------- /PASSify/face/layers/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yukimasano/PASS/e91b4fc9daf219c765ec5816610e76306de0150c/PASSify/face/layers/functions/__init__.py -------------------------------------------------------------------------------- /PASSify/face/layers/functions/prior_box.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from itertools import product as product 3 | import numpy as np 4 | from math import ceil 5 | 6 | 7 | class PriorBox(object): 8 | def __init__(self, cfg, image_size=None, phase='train'): 9 | super(PriorBox, self).__init__() 10 | self.min_sizes = cfg['min_sizes'] 11 | self.steps = cfg['steps'] 12 | self.clip = cfg['clip'] 13 | self.image_size = image_size 14 | self.feature_maps = [[ceil(self.image_size[0]/step), ceil(self.image_size[1]/step)] for step in self.steps] 15 | self.name = "s" 16 | 17 | def forward(self): 18 | anchors = [] 19 | for k, f in enumerate(self.feature_maps): 20 | min_sizes = self.min_sizes[k] 21 | for i, j in product(range(f[0]), range(f[1])): 22 | for min_size in min_sizes: 23 | s_kx = min_size / self.image_size[1] 24 | s_ky = min_size / self.image_size[0] 25 | dense_cx = [x * self.steps[k] / self.image_size[1] for x in [j + 0.5]] 26 | dense_cy = [y * self.steps[k] / self.image_size[0] for y in [i + 0.5]] 27 | for cy, cx in product(dense_cy, dense_cx): 28 | anchors += [cx, cy, s_kx, s_ky] 29 | 30 | # back to torch land 31 | output = torch.Tensor(anchors).view(-1, 4) 32 | if self.clip: 33 | output.clamp_(max=1, min=0) 34 | return output 35 | -------------------------------------------------------------------------------- /PASSify/face/layers/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .multibox_loss import MultiBoxLoss 2 | 3 | __all__ = ['MultiBoxLoss'] 4 | -------------------------------------------------------------------------------- /PASSify/face/layers/modules/multibox_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | from utils.box_utils import match, log_sum_exp 6 | from data 
import cfg_mnet 7 | GPU = cfg_mnet['gpu_train'] 8 | 9 | class MultiBoxLoss(nn.Module): 10 | """SSD Weighted Loss Function 11 | Compute Targets: 12 | 1) Produce Confidence Target Indices by matching ground truth boxes 13 | with (default) 'priorboxes' that have jaccard index > threshold parameter 14 | (default threshold: 0.5). 15 | 2) Produce localization target by 'encoding' variance into offsets of ground 16 | truth boxes and their matched 'priorboxes'. 17 | 3) Hard negative mining to filter the excessive number of negative examples 18 | that comes with using a large number of default bounding boxes. 19 | (default negative:positive ratio 3:1) 20 | Objective Loss: 21 | L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 22 | Where, Lconf is the CrossEntropy Loss and Lloc is the SmoothL1 Loss 23 | weighted by α which is set to 1 by cross val. 24 | Args: 25 | c: class confidences, 26 | l: predicted boxes, 27 | g: ground truth boxes 28 | N: number of matched default boxes 29 | See: https://arxiv.org/pdf/1512.02325.pdf for more details. 30 | """ 31 | 32 | def __init__(self, num_classes, overlap_thresh, prior_for_matching, bkg_label, neg_mining, neg_pos, neg_overlap, encode_target): 33 | super(MultiBoxLoss, self).__init__() 34 | self.num_classes = num_classes 35 | self.threshold = overlap_thresh 36 | self.background_label = bkg_label 37 | self.encode_target = encode_target 38 | self.use_prior_for_matching = prior_for_matching 39 | self.do_neg_mining = neg_mining 40 | self.negpos_ratio = neg_pos 41 | self.neg_overlap = neg_overlap 42 | self.variance = [0.1, 0.2] 43 | 44 | def forward(self, predictions, priors, targets): 45 | """Multibox Loss 46 | Args: 47 | predictions (tuple): A tuple containing loc preds, conf preds, 48 | and prior boxes from SSD net. 49 | conf shape: torch.size(batch_size,num_priors,num_classes) 50 | loc shape: torch.size(batch_size,num_priors,4) 51 | priors shape: torch.size(num_priors,4) 52 | 53 | ground_truth (tensor): Ground truth boxes and labels for a batch, 54 | shape: [batch_size,num_objs,5] (last idx is the label). 
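                Note: in this RetinaFace variant each target row has 15 values
                (4 box coordinates, 10 landmark coordinates, 1 label), as produced
                by wider_face.py; the slicing below relies on that layout.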
55 | """ 56 | 57 | loc_data, conf_data, landm_data = predictions 58 | priors = priors 59 | num = loc_data.size(0) 60 | num_priors = (priors.size(0)) 61 | 62 | # match priors (default boxes) and ground truth boxes 63 | loc_t = torch.Tensor(num, num_priors, 4) 64 | landm_t = torch.Tensor(num, num_priors, 10) 65 | conf_t = torch.LongTensor(num, num_priors) 66 | for idx in range(num): 67 | truths = targets[idx][:, :4].data 68 | labels = targets[idx][:, -1].data 69 | landms = targets[idx][:, 4:14].data 70 | defaults = priors.data 71 | match(self.threshold, truths, defaults, self.variance, labels, landms, loc_t, conf_t, landm_t, idx) 72 | if GPU: 73 | loc_t = loc_t.cuda() 74 | conf_t = conf_t.cuda() 75 | landm_t = landm_t.cuda() 76 | 77 | zeros = torch.tensor(0).cuda() 78 | # landm Loss (Smooth L1) 79 | # Shape: [batch,num_priors,10] 80 | pos1 = conf_t > zeros 81 | num_pos_landm = pos1.long().sum(1, keepdim=True) 82 | N1 = max(num_pos_landm.data.sum().float(), 1) 83 | pos_idx1 = pos1.unsqueeze(pos1.dim()).expand_as(landm_data) 84 | landm_p = landm_data[pos_idx1].view(-1, 10) 85 | landm_t = landm_t[pos_idx1].view(-1, 10) 86 | loss_landm = F.smooth_l1_loss(landm_p, landm_t, reduction='sum') 87 | 88 | 89 | pos = conf_t != zeros 90 | conf_t[pos] = 1 91 | 92 | # Localization Loss (Smooth L1) 93 | # Shape: [batch,num_priors,4] 94 | pos_idx = pos.unsqueeze(pos.dim()).expand_as(loc_data) 95 | loc_p = loc_data[pos_idx].view(-1, 4) 96 | loc_t = loc_t[pos_idx].view(-1, 4) 97 | loss_l = F.smooth_l1_loss(loc_p, loc_t, reduction='sum') 98 | 99 | # Compute max conf across batch for hard negative mining 100 | batch_conf = conf_data.view(-1, self.num_classes) 101 | loss_c = log_sum_exp(batch_conf) - batch_conf.gather(1, conf_t.view(-1, 1)) 102 | 103 | # Hard Negative Mining 104 | loss_c[pos.view(-1, 1)] = 0 # filter out pos boxes for now 105 | loss_c = loss_c.view(num, -1) 106 | _, loss_idx = loss_c.sort(1, descending=True) 107 | _, idx_rank = loss_idx.sort(1) 108 | num_pos = pos.long().sum(1, keepdim=True) 109 | num_neg = torch.clamp(self.negpos_ratio*num_pos, max=pos.size(1)-1) 110 | neg = idx_rank < num_neg.expand_as(idx_rank) 111 | 112 | # Confidence Loss Including Positive and Negative Examples 113 | pos_idx = pos.unsqueeze(2).expand_as(conf_data) 114 | neg_idx = neg.unsqueeze(2).expand_as(conf_data) 115 | conf_p = conf_data[(pos_idx+neg_idx).gt(0)].view(-1,self.num_classes) 116 | targets_weighted = conf_t[(pos+neg).gt(0)] 117 | loss_c = F.cross_entropy(conf_p, targets_weighted, reduction='sum') 118 | 119 | # Sum of losses: L(x,c,l,g) = (Lconf(x, c) + αLloc(x,l,g)) / N 120 | N = max(num_pos.data.sum().float(), 1) 121 | loss_l /= N 122 | loss_c /= N 123 | loss_landm /= N1 124 | 125 | return loss_l, loss_c, loss_landm 126 | -------------------------------------------------------------------------------- /PASSify/face/main_face_detector.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import torch 4 | import numpy as np 5 | from data import cfg_mnet, cfg_re50 6 | from layers.functions.prior_box import PriorBox 7 | # import utils 8 | from utils.nms.py_cpu_nms import py_cpu_nms 9 | import cv2 10 | from models.retinaface import RetinaFace 11 | from utils.box_utils import decode, decode_landm 12 | from utils.timer import Timer 13 | 14 | 15 | parser = argparse.ArgumentParser(description='Retinaface') 16 | parser.add_argument('-m', '--trained_model', default='./weights/mobilenet0.25_Final.pth', 17 | type=str, help='Trained state_dict file path 
to open') 18 | parser.add_argument('--network', default='mobile0.25', help='Backbone network mobile0.25 or resnet50') 19 | parser.add_argument('--origin_size', default=True, type=str, help='Whether use origin image size to evaluate') 20 | # parser.add_argument('--save_folder', default='/scratch/shared/beegfs/yuki/fast/yfcc/retinaface_out/', type=str, help='Dir to save txt results') 21 | parser.add_argument('--save_folder', default='./testout/', type=str, help='Dir to save txt results') 22 | 23 | parser.add_argument('--cpu', action="store_true", default=False, help='Use cpu inference') 24 | parser.add_argument('--dataset_folder', default='/scratch/shared/beegfs/yuki/data/ILSVRC12/val/n01440764/', type=str, help='dataset path') 25 | parser.add_argument('--input_txt', default='', type=str, help='dataset path') 26 | 27 | parser.add_argument('--confidence_threshold', default=0.02, type=float, help='confidence_threshold') 28 | parser.add_argument('--top_k', default=5000, type=int, help='top_k') 29 | parser.add_argument('--nms_threshold', default=0.4, type=float, help='nms_threshold') 30 | parser.add_argument('--keep_top_k', default=750, type=int, help='keep_top_k') 31 | parser.add_argument('-s', '--save_image', action="store_true", default=False, help='show detection results') 32 | parser.add_argument('--vis_thres', default=0.5, type=float, help='visualization_threshold') 33 | args = parser.parse_args() 34 | 35 | 36 | def check_keys(model, pretrained_state_dict): 37 | ckpt_keys = set(pretrained_state_dict.keys()) 38 | model_keys = set(model.state_dict().keys()) 39 | used_pretrained_keys = model_keys & ckpt_keys 40 | unused_pretrained_keys = ckpt_keys - model_keys 41 | missing_keys = model_keys - ckpt_keys 42 | print('Missing keys:{}'.format(len(missing_keys))) 43 | print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys))) 44 | print('Used keys:{}'.format(len(used_pretrained_keys))) 45 | assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint' 46 | return True 47 | 48 | 49 | def remove_prefix(state_dict, prefix): 50 | ''' Old style model is stored with all names of parameters sharing common prefix 'module.' 
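    e.g. maps 'module.fc.weight' -> 'fc.weight'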
''' 51 | print('remove prefix \'{}\''.format(prefix)) 52 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x 53 | return {f(key): value for key, value in state_dict.items()} 54 | 55 | 56 | def load_model(model, pretrained_path, load_to_cpu): 57 | print('Loading pretrained model from {}'.format(pretrained_path)) 58 | if load_to_cpu: 59 | pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage) 60 | else: 61 | device = torch.cuda.current_device() 62 | pretrained_dict = torch.load(pretrained_path, map_location=lambda storage, loc: storage.cuda(device)) 63 | if "state_dict" in pretrained_dict.keys(): 64 | pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.') 65 | else: 66 | pretrained_dict = remove_prefix(pretrained_dict, 'module.') 67 | check_keys(model, pretrained_dict) 68 | model.load_state_dict(pretrained_dict, strict=False) 69 | return model 70 | 71 | def touch(fname, times=None): 72 | with open(fname, 'a'): 73 | os.utime(fname, times) 74 | 75 | 76 | 77 | if __name__ == '__main__': 78 | torch.set_grad_enabled(False) 79 | 80 | cfg = None 81 | if args.network == "mobile0.25": 82 | cfg = cfg_mnet 83 | elif args.network == "resnet50": 84 | cfg = cfg_re50 85 | cfg['pretrain'] = False 86 | # net and model 87 | net = RetinaFace(cfg=cfg, phase = 'test') 88 | net = load_model(net, args.trained_model, args.cpu) 89 | net.eval() 90 | print('Finished loading model!') 91 | print(net) 92 | # cudnn.benchmark = True 93 | device = torch.device("cpu" if args.cpu else "cuda") 94 | net = net.to(device) 95 | 96 | # testing dataset 97 | # testset_folder = args.dataset_folder 98 | # test_dataset = os.listdir(testset_folder) 99 | if args.input_txt != '': 100 | f_list = open(args.input_txt, 'r') 101 | test_dataset = f_list.readlines() 102 | test_dataset = [_d.strip() for _d in test_dataset] 103 | print(f'done preparing dataset!, N={len(test_dataset)}', flush=True) 104 | num_images = len(test_dataset) 105 | 106 | _t = {'forward_pass': Timer(), 'misc': Timer()} 107 | 108 | # testing begin 109 | for i, image_path in enumerate(test_dataset): 110 | img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR) 111 | img_name = image_path.split('/')[-1] 112 | subfolder = image_path.split('/')[-2] 113 | save_name = os.path.join(args.save_folder, subfolder, img_name.split('.')[0] + ".txt") 114 | dirname = os.path.dirname(save_name) 115 | if not os.path.isdir(dirname): 116 | os.makedirs(dirname) 117 | if not os.path.exists(save_name): 118 | img = np.float32(img_raw) 119 | 120 | # testing scale 121 | target_size = 1600 122 | max_size = 2150 123 | im_shape = img.shape 124 | im_size_min = np.min(im_shape[0:2]) 125 | im_size_max = np.max(im_shape[0:2]) 126 | resize = float(target_size) / float(im_size_min) 127 | # prevent bigger axis from being more than max_size: 128 | if np.round(resize * im_size_max) > max_size: 129 | resize = float(max_size) / float(im_size_max) 130 | if args.origin_size: 131 | resize = 1 132 | 133 | if resize != 1: 134 | img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR) 135 | im_height, im_width, _ = img.shape 136 | scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) 137 | img -= (104, 117, 123) 138 | img = img.transpose(2, 0, 1) 139 | img = torch.from_numpy(img).unsqueeze(0) 140 | img = img.to(device) 141 | scale = scale.to(device) 142 | 143 | _t['forward_pass'].tic() 144 | loc, conf, landms = net(img) # forward pass 145 | _t['forward_pass'].toc() 146 | _t['misc'].tic() 147 | 
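            # Post-processing: build priors for this image size, decode the box and landmark
            # regressions against them, rescale to pixel coordinates of the (possibly resized)
            # input, drop low-confidence detections and run CPU NMS before writing the results.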
priorbox = PriorBox(cfg, image_size=(im_height, im_width)) 148 | priors = priorbox.forward() 149 | priors = priors.to(device) 150 | prior_data = priors.data 151 | boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) 152 | boxes = boxes * scale / resize 153 | boxes = boxes.cpu().numpy() 154 | scores = conf.squeeze(0).data.cpu().numpy()[:, 1] 155 | landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) 156 | scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2], 157 | img.shape[3], img.shape[2], img.shape[3], img.shape[2], 158 | img.shape[3], img.shape[2]]) 159 | scale1 = scale1.to(device) 160 | landms = landms * scale1 / resize 161 | landms = landms.cpu().numpy() 162 | 163 | # ignore low scores 164 | inds = np.where(scores > args.confidence_threshold)[0] 165 | boxes = boxes[inds] 166 | landms = landms[inds] 167 | scores = scores[inds] 168 | 169 | # keep top-K before NMS 170 | order = scores.argsort()[::-1] 171 | # order = scores.argsort()[::-1][:args.top_k] 172 | boxes = boxes[order] 173 | landms = landms[order] 174 | scores = scores[order] 175 | 176 | # do NMS 177 | dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False) 178 | keep = py_cpu_nms(dets, args.nms_threshold) 179 | # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu) 180 | dets = dets[keep, :] 181 | landms = landms[keep] 182 | 183 | # keep top-K faster NMS 184 | # dets = dets[:args.keep_top_k, :] 185 | # landms = landms[:args.keep_top_k, :] 186 | 187 | dets = np.concatenate((dets, landms), axis=1) 188 | _t['misc'].toc() 189 | 190 | # -------------------------------------------------------------------- 191 | name_ifface = os.path.join(args.save_folder, 'face_index',subfolder, img_name) 192 | name_ifnoface = os.path.join(args.save_folder, 'noface_index', subfolder, img_name ) 193 | 194 | 195 | with open(save_name, "w") as fd: 196 | bboxs = dets 197 | file_name = os.path.basename(save_name)[:-4] + "\n" 198 | bboxs_num = sum(bboxs[:, 4] > 0.5) 199 | # fd.write(file_name) 200 | # fd.write(bboxs_num) 201 | for box in bboxs: 202 | x = int(box[0]) 203 | y = int(box[1]) 204 | w = int(box[2]) - int(box[0]) 205 | h = int(box[3]) - int(box[1]) 206 | confidence = str(box[4]) 207 | line = str(x) + "," + str(y) + "," + str(w) + "," + str(h) + "," + confidence + " \n" 208 | fd.write(line) 209 | if bboxs_num > 0 : 210 | if not os.path.isdir(os.path.dirname(name_ifface)): 211 | os.makedirs(os.path.dirname(name_ifface), exist_ok=True) 212 | touch(name_ifface) 213 | else: 214 | if not os.path.isdir(os.path.dirname(name_ifnoface)): 215 | os.makedirs(os.path.dirname(name_ifnoface), exist_ok=True) 216 | touch(name_ifnoface) 217 | if i % 10 == 0: 218 | print(f"im_detect: {i + 1:5}/{num_images} Time: {_t['forward_pass'].average_time+_t['misc'].average_time:.3f}s", 219 | f"== {1./(_t['forward_pass'].average_time +_t['misc'].average_time) :.1f}Hz", flush=True) 220 | -------------------------------------------------------------------------------- /PASSify/face/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yukimasano/PASS/e91b4fc9daf219c765ec5816610e76306de0150c/PASSify/face/models/__init__.py -------------------------------------------------------------------------------- /PASSify/face/models/net.py: -------------------------------------------------------------------------------- 1 | import time 2 | import torch 3 | import torch.nn as nn 4 | import torchvision.models._utils as _utils 5 
| import torchvision.models as models 6 | import torch.nn.functional as F 7 | from torch.autograd import Variable 8 | 9 | def conv_bn(inp, oup, stride = 1, leaky = 0): 10 | return nn.Sequential( 11 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 12 | nn.BatchNorm2d(oup), 13 | nn.LeakyReLU(negative_slope=leaky, inplace=True) 14 | ) 15 | 16 | def conv_bn_no_relu(inp, oup, stride): 17 | return nn.Sequential( 18 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 19 | nn.BatchNorm2d(oup), 20 | ) 21 | 22 | def conv_bn1X1(inp, oup, stride, leaky=0): 23 | return nn.Sequential( 24 | nn.Conv2d(inp, oup, 1, stride, padding=0, bias=False), 25 | nn.BatchNorm2d(oup), 26 | nn.LeakyReLU(negative_slope=leaky, inplace=True) 27 | ) 28 | 29 | def conv_dw(inp, oup, stride, leaky=0.1): 30 | return nn.Sequential( 31 | nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), 32 | nn.BatchNorm2d(inp), 33 | nn.LeakyReLU(negative_slope= leaky,inplace=True), 34 | 35 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 36 | nn.BatchNorm2d(oup), 37 | nn.LeakyReLU(negative_slope= leaky,inplace=True), 38 | ) 39 | 40 | class SSH(nn.Module): 41 | def __init__(self, in_channel, out_channel): 42 | super(SSH, self).__init__() 43 | assert out_channel % 4 == 0 44 | leaky = 0 45 | if (out_channel <= 64): 46 | leaky = 0.1 47 | self.conv3X3 = conv_bn_no_relu(in_channel, out_channel//2, stride=1) 48 | 49 | self.conv5X5_1 = conv_bn(in_channel, out_channel//4, stride=1, leaky = leaky) 50 | self.conv5X5_2 = conv_bn_no_relu(out_channel//4, out_channel//4, stride=1) 51 | 52 | self.conv7X7_2 = conv_bn(out_channel//4, out_channel//4, stride=1, leaky = leaky) 53 | self.conv7x7_3 = conv_bn_no_relu(out_channel//4, out_channel//4, stride=1) 54 | 55 | def forward(self, input): 56 | conv3X3 = self.conv3X3(input) 57 | 58 | conv5X5_1 = self.conv5X5_1(input) 59 | conv5X5 = self.conv5X5_2(conv5X5_1) 60 | 61 | conv7X7_2 = self.conv7X7_2(conv5X5_1) 62 | conv7X7 = self.conv7x7_3(conv7X7_2) 63 | 64 | out = torch.cat([conv3X3, conv5X5, conv7X7], dim=1) 65 | out = F.relu(out) 66 | return out 67 | 68 | class FPN(nn.Module): 69 | def __init__(self,in_channels_list,out_channels): 70 | super(FPN,self).__init__() 71 | leaky = 0 72 | if (out_channels <= 64): 73 | leaky = 0.1 74 | self.output1 = conv_bn1X1(in_channels_list[0], out_channels, stride = 1, leaky = leaky) 75 | self.output2 = conv_bn1X1(in_channels_list[1], out_channels, stride = 1, leaky = leaky) 76 | self.output3 = conv_bn1X1(in_channels_list[2], out_channels, stride = 1, leaky = leaky) 77 | 78 | self.merge1 = conv_bn(out_channels, out_channels, leaky = leaky) 79 | self.merge2 = conv_bn(out_channels, out_channels, leaky = leaky) 80 | 81 | def forward(self, input): 82 | # names = list(input.keys()) 83 | input = list(input.values()) 84 | 85 | output1 = self.output1(input[0]) 86 | output2 = self.output2(input[1]) 87 | output3 = self.output3(input[2]) 88 | 89 | up3 = F.interpolate(output3, size=[output2.size(2), output2.size(3)], mode="nearest") 90 | output2 = output2 + up3 91 | output2 = self.merge2(output2) 92 | 93 | up2 = F.interpolate(output2, size=[output1.size(2), output1.size(3)], mode="nearest") 94 | output1 = output1 + up2 95 | output1 = self.merge1(output1) 96 | 97 | out = [output1, output2, output3] 98 | return out 99 | 100 | 101 | 102 | class MobileNetV1(nn.Module): 103 | def __init__(self): 104 | super(MobileNetV1, self).__init__() 105 | self.stage1 = nn.Sequential( 106 | conv_bn(3, 8, 2, leaky = 0.1), # 3 107 | conv_dw(8, 16, 1), # 7 108 | conv_dw(16, 32, 2), # 11 109 | conv_dw(32, 32, 1), # 19 
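            # stage1/stage2/stage3 end at strides 8/16/32 with 64/128/256 output channels;
            # these are the three feature maps RetinaFace taps via cfg['return_layers'] and
            # feeds into the FPN (see cfg_mnet in data/config.py).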
110 | conv_dw(32, 64, 2), # 27 111 | conv_dw(64, 64, 1), # 43 112 | ) 113 | self.stage2 = nn.Sequential( 114 | conv_dw(64, 128, 2), # 43 + 16 = 59 115 | conv_dw(128, 128, 1), # 59 + 32 = 91 116 | conv_dw(128, 128, 1), # 91 + 32 = 123 117 | conv_dw(128, 128, 1), # 123 + 32 = 155 118 | conv_dw(128, 128, 1), # 155 + 32 = 187 119 | conv_dw(128, 128, 1), # 187 + 32 = 219 120 | ) 121 | self.stage3 = nn.Sequential( 122 | conv_dw(128, 256, 2), # 219 +3 2 = 241 123 | conv_dw(256, 256, 1), # 241 + 64 = 301 124 | ) 125 | self.avg = nn.AdaptiveAvgPool2d((1,1)) 126 | self.fc = nn.Linear(256, 1000) 127 | 128 | def forward(self, x): 129 | x = self.stage1(x) 130 | x = self.stage2(x) 131 | x = self.stage3(x) 132 | x = self.avg(x) 133 | # x = self.model(x) 134 | x = x.view(-1, 256) 135 | x = self.fc(x) 136 | return x 137 | 138 | -------------------------------------------------------------------------------- /PASSify/face/models/retinaface.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision.models.detection.backbone_utils as backbone_utils 4 | import torchvision.models._utils as _utils 5 | import torch.nn.functional as F 6 | from collections import OrderedDict 7 | 8 | from models.net import MobileNetV1 as MobileNetV1 9 | from models.net import FPN as FPN 10 | from models.net import SSH as SSH 11 | 12 | 13 | 14 | class ClassHead(nn.Module): 15 | def __init__(self,inchannels=512,num_anchors=3): 16 | super(ClassHead,self).__init__() 17 | self.num_anchors = num_anchors 18 | self.conv1x1 = nn.Conv2d(inchannels,self.num_anchors*2,kernel_size=(1,1),stride=1,padding=0) 19 | 20 | def forward(self,x): 21 | out = self.conv1x1(x) 22 | out = out.permute(0,2,3,1).contiguous() 23 | 24 | return out.view(out.shape[0], -1, 2) 25 | 26 | class BboxHead(nn.Module): 27 | def __init__(self,inchannels=512,num_anchors=3): 28 | super(BboxHead,self).__init__() 29 | self.conv1x1 = nn.Conv2d(inchannels,num_anchors*4,kernel_size=(1,1),stride=1,padding=0) 30 | 31 | def forward(self,x): 32 | out = self.conv1x1(x) 33 | out = out.permute(0,2,3,1).contiguous() 34 | 35 | return out.view(out.shape[0], -1, 4) 36 | 37 | class LandmarkHead(nn.Module): 38 | def __init__(self,inchannels=512,num_anchors=3): 39 | super(LandmarkHead,self).__init__() 40 | self.conv1x1 = nn.Conv2d(inchannels,num_anchors*10,kernel_size=(1,1),stride=1,padding=0) 41 | 42 | def forward(self,x): 43 | out = self.conv1x1(x) 44 | out = out.permute(0,2,3,1).contiguous() 45 | 46 | return out.view(out.shape[0], -1, 10) 47 | 48 | class RetinaFace(nn.Module): 49 | def __init__(self, cfg = None, phase = 'train'): 50 | """ 51 | :param cfg: Network related settings. 52 | :param phase: train or test. 53 | """ 54 | super(RetinaFace,self).__init__() 55 | self.phase = phase 56 | backbone = None 57 | if cfg['name'] == 'mobilenet0.25': 58 | backbone = MobileNetV1() 59 | if cfg['pretrain']: 60 | checkpoint = torch.load("./weights/mobilenet0.25_Final.pth", map_location=torch.device('cpu')) 61 | from collections import OrderedDict 62 | new_state_dict = OrderedDict() 63 | for k, v in checkpoint['state_dict'].items(): 64 | name = k[7:] # remove module. 
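                    # note: k[7:] assumes every key carries the DataParallel 'module.' prefix;
                    # the remove_prefix() helper in the detector scripts does the same but
                    # checks with startswith() first.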
65 | new_state_dict[name] = v 66 | # load params 67 | backbone.load_state_dict(new_state_dict) 68 | elif cfg['name'] == 'Resnet50': 69 | import torchvision.models as models 70 | backbone = models.resnet50(pretrained=cfg['pretrain']) 71 | 72 | self.body = _utils.IntermediateLayerGetter(backbone, cfg['return_layers']) 73 | in_channels_stage2 = cfg['in_channel'] 74 | in_channels_list = [ 75 | in_channels_stage2 * 2, 76 | in_channels_stage2 * 4, 77 | in_channels_stage2 * 8, 78 | ] 79 | out_channels = cfg['out_channel'] 80 | self.fpn = FPN(in_channels_list,out_channels) 81 | self.ssh1 = SSH(out_channels, out_channels) 82 | self.ssh2 = SSH(out_channels, out_channels) 83 | self.ssh3 = SSH(out_channels, out_channels) 84 | 85 | self.ClassHead = self._make_class_head(fpn_num=3, inchannels=cfg['out_channel']) 86 | self.BboxHead = self._make_bbox_head(fpn_num=3, inchannels=cfg['out_channel']) 87 | self.LandmarkHead = self._make_landmark_head(fpn_num=3, inchannels=cfg['out_channel']) 88 | 89 | def _make_class_head(self,fpn_num=3,inchannels=64,anchor_num=2): 90 | classhead = nn.ModuleList() 91 | for i in range(fpn_num): 92 | classhead.append(ClassHead(inchannels,anchor_num)) 93 | return classhead 94 | 95 | def _make_bbox_head(self,fpn_num=3,inchannels=64,anchor_num=2): 96 | bboxhead = nn.ModuleList() 97 | for i in range(fpn_num): 98 | bboxhead.append(BboxHead(inchannels,anchor_num)) 99 | return bboxhead 100 | 101 | def _make_landmark_head(self,fpn_num=3,inchannels=64,anchor_num=2): 102 | landmarkhead = nn.ModuleList() 103 | for i in range(fpn_num): 104 | landmarkhead.append(LandmarkHead(inchannels,anchor_num)) 105 | return landmarkhead 106 | 107 | def forward(self,inputs): 108 | out = self.body(inputs) 109 | 110 | # FPN 111 | fpn = self.fpn(out) 112 | 113 | # SSH 114 | feature1 = self.ssh1(fpn[0]) 115 | feature2 = self.ssh2(fpn[1]) 116 | feature3 = self.ssh3(fpn[2]) 117 | features = [feature1, feature2, feature3] 118 | 119 | bbox_regressions = torch.cat([self.BboxHead[i](feature) for i, feature in enumerate(features)], dim=1) 120 | classifications = torch.cat([self.ClassHead[i](feature) for i, feature in enumerate(features)],dim=1) 121 | ldm_regressions = torch.cat([self.LandmarkHead[i](feature) for i, feature in enumerate(features)], dim=1) 122 | 123 | if self.phase == 'train': 124 | output = (bbox_regressions, classifications, ldm_regressions) 125 | else: 126 | output = (bbox_regressions, F.softmax(classifications, dim=-1), ldm_regressions) 127 | return output -------------------------------------------------------------------------------- /PASSify/face/sbatch_face_example.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --array=0-X%50 # being nice and not running more than 50 jobs in parallel 3 | #SBATCH --mem=5G 4 | #SBATCH --cpus-per-task=2 5 | #SBATCH --time=8:00:00 6 | #SBATCH --partition=compute # we only need CPUs 7 | #SBACTH --open-mode=append 8 | #SBATCH --job-name=PASSify-face 9 | #SBATCH --constraint=10GbE 10 | 11 | 12 | echo $SLURM_ARRAY_TASK_ID 13 | X=$((${SLURM_ARRAY_TASK_ID}*80000)) 14 | Y=$(((${SLURM_ARRAY_TASK_ID} + 1)*80000)) 15 | 16 | in_file=_tmp_all_files_${SLURM_ARRAY_TASK_ID}.txt 17 | rm ${in_file} 18 | results_dir='/facedetector_results/' 19 | < all_files.txt tail -n +"$X" | head -n "$((Y - X))" >> ${in_file} 20 | 21 | echo "from " ${X} 22 | echo "to " ${Y} 23 | 24 | # ETA 4-6Hz 25 | /scratch/shared/beegfs/yuki/envs/py37/bin/python3 -W ignore main_face_detector.py \ 26 | --input_txt=${in_file} \ 27 | 
--save_folder=${results_dir} \ 28 | --cpu -------------------------------------------------------------------------------- /PASSify/face/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import torch 4 | import torch.optim as optim 5 | import torch.backends.cudnn as cudnn 6 | import argparse 7 | import torch.utils.data as data 8 | from data import WiderFaceDetection, detection_collate, preproc, cfg_mnet, cfg_re50 9 | from layers.modules import MultiBoxLoss 10 | from layers.functions.prior_box import PriorBox 11 | import time 12 | import datetime 13 | import math 14 | from models.retinaface import RetinaFace 15 | 16 | parser = argparse.ArgumentParser(description='Retinaface Training') 17 | parser.add_argument('--training_dataset', default='./data/widerface/train/label.txt', help='Training dataset directory') 18 | parser.add_argument('--network', default='mobile0.25', help='Backbone network mobile0.25 or resnet50') 19 | parser.add_argument('--num_workers', default=4, type=int, help='Number of workers used in dataloading') 20 | parser.add_argument('--lr', '--learning-rate', default=1e-3, type=float, help='initial learning rate') 21 | parser.add_argument('--momentum', default=0.9, type=float, help='momentum') 22 | parser.add_argument('--resume_net', default=None, help='resume net for retraining') 23 | parser.add_argument('--resume_epoch', default=0, type=int, help='resume iter for retraining') 24 | parser.add_argument('--weight_decay', default=5e-4, type=float, help='Weight decay for SGD') 25 | parser.add_argument('--gamma', default=0.1, type=float, help='Gamma update for SGD') 26 | parser.add_argument('--save_folder', default='./weights/', help='Location to save checkpoint models') 27 | 28 | args = parser.parse_args() 29 | 30 | if not os.path.exists(args.save_folder): 31 | os.mkdir(args.save_folder) 32 | cfg = None 33 | if args.network == "mobile0.25": 34 | cfg = cfg_mnet 35 | elif args.network == "resnet50": 36 | cfg = cfg_re50 37 | 38 | rgb_mean = (104, 117, 123) # bgr order 39 | num_classes = 2 40 | img_dim = cfg['image_size'] 41 | num_gpu = cfg['ngpu'] 42 | batch_size = cfg['batch_size'] 43 | max_epoch = cfg['epoch'] 44 | gpu_train = cfg['gpu_train'] 45 | 46 | num_workers = args.num_workers 47 | momentum = args.momentum 48 | weight_decay = args.weight_decay 49 | initial_lr = args.lr 50 | gamma = args.gamma 51 | training_dataset = args.training_dataset 52 | save_folder = args.save_folder 53 | 54 | net = RetinaFace(cfg=cfg) 55 | print("Printing net...") 56 | print(net) 57 | 58 | if args.resume_net is not None: 59 | print('Loading resume network...') 60 | state_dict = torch.load(args.resume_net) 61 | # create new OrderedDict that does not contain `module.` 62 | from collections import OrderedDict 63 | new_state_dict = OrderedDict() 64 | for k, v in state_dict.items(): 65 | head = k[:7] 66 | if head == 'module.': 67 | name = k[7:] # remove `module.` 68 | else: 69 | name = k 70 | new_state_dict[name] = v 71 | net.load_state_dict(new_state_dict) 72 | 73 | if num_gpu > 1 and gpu_train: 74 | net = torch.nn.DataParallel(net).cuda() 75 | else: 76 | net = net.cuda() 77 | 78 | cudnn.benchmark = True 79 | 80 | 81 | optimizer = optim.SGD(net.parameters(), lr=initial_lr, momentum=momentum, weight_decay=weight_decay) 82 | criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False) 83 | 84 | priorbox = PriorBox(cfg, image_size=(img_dim, img_dim)) 85 | with torch.no_grad(): 86 | priors = 
priorbox.forward() 87 | priors = priors.cuda() 88 | 89 | def train(): 90 | net.train() 91 | epoch = 0 + args.resume_epoch 92 | print('Loading Dataset...') 93 | 94 | dataset = WiderFaceDetection( training_dataset,preproc(img_dim, rgb_mean)) 95 | 96 | epoch_size = math.ceil(len(dataset) / batch_size) 97 | max_iter = max_epoch * epoch_size 98 | 99 | stepvalues = (cfg['decay1'] * epoch_size, cfg['decay2'] * epoch_size) 100 | step_index = 0 101 | 102 | if args.resume_epoch > 0: 103 | start_iter = args.resume_epoch * epoch_size 104 | else: 105 | start_iter = 0 106 | 107 | for iteration in range(start_iter, max_iter): 108 | if iteration % epoch_size == 0: 109 | # create batch iterator 110 | batch_iterator = iter(data.DataLoader(dataset, batch_size, shuffle=True, num_workers=num_workers, collate_fn=detection_collate)) 111 | if (epoch % 10 == 0 and epoch > 0) or (epoch % 5 == 0 and epoch > cfg['decay1']): 112 | torch.save(net.state_dict(), save_folder + cfg['name']+ '_epoch_' + str(epoch) + '.pth') 113 | epoch += 1 114 | 115 | load_t0 = time.time() 116 | if iteration in stepvalues: 117 | step_index += 1 118 | lr = adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size) 119 | 120 | # load train data 121 | images, targets = next(batch_iterator) 122 | images = images.cuda() 123 | targets = [anno.cuda() for anno in targets] 124 | 125 | # forward 126 | out = net(images) 127 | 128 | # backprop 129 | optimizer.zero_grad() 130 | loss_l, loss_c, loss_landm = criterion(out, priors, targets) 131 | loss = cfg['loc_weight'] * loss_l + loss_c + loss_landm 132 | loss.backward() 133 | optimizer.step() 134 | load_t1 = time.time() 135 | batch_time = load_t1 - load_t0 136 | eta = int(batch_time * (max_iter - iteration)) 137 | print('Epoch:{}/{} || Epochiter: {}/{} || Iter: {}/{} || Loc: {:.4f} Cla: {:.4f} Landm: {:.4f} || LR: {:.8f} || Batchtime: {:.4f} s || ETA: {}' 138 | .format(epoch, max_epoch, (iteration % epoch_size) + 1, 139 | epoch_size, iteration + 1, max_iter, loss_l.item(), loss_c.item(), loss_landm.item(), lr, batch_time, str(datetime.timedelta(seconds=eta)))) 140 | 141 | torch.save(net.state_dict(), save_folder + cfg['name'] + '_Final.pth') 142 | # torch.save(net.state_dict(), save_folder + 'Final_Retinaface.pth') 143 | 144 | 145 | def adjust_learning_rate(optimizer, gamma, epoch, step_index, iteration, epoch_size): 146 | """Sets the learning rate 147 | # Adapted from PyTorch Imagenet example: 148 | # https://github.com/pytorch/examples/blob/master/imagenet/main.py 149 | """ 150 | warmup_epoch = -1 151 | if epoch <= warmup_epoch: 152 | lr = 1e-6 + (initial_lr-1e-6) * iteration / (epoch_size * warmup_epoch) 153 | else: 154 | lr = initial_lr * (gamma ** (step_index)) 155 | for param_group in optimizer.param_groups: 156 | param_group['lr'] = lr 157 | return lr 158 | 159 | if __name__ == '__main__': 160 | train() 161 | -------------------------------------------------------------------------------- /PASSify/face/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yukimasano/PASS/e91b4fc9daf219c765ec5816610e76306de0150c/PASSify/face/utils/__init__.py -------------------------------------------------------------------------------- /PASSify/face/utils/box_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | def point_form(boxes): 6 | """ Convert prior_boxes to (xmin, ymin, xmax, ymax) 7 | representation for comparison 
to point form ground truth data. 8 | Args: 9 | boxes: (tensor) center-size default boxes from priorbox layers. 10 | Return: 11 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 12 | """ 13 | return torch.cat((boxes[:, :2] - boxes[:, 2:]/2, # xmin, ymin 14 | boxes[:, :2] + boxes[:, 2:]/2), 1) # xmax, ymax 15 | 16 | 17 | def center_size(boxes): 18 | """ Convert prior_boxes to (cx, cy, w, h) 19 | representation for comparison to center-size form ground truth data. 20 | Args: 21 | boxes: (tensor) point_form boxes 22 | Return: 23 | boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes. 24 | """ 25 | return torch.cat((boxes[:, 2:] + boxes[:, :2])/2, # cx, cy 26 | boxes[:, 2:] - boxes[:, :2], 1) # w, h 27 | 28 | 29 | def intersect(box_a, box_b): 30 | """ We resize both tensors to [A,B,2] without new malloc: 31 | [A,2] -> [A,1,2] -> [A,B,2] 32 | [B,2] -> [1,B,2] -> [A,B,2] 33 | Then we compute the area of intersect between box_a and box_b. 34 | Args: 35 | box_a: (tensor) bounding boxes, Shape: [A,4]. 36 | box_b: (tensor) bounding boxes, Shape: [B,4]. 37 | Return: 38 | (tensor) intersection area, Shape: [A,B]. 39 | """ 40 | A = box_a.size(0) 41 | B = box_b.size(0) 42 | max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2), 43 | box_b[:, 2:].unsqueeze(0).expand(A, B, 2)) 44 | min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2), 45 | box_b[:, :2].unsqueeze(0).expand(A, B, 2)) 46 | inter = torch.clamp((max_xy - min_xy), min=0) 47 | return inter[:, :, 0] * inter[:, :, 1] 48 | 49 | 50 | def jaccard(box_a, box_b): 51 | """Compute the jaccard overlap of two sets of boxes. The jaccard overlap 52 | is simply the intersection over union of two boxes. Here we operate on 53 | ground truth boxes and default boxes. 54 | E.g.: 55 | A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B) 56 | Args: 57 | box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4] 58 | box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4] 59 | Return: 60 | jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)] 61 | """ 62 | inter = intersect(box_a, box_b) 63 | area_a = ((box_a[:, 2]-box_a[:, 0]) * 64 | (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B] 65 | area_b = ((box_b[:, 2]-box_b[:, 0]) * 66 | (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B] 67 | union = area_a + area_b - inter 68 | return inter / union # [A,B] 69 | 70 | 71 | def matrix_iou(a, b): 72 | """ 73 | return iou of a and b, numpy version for data augenmentation 74 | """ 75 | lt = np.maximum(a[:, np.newaxis, :2], b[:, :2]) 76 | rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) 77 | 78 | area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) 79 | area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) 80 | area_b = np.prod(b[:, 2:] - b[:, :2], axis=1) 81 | return area_i / (area_a[:, np.newaxis] + area_b - area_i) 82 | 83 | 84 | def matrix_iof(a, b): 85 | """ 86 | return iof of a and b, numpy version for data augenmentation 87 | """ 88 | lt = np.maximum(a[:, np.newaxis, :2], b[:, :2]) 89 | rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) 90 | 91 | area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) 92 | area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) 93 | return area_i / np.maximum(area_a[:, np.newaxis], 1) 94 | 95 | 96 | def match(threshold, truths, priors, variances, labels, landms, loc_t, conf_t, landm_t, idx): 97 | """Match each prior box with the ground truth box of the highest jaccard 98 | overlap, encode the bounding boxes, then return the matched indices 99 | 
corresponding to both confidence and location preds. 100 | Args: 101 | threshold: (float) The overlap threshold used when mathing boxes. 102 | truths: (tensor) Ground truth boxes, Shape: [num_obj, 4]. 103 | priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4]. 104 | variances: (tensor) Variances corresponding to each prior coord, 105 | Shape: [num_priors, 4]. 106 | labels: (tensor) All the class labels for the image, Shape: [num_obj]. 107 | landms: (tensor) Ground truth landms, Shape [num_obj, 10]. 108 | loc_t: (tensor) Tensor to be filled w/ endcoded location targets. 109 | conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. 110 | landm_t: (tensor) Tensor to be filled w/ endcoded landm targets. 111 | idx: (int) current batch index 112 | Return: 113 | The matched indices corresponding to 1)location 2)confidence 3)landm preds. 114 | """ 115 | # jaccard index 116 | overlaps = jaccard( 117 | truths, 118 | point_form(priors) 119 | ) 120 | # (Bipartite Matching) 121 | # [1,num_objects] best prior for each ground truth 122 | best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True) 123 | 124 | # ignore hard gt 125 | valid_gt_idx = best_prior_overlap[:, 0] >= 0.2 126 | best_prior_idx_filter = best_prior_idx[valid_gt_idx, :] 127 | if best_prior_idx_filter.shape[0] <= 0: 128 | loc_t[idx] = 0 129 | conf_t[idx] = 0 130 | return 131 | 132 | # [1,num_priors] best ground truth for each prior 133 | best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True) 134 | best_truth_idx.squeeze_(0) 135 | best_truth_overlap.squeeze_(0) 136 | best_prior_idx.squeeze_(1) 137 | best_prior_idx_filter.squeeze_(1) 138 | best_prior_overlap.squeeze_(1) 139 | best_truth_overlap.index_fill_(0, best_prior_idx_filter, 2) # ensure best prior 140 | # TODO refactor: index best_prior_idx with long tensor 141 | # ensure every gt matches with its prior of max overlap 142 | for j in range(best_prior_idx.size(0)): # 判别此anchor是预测哪一个boxes 143 | best_truth_idx[best_prior_idx[j]] = j 144 | matches = truths[best_truth_idx] # Shape: [num_priors,4] 此处为每一个anchor对应的bbox取出来 145 | conf = labels[best_truth_idx] # Shape: [num_priors] 此处为每一个anchor对应的label取出来 146 | conf[best_truth_overlap < threshold] = 0 # label as background overlap<0.35的全部作为负样本 147 | loc = encode(matches, priors, variances) 148 | 149 | matches_landm = landms[best_truth_idx] 150 | landm = encode_landm(matches_landm, priors, variances) 151 | loc_t[idx] = loc # [num_priors,4] encoded offsets to learn 152 | conf_t[idx] = conf # [num_priors] top class label for each prior 153 | landm_t[idx] = landm 154 | 155 | 156 | def encode(matched, priors, variances): 157 | """Encode the variances from the priorbox layers into the ground truth boxes 158 | we have matched (based on jaccard overlap) with the prior boxes. 159 | Args: 160 | matched: (tensor) Coords of ground truth for each prior in point-form 161 | Shape: [num_priors, 4]. 162 | priors: (tensor) Prior boxes in center-offset form 163 | Shape: [num_priors,4]. 
164 | variances: (list[float]) Variances of priorboxes 165 | Return: 166 | encoded boxes (tensor), Shape: [num_priors, 4] 167 | """ 168 | 169 | # dist b/t match center and prior's center 170 | g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2] 171 | # encode variance 172 | g_cxcy /= (variances[0] * priors[:, 2:]) 173 | # match wh / prior wh 174 | g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:] 175 | g_wh = torch.log(g_wh) / variances[1] 176 | # return target for smooth_l1_loss 177 | return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4] 178 | 179 | def encode_landm(matched, priors, variances): 180 | """Encode the variances from the priorbox layers into the ground truth boxes 181 | we have matched (based on jaccard overlap) with the prior boxes. 182 | Args: 183 | matched: (tensor) Coords of ground truth for each prior in point-form 184 | Shape: [num_priors, 10]. 185 | priors: (tensor) Prior boxes in center-offset form 186 | Shape: [num_priors,4]. 187 | variances: (list[float]) Variances of priorboxes 188 | Return: 189 | encoded landm (tensor), Shape: [num_priors, 10] 190 | """ 191 | 192 | # dist b/t match center and prior's center 193 | matched = torch.reshape(matched, (matched.size(0), 5, 2)) 194 | priors_cx = priors[:, 0].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2) 195 | priors_cy = priors[:, 1].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2) 196 | priors_w = priors[:, 2].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2) 197 | priors_h = priors[:, 3].unsqueeze(1).expand(matched.size(0), 5).unsqueeze(2) 198 | priors = torch.cat([priors_cx, priors_cy, priors_w, priors_h], dim=2) 199 | g_cxcy = matched[:, :, :2] - priors[:, :, :2] 200 | # encode variance 201 | g_cxcy /= (variances[0] * priors[:, :, 2:]) 202 | # g_cxcy /= priors[:, :, 2:] 203 | g_cxcy = g_cxcy.reshape(g_cxcy.size(0), -1) 204 | # return target for smooth_l1_loss 205 | return g_cxcy 206 | 207 | 208 | # Adapted from https://github.com/Hakuyume/chainer-ssd 209 | def decode(loc, priors, variances): 210 | """Decode locations from predictions using priors to undo 211 | the encoding we did for offset regression at train time. 212 | Args: 213 | loc (tensor): location predictions for loc layers, 214 | Shape: [num_priors,4] 215 | priors (tensor): Prior boxes in center-offset form. 216 | Shape: [num_priors,4]. 217 | variances: (list[float]) Variances of priorboxes 218 | Return: 219 | decoded bounding box predictions 220 | """ 221 | 222 | boxes = torch.cat(( 223 | priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:], 224 | priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1) 225 | boxes[:, :2] -= boxes[:, 2:] / 2 226 | boxes[:, 2:] += boxes[:, :2] 227 | return boxes 228 | 229 | def decode_landm(pre, priors, variances): 230 | """Decode landm from predictions using priors to undo 231 | the encoding we did for offset regression at train time. 232 | Args: 233 | pre (tensor): landm predictions for loc layers, 234 | Shape: [num_priors,10] 235 | priors (tensor): Prior boxes in center-offset form. 236 | Shape: [num_priors,4]. 
237 | variances: (list[float]) Variances of priorboxes 238 | Return: 239 | decoded landm predictions 240 | """ 241 | landms = torch.cat((priors[:, :2] + pre[:, :2] * variances[0] * priors[:, 2:], 242 | priors[:, :2] + pre[:, 2:4] * variances[0] * priors[:, 2:], 243 | priors[:, :2] + pre[:, 4:6] * variances[0] * priors[:, 2:], 244 | priors[:, :2] + pre[:, 6:8] * variances[0] * priors[:, 2:], 245 | priors[:, :2] + pre[:, 8:10] * variances[0] * priors[:, 2:], 246 | ), dim=1) 247 | return landms 248 | 249 | 250 | def log_sum_exp(x): 251 | """Utility function for computing log_sum_exp while determining 252 | This will be used to determine unaveraged confidence loss across 253 | all examples in a batch. 254 | Args: 255 | x (Variable(tensor)): conf_preds from conf layers 256 | """ 257 | x_max = x.data.max() 258 | return torch.log(torch.sum(torch.exp(x-x_max), 1, keepdim=True)) + x_max 259 | 260 | 261 | # Original author: Francisco Massa: 262 | # https://github.com/fmassa/object-detection.torch 263 | # Ported to PyTorch by Max deGroot (02/01/2017) 264 | def nms(boxes, scores, overlap=0.5, top_k=200): 265 | """Apply non-maximum suppression at test time to avoid detecting too many 266 | overlapping bounding boxes for a given object. 267 | Args: 268 | boxes: (tensor) The location preds for the img, Shape: [num_priors,4]. 269 | scores: (tensor) The class predscores for the img, Shape:[num_priors]. 270 | overlap: (float) The overlap thresh for suppressing unnecessary boxes. 271 | top_k: (int) The Maximum number of box preds to consider. 272 | Return: 273 | The indices of the kept boxes with respect to num_priors. 274 | """ 275 | 276 | keep = torch.Tensor(scores.size(0)).fill_(0).long() 277 | if boxes.numel() == 0: 278 | return keep 279 | x1 = boxes[:, 0] 280 | y1 = boxes[:, 1] 281 | x2 = boxes[:, 2] 282 | y2 = boxes[:, 3] 283 | area = torch.mul(x2 - x1, y2 - y1) 284 | v, idx = scores.sort(0) # sort in ascending order 285 | # I = I[v >= 0.01] 286 | idx = idx[-top_k:] # indices of the top-k largest vals 287 | xx1 = boxes.new() 288 | yy1 = boxes.new() 289 | xx2 = boxes.new() 290 | yy2 = boxes.new() 291 | w = boxes.new() 292 | h = boxes.new() 293 | 294 | # keep = torch.Tensor() 295 | count = 0 296 | while idx.numel() > 0: 297 | i = idx[-1] # index of current largest val 298 | # keep.append(i) 299 | keep[count] = i 300 | count += 1 301 | if idx.size(0) == 1: 302 | break 303 | idx = idx[:-1] # remove kept element from view 304 | # load bboxes of next highest vals 305 | torch.index_select(x1, 0, idx, out=xx1) 306 | torch.index_select(y1, 0, idx, out=yy1) 307 | torch.index_select(x2, 0, idx, out=xx2) 308 | torch.index_select(y2, 0, idx, out=yy2) 309 | # store element-wise max with next highest score 310 | xx1 = torch.clamp(xx1, min=x1[i]) 311 | yy1 = torch.clamp(yy1, min=y1[i]) 312 | xx2 = torch.clamp(xx2, max=x2[i]) 313 | yy2 = torch.clamp(yy2, max=y2[i]) 314 | w.resize_as_(xx2) 315 | h.resize_as_(yy2) 316 | w = xx2 - xx1 317 | h = yy2 - yy1 318 | # check sizes of xx1 and xx2.. 
after each iteration 319 | w = torch.clamp(w, min=0.0) 320 | h = torch.clamp(h, min=0.0) 321 | inter = w*h 322 | # IoU = i / (area(a) + area(b) - i) 323 | rem_areas = torch.index_select(area, 0, idx) # load remaining areas) 324 | union = (rem_areas - inter) + area[i] 325 | IoU = inter/union # store result in iou 326 | # keep only elements with an IoU <= overlap 327 | idx = idx[IoU.le(overlap)] 328 | return keep, count 329 | 330 | 331 | -------------------------------------------------------------------------------- /PASSify/face/utils/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yukimasano/PASS/e91b4fc9daf219c765ec5816610e76306de0150c/PASSify/face/utils/nms/__init__.py -------------------------------------------------------------------------------- /PASSify/face/utils/nms/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def py_cpu_nms(dets, thresh): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | while order.size > 0: 23 | i = order[0] 24 | keep.append(i) 25 | xx1 = np.maximum(x1[i], x1[order[1:]]) 26 | yy1 = np.maximum(y1[i], y1[order[1:]]) 27 | xx2 = np.minimum(x2[i], x2[order[1:]]) 28 | yy2 = np.minimum(y2[i], y2[order[1:]]) 29 | 30 | w = np.maximum(0.0, xx2 - xx1 + 1) 31 | h = np.maximum(0.0, yy2 - yy1 + 1) 32 | inter = w * h 33 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 34 | 35 | inds = np.where(ovr <= thresh)[0] 36 | order = order[inds + 1] 37 | 38 | return keep 39 | -------------------------------------------------------------------------------- /PASSify/face/utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | 11 | class Timer(object): 12 | """A simple timer.""" 13 | def __init__(self): 14 | self.total_time = 0. 15 | self.calls = 0 16 | self.start_time = 0. 17 | self.diff = 0. 18 | self.average_time = 0. 19 | 20 | def tic(self): 21 | # using time.time instead of time.clock because time time.clock 22 | # does not normalize for multithreading 23 | self.start_time = time.time() 24 | 25 | def toc(self, average=True): 26 | self.diff = time.time() - self.start_time 27 | self.total_time += self.diff 28 | self.calls += 1 29 | self.average_time = self.total_time / self.calls 30 | if average: 31 | return self.average_time 32 | else: 33 | return self.diff 34 | 35 | def clear(self): 36 | self.total_time = 0. 37 | self.calls = 0 38 | self.start_time = 0. 39 | self.diff = 0. 40 | self.average_time = 0. 
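A minimal, self-contained sketch (not part of the repository) of how the pure-Python NMS in `py_cpu_nms.py` above filters overlapping detections; the box coordinates and scores below are made up, and the import path assumes the `face/` directory is the working directory:

```python
# Hypothetical usage of py_cpu_nms: rows of `dets` are [x1, y1, x2, y2, score].
import numpy as np
from utils.nms.py_cpu_nms import py_cpu_nms  # package path inside PASSify/face/

dets = np.array([
    [10.0, 10.0, 50.0, 50.0, 0.95],      # highest score, always kept
    [12.0, 12.0, 52.0, 52.0, 0.80],      # IoU with the first box ~0.83 > 0.4 -> suppressed
    [100.0, 100.0, 150.0, 150.0, 0.70],  # no overlap with the others -> kept
])

keep = py_cpu_nms(dets, thresh=0.4)
print(keep)  # indices of surviving boxes, here [0, 2]
```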
41 | -------------------------------------------------------------------------------- /PASSify/face/weights/mobilenet0.25_Final.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yukimasano/PASS/e91b4fc9daf219c765ec5816610e76306de0150c/PASSify/face/weights/mobilenet0.25_Final.pth -------------------------------------------------------------------------------- /PASSify/passify.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import sys 3 | 4 | step = int(sys.argv[1]) 5 | 6 | if step == 0: 7 | dir = str(sys.argv[2]) 8 | # enlist all files 9 | files = glob.glob(dir + '/*/**') 10 | f = open("0_all_files.txt", "a") 11 | i = 0 12 | for file in files: 13 | i += 1 14 | f.write(file +"\n") 15 | f.close() 16 | print(f"found {i} files") 17 | nj = (i // 80000) +1 # number of slurm jobs 18 | f = open('face/sbatch_face.sh','a') 19 | command = f"""#!/bin/bash 20 | #SBATCH --mem=5G 21 | #SBATCH --cpus-per-task=2 22 | #SBATCH --time=8:00:00 23 | #SBATCH --partition=compute # we only need CPUs, adapt to your cluster 24 | #SBATCH --array=0-{nj}%50 # being nice and not running more than 50 jobs in parallel 25 | #SBATCH --job-name=PASSify-face 26 | #SBATCH --constraint=10GbE 27 | 28 | 29 | cd face/ 30 | echo $SLURM_ARRAY_TASK_ID 31 | X=$(($SLURM_ARRAY_TASK_ID*80000)) 32 | Y=$((($SLURM_ARRAY_TASK_ID + 1)*80000)) 33 | 34 | in_file=_tmp_all_files_$SLURM_ARRAY_TASK_ID.txt 35 | rm $in_file 36 | results_dir='/facedetector_results/' 37 | < all_files.txt tail -n +"$X" | head -n "$((Y - X))" >> $in_file 38 | 39 | echo "from " $X 40 | echo "to " $Y 41 | 42 | 43 | # ETA 4-6Hz 44 | python3 -W ignore main_face_detector.py \ 45 | --input_txt=$in_file \ 46 | --save_folder=$results_dir \ 47 | --cpu 48 | """ 49 | f.write(command) 50 | f.close() 51 | 52 | 53 | if step == 1: 54 | files = glob.glob('facedetector_results/noface_index/*/**') 55 | f = open("1_no_faces.txt", "a") 56 | i = 0 57 | for file in files: 58 | i += 1 59 | f.write(file +"\n") 60 | f.close() 61 | print(f"left with {i} images that do not contain faces") 62 | nj = (i // 80000) +1 # number of slurm jobs 63 | f = open('person/sbatch_person.sh','a') 64 | command = f"""#!/bin/bash 65 | #SBATCH --mem=10G 66 | #SBATCH --cpus-per-task=5 67 | #SBATCH --time=8:00:00 # this is on the low-end, jobs might finish quicker. 
68 | #SBATCH --gres=gpu:1 69 | #SBATCH --partition=gpu # we need GPUs, adapt to your cluster 70 | #SBATCH --job-name=PASSify-person 71 | #SBATCH --array=0-{nj}%50 # being nice and not running more than 50 jobs in parallel 72 | 73 | 74 | 75 | echo $SLURM_ARRAY_TASK_ID 76 | X=$(($SLURM_ARRAY_TASK_ID*80000)) 77 | Y=$((($SLURM_ARRAY_TASK_ID + 1)*80000)) 78 | 79 | in_file=_tmp_noface_$SLURM_ARRAY_TASK_ID.txt 80 | rm $in_file 81 | results_dir='persondetector_results/' 82 | mkdir -p $results_dir 83 | < 1_no_faces.txt tail -n +"$X" | head -n "$((Y - X))" >> $in_file 84 | 85 | echo "from " $X 86 | echo "to " $Y 87 | 88 | # ETA 4-6Hz 89 | python3 -W -W ignore main_person_detector.py \ 90 | --img_list=$in_file \ 91 | --save_folder=$results_dir 92 | """ 93 | f.write(command) 94 | f.close() 95 | 96 | if step == 2: 97 | files = glob.glob('persondetector_results/noperson_index/*/**') 98 | f = open("2_no_faces__no_person.txt", "a") 99 | i = 0 100 | for file in files: 101 | i += 1 102 | f.write(file +"\n") 103 | f.close() 104 | print(f"left with {i} images that do not contain faces nor persons") 105 | print("final file of images left can be found in: 2_no_faces__no_person.txt") 106 | print("Note that the results are from automated algorithms," 107 | " and thus do not work 100% well and might work differently well on different humans, possibly introducing bias. " 108 | "For all real applications, please thoroughly run human evaluations.") 109 | -------------------------------------------------------------------------------- /PASSify/person/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yukimasano/PASS/e91b4fc9daf219c765ec5816610e76306de0150c/PASSify/person/__init__.py -------------------------------------------------------------------------------- /PASSify/person/cascade_rcnn.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHTS: "/scratch/shared/beegfs/yuki/adiwol/experts/cascade_rcnn.pkl" 4 | BACKBONE: 5 | NAME: "build_resnet_fpn_backbone" 6 | MASK_ON: True 7 | RESNETS: 8 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 9 | DEPTH: 50 10 | FPN: 11 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 12 | ANCHOR_GENERATOR: 13 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 14 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 15 | RPN: 16 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 17 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 18 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 19 | POST_NMS_TOPK_TRAIN: 2000 20 | POST_NMS_TOPK_TEST: 1000 21 | 22 | ROI_HEADS: 23 | NAME: CascadeROIHeads 24 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 25 | ROI_BOX_HEAD: 26 | NAME: "FastRCNNConvFCHead" 27 | CLS_AGNOSTIC_BBOX_REG: True 28 | NUM_FC: 2 29 | POOLER_RESOLUTION: 7 30 | ROI_MASK_HEAD: 31 | NAME: "MaskRCNNConvUpsampleHead" 32 | NUM_CONV: 4 33 | POOLER_RESOLUTION: 14 34 | DATASETS: 35 | TRAIN: ("coco_2017_train",) 36 | TEST: ("coco_2017_val",) 37 | SOLVER: 38 | IMS_PER_BATCH: 16 39 | BASE_LR: 0.02 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | INPUT: 43 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 44 | VERSION: 2 -------------------------------------------------------------------------------- /PASSify/person/main_person_detector.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import argparse 3 | import multiprocessing as mp 4 | 
import os 5 | import time 6 | import tqdm 7 | 8 | from detectron2.config import get_cfg 9 | from detectron2.data.detection_utils import read_image 10 | from detectron2.utils.logger import setup_logger 11 | 12 | from detectron2.data import MetadataCatalog 13 | from detectron2.engine.defaults import DefaultPredictor 14 | from detectron2.utils.visualizer import ColorMode, Visualizer 15 | 16 | 17 | class Runner(object): 18 | def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False): 19 | """ 20 | Args: 21 | cfg (CfgNode): 22 | instance_mode (ColorMode): 23 | parallel (bool): whether to run the model in different processes from visualization. 24 | Useful since the visualization logic can be slow. 25 | """ 26 | self.metadata = MetadataCatalog.get( 27 | cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused" 28 | ) 29 | self.cpu_device = torch.device("cpu") 30 | self.instance_mode = instance_mode 31 | 32 | self.parallel = parallel 33 | self.predictor = DefaultPredictor(cfg) 34 | 35 | 36 | def setup_cfg(args): 37 | # load config from file and command-line arguments 38 | cfg = get_cfg() 39 | cfg.merge_from_file(args.config_file) 40 | cfg.merge_from_list(args.opts) 41 | # Set score_threshold for builtin models 42 | cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold 43 | cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold 44 | cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = args.confidence_threshold 45 | cfg.freeze() 46 | return cfg 47 | 48 | 49 | def get_parser(): 50 | parser = argparse.ArgumentParser(description="Detectron2 demo for builtin configs") 51 | parser.add_argument( 52 | "--config-file", 53 | default="cascade_rcnn.yaml", 54 | metavar="FILE", 55 | help="path to config file", 56 | ) 57 | parser.add_argument( 58 | "--img_list", 59 | type=str, 60 | default='', 61 | ) 62 | parser.add_argument( 63 | "--output", 64 | help="outputfolder", 65 | ) 66 | parser.add_argument( 67 | "--confidence-threshold", 68 | type=float, 69 | default=0.5, 70 | help="Minimum score for instance predictions to be shown", 71 | ) 72 | parser.add_argument( 73 | "--opts", 74 | help="Modify config options using the command-line 'KEY VALUE' pairs", 75 | default=[], 76 | nargs=argparse.REMAINDER, 77 | ) 78 | parser.add_argument('--save_folder', default='./testout/', type=str, help='Dir to save txt results') 79 | 80 | return parser 81 | 82 | 83 | def touch(fname, times=None): 84 | with open(fname, 'a'): 85 | os.utime(fname, times) 86 | 87 | if __name__ == "__main__": 88 | mp.set_start_method("spawn", force=True) 89 | args = get_parser().parse_args() 90 | setup_logger(name="fvcore") 91 | logger = setup_logger() 92 | logger.info("Arguments: " + str(args)) 93 | 94 | cfg = setup_cfg(args) 95 | 96 | runner = Runner(cfg) 97 | if args.img_list != '': 98 | f_list = open(args.img_list, 'r') 99 | test_dataset = f_list.readlines() 100 | test_dataset = [_d.strip() for _d in test_dataset] 101 | print(f'done preparing dataset!, N={len(test_dataset)}', flush=True) 102 | 103 | for image_path in tqdm.tqdm(test_dataset): 104 | # use PIL, to be consistent with evaluation 105 | img_name = image_path.split('/')[-1] 106 | subfolder = image_path.split('/')[-2] 107 | save_name = os.path.join(args.save_folder, subfolder, img_name.split('.')[0] + ".txt") 108 | 109 | img = read_image(image_path, format="BGR") 110 | start_time = time.time() 111 | predictions = runner.predictor(img) 112 | logger.info( 113 | "{}: {} in {:.2f}s".format( 114 | img_name, 115 | "detected {} 
instances".format(len(predictions["instances"])) 116 | if "instances" in predictions 117 | else "finished", 118 | time.time() - start_time, 119 | ) 120 | ) 121 | classes, scores = predictions["instances"].pred_classes.cpu(), predictions["instances"].scores.cpu() 122 | # -------------------------------------------------------------------- 123 | name_ifperson = os.path.join(args.save_folder, 'person_index', subfolder, img_name) 124 | name_ifnoperson = os.path.join(args.save_folder, 'noperson_index', subfolder, img_name ) 125 | 126 | dirname = os.path.dirname(save_name) 127 | if not os.path.isdir(dirname): 128 | os.makedirs(dirname) 129 | with open(save_name, "w") as fd: 130 | has_person = 1 if any([int(c) == 0 for c in classes]) else 0 131 | fd.write(str(has_person) + " \n") 132 | for _c, _s in zip(classes, scores): 133 | line = str(_c.item()) + ":" + str(_s.item()) + " \n" 134 | fd.write(line) 135 | if has_person: 136 | if not os.path.isdir(os.path.dirname(name_ifperson)): 137 | os.makedirs(os.path.dirname(name_ifperson)) 138 | touch(name_ifperson) 139 | else: 140 | if not os.path.isdir(os.path.dirname(name_ifnoperson)): 141 | os.makedirs(os.path.dirname(name_ifnoperson)) 142 | touch(name_ifnoperson) -------------------------------------------------------------------------------- /PASSify/person/sbatch_person_example.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --mem=10G 3 | #SBATCH --cpus-per-task=5 4 | #SBATCH --time=5:00:00 5 | #SBATCH --gres=gpu:1 6 | #SBATCH --partition=gpu 7 | #SBATCH --job-name=PASSify-person 8 | #SBATCH --array=0-175%50 # being nice and not running more than 50 jobs in parallel 9 | 10 | 11 | 12 | echo $SLURM_ARRAY_TASK_ID 13 | X=$((${SLURM_ARRAY_TASK_ID}*80000)) 14 | Y=$(((${SLURM_ARRAY_TASK_ID} + 1)*80000)) 15 | 16 | in_file=_tmp_noface_${SLURM_ARRAY_TASK_ID}.txt 17 | rm ${in_file} 18 | results_dir='persondetector_results/' 19 | mkdir -p ${results_dir} 20 | < 1_no_faces.txt tail -n +"$X" | head -n "$((Y - X))" >> ${in_file} 21 | 22 | echo "from " ${X} 23 | echo "to " ${Y} 24 | 25 | # ETA 4-6Hz 26 | /scratch/shared/beegfs/yuki/envs/py37/bin/python3 -W ignore main_person_detector.py \ 27 | --img_list=${in_file} \ 28 | --save_folder=${results_dir} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PASS: Pictures without humAns for Self-Supervised Pretraining 2 | **TL;DR:** An ImageNet replacement dataset for self-supervised pretraining without humans 3 | 4 | ![img.png](img.png?style=centerme) 5 | 6 | 7 | 8 | ## Content 9 | PASS is a large-scale image dataset that does not include any humans, human parts, or other personally identifiable information that can be used for high-quality pretraining while significantly reducing privacy concerns. 10 | 11 | ![pass.gif](pass.gif) 12 | 13 | ## Download the dataset 14 | 15 | The quickest way: 16 | ```sh 17 | git clone https://github.com/yukimasano/PASS 18 | cd PASS 19 | source download.sh # maybe change the directory where you want to download it 20 | ``` 21 | Generally: all information is on our [webpage](https://www.robots.ox.ac.uk/~vgg/research/pass/). 22 | 23 | For downloading the dataset, please visit our [dataset on zenodo](https://zenodo.org/record/6615455). There you can download it in tar files and find the meta-data. 
24 | 25 | You can also download the images from their AWS urls, from [here](https://www.robots.ox.ac.uk/~vgg/research/pass/pass_urls.txt). 26 | 27 | ## Pretrained models 28 | | Pretraining | Method | Epochs | IN-1k Acc. | Places205 Acc. | | 29 | |-------------|------------------------------------------------------------------------|--------|------------|----------------|----------------------------------------------------------------------------------------------------------------------------------------------| 30 | | (IN-1k) | [MoCo-v2 ](https://github.com/facebookresearch/moco) | 200 | 60.6 | 50.1 | [visit MoCo-v2 repo](https://github.com/facebookresearch/moco#models) | 31 | | PASS | [MoCo-v2](https://github.com/facebookresearch/moco) | 180 | 59.1 | 52.8 | [R50 weights](https://www.robots.ox.ac.uk/~vgg/research/pass/pretrained_models/moco_v2_180ep_of200ep.pth.tar) | 32 | | PASS | [MoCo-v2](https://github.com/facebookresearch/moco) | 200 | 59.5 | 52.8 | [R50 weights](https://www.robots.ox.ac.uk/~vgg/research/pass/pretrained_models/moco_v2_200ep.pth.tar) | 33 | | PASS | [MoCo-v2](https://github.com/facebookresearch/moco) | 800 | 61.2 | 54.0 | [R50 weights](https://www.robots.ox.ac.uk/~vgg/research/pass/pretrained_models/moco_v2_800ep.pth.tar) | 34 | | PASS | [MoCo-v2 (R18)](https://github.com/facebookresearch/moco) | 800 | 45.3 | 44.4 | [R18 weights](https://www.robots.ox.ac.uk/~vgg/research/pass/pretrained_models/moco_v2_r18_800ep.pth.tar) | 35 | | PASS | [MoCo-v2-CLD](https://github.com/frank-xwang/CLD-UnsupervisedLearning) | 200 | 60.2 | 53.1 | [R50 weights](https://www.robots.ox.ac.uk/~vgg/research/pass/pretrained_models/moco_v2_CLD_200ep.pth.tar) | 36 | | PASS | [SwAV](https://github.com/facebookresearch/swav) | 200 | 60.8 | 55.5 | [R50 weights](https://www.robots.ox.ac.uk/~vgg/research/pass/pretrained_models/swav_200ep.pth.tar) | 37 | | PASS | [DINO](https://github.com/facebookresearch/dino) | 100 | 61.3 | 54.6 | [ViT S16 weights](https://www.robots.ox.ac.uk/~vgg/research/pass/pretrained_models/dino_deit_100ep.pth.tar) | 38 | | PASS | [DINO](https://github.com/facebookresearch/dino) | 300 | 65.0 | 55.7 | [ViT S16 weights](https://www.robots.ox.ac.uk/~vgg/research/pass/pretrained_models/dino_deit_300ep_ttemp0o07_warumup30ep_normlayerF.pth.tar) | 39 | 40 | 41 | In the table above we give the download links to the full checkpoints (including momentum encoder etc.) to the models we've trained. 42 | For comparison, we include MoCo-v2 trained on ILSVRC-12 ("IN-1k") and report linear probing performance on IN-1k and Places205. 43 | 44 | ## Pretrained models from PyTorch Hub 45 | ```python 46 | import torch 47 | vits16_100ep = torch.hub.load('yukimasano/PASS:main', 'dino_100ep_vits16') 48 | vits16 = torch.hub.load('yukimasano/PASS:main', 'dino_vits16') 49 | r50_swav_200ep = torch.hub.load('yukimasano/PASS:main', 'swav_resnet50') 50 | r50_moco_800ep = torch.hub.load('yukimasano/PASS:main', 'moco_resnet50') 51 | r50_moco_cld_200ep = torch.hub.load('yukimasano/PASS:main', 'moco_cld_resnet50') 52 | ``` 53 | 54 | ## PASSify your dataset 55 | In the folder [PASSify](PASSify/README.md) of this repo, you can find automated scripts that try to remove humans from image datasets. 56 | 57 | ### Contribute your models 58 | 59 | Please let us know if you have a model pretrained on this dataset and I will add this to the list above. 60 | 61 | ## Citation 62 | ``` 63 | @Article{asano21pass, 64 | author = "Yuki M. 
Asano and Christian Rupprecht and Andrew Zisserman and Andrea Vedaldi", 65 | title = "PASS: An ImageNet replacement for self-supervised pretraining without humans", 66 | journal = "NeurIPS Track on Datasets and Benchmarks", 67 | year = "2021" 68 | } 69 | ``` 70 | -------------------------------------------------------------------------------- /download.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # download files 4 | echo "downloading dataset tar files" 5 | for PART in 0 1 2 3 4 5 6 7 8 9 6 | do 7 | echo "download part" $PART 8 | curl https://zenodo.org/record/6615455/files/PASS.${PART}.tar --output PASS.${PART}.tar 9 | done 10 | 11 | # extract dataset 12 | ## will create dataset with images in PASS_dataset/dummy_folder/img-hash.jpeg 13 | for file in *.tar; do tar -xf "$file"; done 14 | 15 | # you can use this now e.g. with torchvision.datasets.ImageFolder('/dir/to/PASS') 16 | -------------------------------------------------------------------------------- /hubconf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision.models.resnet import resnet50 as __resnet50 3 | 4 | import vision_transformer as vits 5 | 6 | dependencies = ["torch", "torchvision"] 7 | 8 | def dino_vits16(pretrained=True, **kwargs): 9 | """ 10 | ViT-Small/16x16 pre-trained with DINO for 300 epochs, teacher-temp=0.07, warmup epochs=30, norm-layer=False 11 | """ 12 | model = vits.__dict__["vit_small"](patch_size=16, num_classes=0, **kwargs) 13 | if pretrained: 14 | state_dict = torch.hub.load_state_dict_from_url( 15 | url="https://www.robots.ox.ac.uk/~vgg/research/pass/pretrained_models/dino_deit_300ep_ttemp0o07_warumup30ep_normlayerF.pth.tar", 16 | map_location="cpu", 17 | )['teacher'] 18 | state_dict = __clean_ckpt(state_dict) 19 | msg = model.load_state_dict(state_dict, strict=False) 20 | print(msg) 21 | return model 22 | 23 | 24 | def dino_100ep_vits16(pretrained=True, **kwargs): 25 | """ 26 | ViT-Small/16x16 pre-trained with DINO. 
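    Checkpoint trained for 100 epochs (weights: dino_deit_100ep.pth.tar).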
27 | """ 28 | model = vits.__dict__["vit_small"](patch_size=16, num_classes=0, **kwargs) 29 | if pretrained: 30 | state_dict = torch.hub.load_state_dict_from_url( 31 | url="https://www.robots.ox.ac.uk/~vgg/research/pass/pretrained_models/dino_deit_100ep.pth.tar", 32 | map_location="cpu", 33 | )['teacher'] 34 | state_dict = __clean_ckpt(state_dict) 35 | msg = model.load_state_dict(state_dict, strict=False) 36 | print(msg) 37 | return model 38 | 39 | 40 | def moco_resnet50(pretrained=True, **kwargs): 41 | """ 42 | ResNet-50 pre-trained with MoCo-v2 for 800epochs 43 | """ 44 | model = __resnet50(pretrained=False, **kwargs) 45 | model.fc = torch.nn.Identity() 46 | if pretrained: 47 | state_dict = torch.hub.load_state_dict_from_url( 48 | url="https://www.robots.ox.ac.uk/~vgg/research/pass/pretrained_models/moco_v2_800ep.pth.tar", 49 | map_location="cpu", 50 | )['state_dict'] 51 | state_dict = __clean_ckpt(state_dict) 52 | msg = model.load_state_dict(state_dict, strict=False) 53 | print(msg) 54 | return model 55 | 56 | def moco_resnet50_200ep(pretrained=True, **kwargs): 57 | """ 58 | ResNet-50 pre-trained with MoCo-v2 for 200epochs 59 | """ 60 | model = __resnet50(pretrained=False, **kwargs) 61 | model.fc = torch.nn.Identity() 62 | if pretrained: 63 | state_dict = torch.hub.load_state_dict_from_url( 64 | url="https://www.robots.ox.ac.uk/~vgg/research/pass/pretrained_models/moco_v2_200ep.pth.tar", 65 | map_location="cpu", 66 | )['state_dict'] 67 | state_dict = __clean_ckpt(state_dict) 68 | msg = model.load_state_dict(state_dict, strict=False) 69 | print(msg) 70 | return model 71 | 72 | def moco_cld_resnet50(pretrained=True, **kwargs): 73 | """ 74 | ResNet-50 pre-trained with MoCo-v2 for 200epochs 75 | """ 76 | model = __resnet50(pretrained=False, **kwargs) 77 | model.fc = torch.nn.Identity() 78 | if pretrained: 79 | state_dict = torch.hub.load_state_dict_from_url( 80 | url="https://www.robots.ox.ac.uk/~vgg/research/pass/pretrained_models/moco_v2_CLD_200ep.pth.tar", 81 | map_location="cpu", 82 | )['state_dict'] 83 | state_dict = __clean_ckpt(state_dict) 84 | msg = model.load_state_dict(state_dict, strict=False) 85 | print(msg) 86 | return model 87 | 88 | def swav_resnet50(pretrained=True, **kwargs): 89 | """ 90 | ResNet-50 pre-trained with SwAV for 200 epochs. 2 large crops 6 small ones. 
91 | """ 92 | model = __resnet50(pretrained=False, **kwargs) 93 | model.fc = torch.nn.Identity() 94 | if pretrained: 95 | state_dict = torch.hub.load_state_dict_from_url( 96 | url="https://www.robots.ox.ac.uk/~vgg/research/pass/pretrained_models/swav_200ep.pth.tar", 97 | map_location="cpu", 98 | )['state_dict'] 99 | state_dict = __clean_ckpt(state_dict) 100 | msg = model.load_state_dict(state_dict, strict=False) 101 | print(msg) 102 | return model 103 | 104 | def __clean_ckpt(state_dict): 105 | is_moco = any(['module.encoder_q' in k for k in state_dict.keys()]) 106 | if is_moco: 107 | state_dict = {k.replace('module.encoder_q.',''):v for k,v in state_dict.items() if 'encoder_q' in k} 108 | else: 109 | state_dict = {k.replace('module.',''):v for k,v in state_dict.items()} 110 | return state_dict 111 | -------------------------------------------------------------------------------- /img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yukimasano/PASS/e91b4fc9daf219c765ec5816610e76306de0150c/img.png -------------------------------------------------------------------------------- /pass.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yukimasano/PASS/e91b4fc9daf219c765ec5816610e76306de0150c/pass.gif -------------------------------------------------------------------------------- /version_history.txt: -------------------------------------------------------------------------------- 1 | 21.09.2021: 2 | v1: initial release: 1,440,191 images 3 | 4 | 14.10.2021 5 | v2: Removed 472 images, now 1,439,719 images. Thanks to the Know-your-data (https://knowyourdata-tfds.withgoogle.com/#dataset=pass) page (published on the 13.10.2021), we were able to identify 472 further images that contained humans. 6 | Most images that we have removed only contained human depictions (e.g. in newspapers, black-white portraits, ads) in some background, and very few were actual photographs of people (<50). We used KYD to sort images both by face area and face probablity to find all images that were missed in v1. 7 | We have further added more metadata that will aid further analysis in KYD in the future. 8 | 9 | 07.04.2022 10 | v3: Compared to v2.0 we have removed further 131 images that mostly contained faces, other body parts or images of tattoos. -------------------------------------------------------------------------------- /vision_transformer.py: -------------------------------------------------------------------------------- 1 | """ 2 | copied from DINO: https://github.com/facebookresearch/dino 3 | Mostly copy-paste from timm library. 4 | https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py 5 | """ 6 | import math 7 | from functools import partial 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | 13 | def _no_grad_trunc_normal_(tensor, mean, std, a, b): 14 | # Cut & paste from PyTorch official master until it's in a few official releases - RW 15 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf 16 | def norm_cdf(x): 17 | # Computes standard normal cumulative distribution function 18 | return (1. + math.erf(x / math.sqrt(2.))) / 2. 19 | 20 | if (mean < a - 2 * std) or (mean > b + 2 * std): 21 | warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. 
" 22 | "The distribution of values may be incorrect.", 23 | stacklevel=2) 24 | 25 | with torch.no_grad(): 26 | # Values are generated by using a truncated uniform distribution and 27 | # then using the inverse CDF for the normal distribution. 28 | # Get upper and lower cdf values 29 | l = norm_cdf((a - mean) / std) 30 | u = norm_cdf((b - mean) / std) 31 | 32 | # Uniformly fill tensor with values from [l, u], then translate to 33 | # [2l-1, 2u-1]. 34 | tensor.uniform_(2 * l - 1, 2 * u - 1) 35 | 36 | # Use inverse cdf transform for normal distribution to get truncated 37 | # standard normal 38 | tensor.erfinv_() 39 | 40 | # Transform to proper mean, std 41 | tensor.mul_(std * math.sqrt(2.)) 42 | tensor.add_(mean) 43 | 44 | # Clamp to ensure it's in the proper range 45 | tensor.clamp_(min=a, max=b) 46 | return tensor 47 | 48 | 49 | def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): 50 | # type: (Tensor, float, float, float, float) -> Tensor 51 | return _no_grad_trunc_normal_(tensor, mean, std, a, b) 52 | 53 | 54 | def drop_path(x, drop_prob: float = 0., training: bool = False): 55 | if drop_prob == 0. or not training: 56 | return x 57 | keep_prob = 1 - drop_prob 58 | shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets 59 | random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) 60 | random_tensor.floor_() # binarize 61 | output = x.div(keep_prob) * random_tensor 62 | return output 63 | 64 | 65 | class DropPath(nn.Module): 66 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 67 | """ 68 | def __init__(self, drop_prob=None): 69 | super(DropPath, self).__init__() 70 | self.drop_prob = drop_prob 71 | 72 | def forward(self, x): 73 | return drop_path(x, self.drop_prob, self.training) 74 | 75 | 76 | class Mlp(nn.Module): 77 | def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): 78 | super().__init__() 79 | out_features = out_features or in_features 80 | hidden_features = hidden_features or in_features 81 | self.fc1 = nn.Linear(in_features, hidden_features) 82 | self.act = act_layer() 83 | self.fc2 = nn.Linear(hidden_features, out_features) 84 | self.drop = nn.Dropout(drop) 85 | 86 | def forward(self, x): 87 | x = self.fc1(x) 88 | x = self.act(x) 89 | x = self.drop(x) 90 | x = self.fc2(x) 91 | x = self.drop(x) 92 | return x 93 | 94 | 95 | class Attention(nn.Module): 96 | def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.): 97 | super().__init__() 98 | self.num_heads = num_heads 99 | head_dim = dim // num_heads 100 | self.scale = qk_scale or head_dim ** -0.5 101 | 102 | self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) 103 | self.attn_drop = nn.Dropout(attn_drop) 104 | self.proj = nn.Linear(dim, dim) 105 | self.proj_drop = nn.Dropout(proj_drop) 106 | 107 | def forward(self, x): 108 | B, N, C = x.shape 109 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) 110 | q, k, v = qkv[0], qkv[1], qkv[2] 111 | 112 | attn = (q @ k.transpose(-2, -1)) * self.scale 113 | attn = attn.softmax(dim=-1) 114 | attn = self.attn_drop(attn) 115 | 116 | x = (attn @ v).transpose(1, 2).reshape(B, N, C) 117 | x = self.proj(x) 118 | x = self.proj_drop(x) 119 | return x, attn 120 | 121 | 122 | class Block(nn.Module): 123 | def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., 124 | drop_path=0., act_layer=nn.GELU, 
norm_layer=nn.LayerNorm): 125 | super().__init__() 126 | self.norm1 = norm_layer(dim) 127 | self.attn = Attention( 128 | dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) 129 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 130 | self.norm2 = norm_layer(dim) 131 | mlp_hidden_dim = int(dim * mlp_ratio) 132 | self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) 133 | 134 | def forward(self, x, return_attention=False): 135 | y, attn = self.attn(self.norm1(x)) 136 | if return_attention: 137 | return attn 138 | x = x + self.drop_path(y) 139 | x = x + self.drop_path(self.mlp(self.norm2(x))) 140 | return x 141 | 142 | 143 | class PatchEmbed(nn.Module): 144 | """ Image to Patch Embedding 145 | """ 146 | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): 147 | super().__init__() 148 | num_patches = (img_size // patch_size) * (img_size // patch_size) 149 | self.img_size = img_size 150 | self.patch_size = patch_size 151 | self.num_patches = num_patches 152 | 153 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) 154 | 155 | def forward(self, x): 156 | B, C, H, W = x.shape 157 | x = self.proj(x).flatten(2).transpose(1, 2) 158 | return x 159 | 160 | 161 | class VisionTransformer(nn.Module): 162 | """ Vision Transformer """ 163 | def __init__(self, img_size=[224], patch_size=16, in_chans=3, num_classes=0, embed_dim=768, depth=12, 164 | num_heads=12, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0., 165 | drop_path_rate=0., norm_layer=nn.LayerNorm, **kwargs): 166 | super().__init__() 167 | self.num_features = self.embed_dim = embed_dim 168 | 169 | self.patch_embed = PatchEmbed( 170 | img_size=img_size[0], patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim) 171 | num_patches = self.patch_embed.num_patches 172 | 173 | self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) 174 | self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim)) 175 | self.pos_drop = nn.Dropout(p=drop_rate) 176 | 177 | dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule 178 | self.blocks = nn.ModuleList([ 179 | Block( 180 | dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, 181 | drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer) 182 | for i in range(depth)]) 183 | self.norm = norm_layer(embed_dim) 184 | 185 | # Classifier head 186 | self.head = nn.Linear(embed_dim, num_classes) if num_classes > 0 else nn.Identity() 187 | 188 | trunc_normal_(self.pos_embed, std=.02) 189 | trunc_normal_(self.cls_token, std=.02) 190 | self.apply(self._init_weights) 191 | 192 | def _init_weights(self, m): 193 | if isinstance(m, nn.Linear): 194 | trunc_normal_(m.weight, std=.02) 195 | if isinstance(m, nn.Linear) and m.bias is not None: 196 | nn.init.constant_(m.bias, 0) 197 | elif isinstance(m, nn.LayerNorm): 198 | nn.init.constant_(m.bias, 0) 199 | nn.init.constant_(m.weight, 1.0) 200 | 201 | def interpolate_pos_encoding(self, x, w, h): 202 | npatch = x.shape[1] - 1 203 | N = self.pos_embed.shape[1] - 1 204 | if npatch == N and w == h: 205 | return self.pos_embed 206 | class_pos_embed = self.pos_embed[:, 0] 207 | patch_pos_embed = self.pos_embed[:, 1:] 208 | dim = x.shape[-1] 209 | w0 = w // self.patch_embed.patch_size 210 | h0 = h // self.patch_embed.patch_size 211 | # we add a small number to 
avoid floating point error in the interpolation 212 | # see discussion at https://github.com/facebookresearch/dino/issues/8 213 | w0, h0 = w0 + 0.1, h0 + 0.1 214 | patch_pos_embed = nn.functional.interpolate( 215 | patch_pos_embed.reshape(1, int(math.sqrt(N)), int(math.sqrt(N)), dim).permute(0, 3, 1, 2), 216 | scale_factor=(w0 / math.sqrt(N), h0 / math.sqrt(N)), 217 | mode='bicubic', 218 | ) 219 | assert int(w0) == patch_pos_embed.shape[-2] and int(h0) == patch_pos_embed.shape[-1] 220 | patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim) 221 | return torch.cat((class_pos_embed.unsqueeze(0), patch_pos_embed), dim=1) 222 | 223 | def prepare_tokens(self, x): 224 | B, nc, w, h = x.shape 225 | x = self.patch_embed(x) # patch linear embedding 226 | 227 | # add the [CLS] token to the embed patch tokens 228 | cls_tokens = self.cls_token.expand(B, -1, -1) 229 | x = torch.cat((cls_tokens, x), dim=1) 230 | 231 | # add positional encoding to each token 232 | x = x + self.interpolate_pos_encoding(x, w, h) 233 | 234 | return self.pos_drop(x) 235 | 236 | def forward(self, x): 237 | x = self.prepare_tokens(x) 238 | for blk in self.blocks: 239 | x = blk(x) 240 | x = self.norm(x) 241 | return x[:, 0] 242 | 243 | def get_last_selfattention(self, x): 244 | x = self.prepare_tokens(x) 245 | for i, blk in enumerate(self.blocks): 246 | if i < len(self.blocks) - 1: 247 | x = blk(x) 248 | else: 249 | # return attention of the last block 250 | return blk(x, return_attention=True) 251 | 252 | def get_intermediate_layers(self, x, n=1): 253 | x = self.prepare_tokens(x) 254 | # we return the output tokens from the `n` last blocks 255 | output = [] 256 | for i, blk in enumerate(self.blocks): 257 | x = blk(x) 258 | if len(self.blocks) - i <= n: 259 | output.append(self.norm(x)) 260 | return output 261 | 262 | 263 | def vit_tiny(patch_size=16, **kwargs): 264 | model = VisionTransformer( 265 | patch_size=patch_size, embed_dim=192, depth=12, num_heads=3, mlp_ratio=4, 266 | qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs) 267 | return model 268 | 269 | 270 | def vit_small(patch_size=16, **kwargs): 271 | model = VisionTransformer( 272 | patch_size=patch_size, embed_dim=384, depth=12, num_heads=6, mlp_ratio=4, 273 | qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs) 274 | return model 275 | 276 | 277 | def vit_base(patch_size=16, **kwargs): 278 | model = VisionTransformer( 279 | patch_size=patch_size, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, 280 | qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs) 281 | return model 282 | 283 | 284 | class DINOHead(nn.Module): 285 | def __init__(self, in_dim, out_dim, use_bn=False, norm_last_layer=True, nlayers=3, hidden_dim=2048, bottleneck_dim=256): 286 | super().__init__() 287 | nlayers = max(nlayers, 1) 288 | if nlayers == 1: 289 | self.mlp = nn.Linear(in_dim, bottleneck_dim) 290 | else: 291 | layers = [nn.Linear(in_dim, hidden_dim)] 292 | if use_bn: 293 | layers.append(nn.BatchNorm1d(hidden_dim)) 294 | layers.append(nn.GELU()) 295 | for _ in range(nlayers - 2): 296 | layers.append(nn.Linear(hidden_dim, hidden_dim)) 297 | if use_bn: 298 | layers.append(nn.BatchNorm1d(hidden_dim)) 299 | layers.append(nn.GELU()) 300 | layers.append(nn.Linear(hidden_dim, bottleneck_dim)) 301 | self.mlp = nn.Sequential(*layers) 302 | self.apply(self._init_weights) 303 | self.last_layer = nn.utils.weight_norm(nn.Linear(bottleneck_dim, out_dim, bias=False)) 304 | self.last_layer.weight_g.data.fill_(1) 305 | if norm_last_layer: 
306 | self.last_layer.weight_g.requires_grad = False 307 | 308 | def _init_weights(self, m): 309 | if isinstance(m, nn.Linear): 310 | trunc_normal_(m.weight, std=.02) 311 | if isinstance(m, nn.Linear) and m.bias is not None: 312 | nn.init.constant_(m.bias, 0) 313 | 314 | def forward(self, x): 315 | x = self.mlp(x) 316 | x = nn.functional.normalize(x, dim=-1, p=2) 317 | x = self.last_layer(x) 318 | return x 319 | --------------------------------------------------------------------------------
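A minimal end-to-end sketch (not part of the repository): loading a PASS-pretrained DINO ViT-S/16 through the hub entry listed in the README and querying the model defined above for features and the last block's self-attention. The input is a random tensor standing in for an image batch, and the hub call downloads the pretrained weights on first use:

```python
# Minimal usage sketch of the PASS-pretrained DINO ViT-S/16 (hub entry from the README).
import torch

model = torch.hub.load('yukimasano/PASS:main', 'dino_vits16')  # 300-epoch DINO checkpoint
model.eval()

x = torch.randn(1, 3, 224, 224)  # dummy batch standing in for a real image
with torch.no_grad():
    feats = model(x)                        # [CLS] embedding of the last block, dim 384 for ViT-S
    attn = model.get_last_selfattention(x)  # self-attention of the final block

print(feats.shape)  # torch.Size([1, 384])
print(attn.shape)   # torch.Size([1, 6, 197, 197]) -> 6 heads, 196 patch tokens + 1 [CLS] token
```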