├── .idea ├── Heatmap.iml ├── codeStyleSettings.xml ├── misc.xml └── modules.xml ├── Mytransforms.py ├── README.md ├── __init__.py ├── dataset_loader.py ├── debug.py ├── evaluation ├── csv_evaluation.py ├── csv_evaluation_FPN.py ├── csv_evaluation_ResNet.py ├── generate_val.py ├── modify.py └── submit.py ├── experiments ├── CPM │ ├── config.yml │ └── train_net.py ├── CPM_FPN │ ├── config.yml │ └── train_net.py ├── CPM_ResNet │ ├── config.yml │ └── train_net.py ├── FPN │ ├── config.yml │ └── train_net.py └── hourglass │ ├── config.yml │ └── train_net.py ├── models ├── CPM.py ├── CPM_FPN.py ├── CPM_ResNet.py ├── __init__.py ├── bk │ ├── CPM.py │ ├── CPM_FPN.py │ ├── CPM_FPN2.py │ ├── CPM_FPN3.py │ ├── CPM_FPN4.py │ ├── CPM_FPN5.py │ ├── CPM_ResNet.py │ ├── CPM_ResNet2.py │ ├── CPM_ResNet3.py │ ├── CPM_ResNet4.py │ ├── CPM_ResNet5.py │ ├── CPM_ResNet6.py │ ├── CPM_ResNet7.py │ ├── FPN.py │ └── hourglass.py └── hourglass.py ├── util.py └── vis_input.ipynb /.idea/Heatmap.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /.idea/codeStyleSettings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 11 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Heatmap 2 | Heatmap approach for Fashion AI keypoint 3 | 4 | Preprocessing 5 | 1. split train to trainminusval and val 6 | 7 | train 8 | 1. cd ./experiments/CPM_FPN/ 9 | 2. python ./train_net 10 | 11 | eval 12 | 1. cd ./evaluation 13 | 2. python ./csv_evaluation_FPN.py 14 | 15 | experiments 16 | 1. CPM -> 23% on leaderboard 17 | 2. CPM_ResNet 17.9% on valset 18 | 3. CPM_FPN + data_aug -> 11% on valset, 12% on leaderboard 19 | 20 | 21 | 22 | 23 | #---------------------------------------------- Related Papers-------------------------------------------------------------- 24 | 25 | 1. Attentive Fashion Grammar Network for Fashion Landmark Detection and Clothing Category Classification (BIT, UCLA) - CVPR 2018 26 | - Worthy reading 27 | 28 | 2. Fashion Landmark Detection in the Wild (Sensetime) - ECCV 2016 29 | - Don't waste your time reading this paper, unless you want to learn from scratch 30 | 31 | 32 | 3. A Coarse-Fine Network for Keypoint Localization (U of Sydeny) - ICCV 2017 33 | - Worthy reading. 
非常有意思,文章有两个部分,第一个部分Coarse利用detection的方式检测潜在的目标区域,第二个部分利用softmax的方式分类点。 34 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xiangyu-CAS/FashionAI_Keypoints/dab6cbd975ba6071b070fb7da2fb163d01e2e2e4/__init__.py -------------------------------------------------------------------------------- /dataset_loader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data as data 3 | import numpy as np 4 | import os 5 | import math 6 | from PIL import Image 7 | import cv2 8 | import csv 9 | 10 | class dataset_loader(data.Dataset): 11 | 12 | def __init__(self, img_dir, ann_path, stride, transforms=None, sigma = 15): 13 | 14 | self.sigma = sigma#15 #9 #15 15 | self.stride = stride 16 | self.img_dir = img_dir 17 | self.transforms = transforms 18 | self.anns = [] 19 | self.info = [] 20 | with open(ann_path,'rb') as f: 21 | reader = csv.reader(f) 22 | for row in reader: 23 | self.anns.append(row) 24 | self.info.append(self.anns[0]) 25 | self.anns=self.anns[1:] 26 | 27 | 28 | def __getitem__(self, index): 29 | # ---------------- read info ----------------------- 30 | ann = self.anns[index] 31 | img_path = os.path.join(self.img_dir, ann[0]) 32 | img = cv2.imread(img_path) # BGR 33 | catergory = ann[1] 34 | kpt = _get_keypoints(ann) 35 | # ----------------- transform ---------------------- 36 | center = [img.shape[0]/2,img.shape[1]/2] 37 | 38 | # ----------------- transform ---------------------- 39 | if not self.transforms: 40 | img, kpt = _croppad(img, kpt, center, 384, 384) 41 | else: 42 | img, kpt, center = self.transforms(img, kpt, center) 43 | #--------------------------------------------------- 44 | heatmaps = _generate_heatmap(img, kpt,self.stride, self.sigma) 45 | 46 | img = np.array(img, dtype=np.float32) 47 | img -= 128.0 48 | img /= 255.0 49 | 50 | img = torch.from_numpy(img.transpose((2, 0, 1))) 51 | heatmaps = torch.from_numpy(heatmaps.transpose((2, 0, 1))) 52 | 53 | # img = self.trasforms(img) 54 | # heatmaps = self.trasforms(heatmaps) 55 | 56 | return img, heatmaps 57 | 58 | def __len__(self): 59 | return len(self.anns) 60 | 61 | def _croppad(img, kpt, center, w, h): 62 | num = len(kpt) 63 | height, width, _ = img.shape 64 | new_img = np.empty((h, w, 3), dtype=np.float32) 65 | new_img.fill(128) 66 | 67 | # calculate offset 68 | offset_up = -1*(h/2 - center[0]) 69 | offset_left = -1*(w/2 - center[1]) 70 | 71 | for i in range(num): 72 | kpt[i][0] -= offset_left 73 | kpt[i][1] -= offset_up 74 | 75 | st_x = 0 76 | ed_x = w 77 | st_y = 0 78 | ed_y = h 79 | or_st_x = offset_left 80 | or_ed_x = offset_left + w 81 | or_st_y = offset_up 82 | or_ed_y = offset_up + h 83 | 84 | if offset_left < 0: 85 | st_x = -offset_left 86 | or_st_x = 0 87 | if offset_left + w > width: 88 | ed_x = width - offset_left 89 | or_ed_x = width 90 | if offset_up < 0: 91 | st_y = -offset_up 92 | or_st_y = 0 93 | if offset_up + h > height: 94 | ed_y = height - offset_up 95 | or_ed_y = height 96 | new_img[st_y: ed_y, st_x: ed_x, :] = img[or_st_y: or_ed_y, or_st_x: or_ed_x, :].copy() 97 | 98 | return np.ascontiguousarray(new_img), kpt 99 | 100 | 101 | def _get_keypoints(ann): 102 | kpt = np.zeros((24, 3)) 103 | for i in range(2, len(ann)): 104 | str = ann[i] 105 | [x_str, y_str, vis_str] = str.split('_') 106 | kpt[i - 2, 0], kpt[i - 2, 1], kpt[i - 2, 2] = int(x_str), int(y_str), int(vis_str) 107 | 
return kpt 108 | 109 | def _generate_heatmap(img, kpt, stride, sigma): 110 | height, width, _ = img.shape 111 | heatmap = np.zeros((height / stride, width / stride, len(kpt) + 1), dtype=np.float32) # (24 points + background) 112 | height, width, num_point = heatmap.shape 113 | start = stride / 2.0 - 0.5 114 | 115 | num = len(kpt) 116 | for i in range(num): 117 | if kpt[i][2] == -1: # not labeled 118 | continue 119 | x = kpt[i][0] 120 | y = kpt[i][1] 121 | for h in range(height): 122 | for w in range(width): 123 | xx = start + w * stride 124 | yy = start + h * stride 125 | dis = ((xx - x) * (xx - x) + (yy - y) * (yy - y)) / 2.0 / sigma / sigma 126 | if dis > 4.6052: 127 | continue 128 | heatmap[h][w][i] += math.exp(-dis) 129 | if heatmap[h][w][i] > 1: 130 | heatmap[h][w][i] = 1 131 | 132 | heatmap[:, :, -1] = 1.0 - np.max(heatmap[:, :, :-1], axis=2) # for background 133 | return heatmap 134 | 135 | ''' 136 | 0: labeled but not visble 137 | 1: labeled and visble 138 | -1: not labeled 139 | 140 | 'image_id', 141 | 'image_category', 142 | 0'neckline_left', 143 | 1'neckline_right', 144 | 2 'center_front', 145 | 3'shoulder_left', 146 | 4 'shoulder_right', 147 | 5 'armpit_left', 148 | 6 'armpit_right', 149 | 7 'waistline_left', 150 | 8 'waistline_right', 151 | 9 'cuff_left_in', 152 | 10 'cuff_left_out', 153 | 11 'cuff_right_in', 154 | 12 'cuff_right_out', 155 | 13 'top_hem_left', 156 | 14 'top_hem_right', 157 | 15 'waistband_left', 158 | 16 'waistband_right', 159 | 17 'hemline_left', 160 | 18 'hemline_right', 161 | 19 'crotch', 162 | 20 'bottom_left_in', 163 | 21 'bottom_left_out', 164 | 22 'bottom_right_in', 165 | 23 'bottom_right_out 166 | ''' -------------------------------------------------------------------------------- /debug.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os, sys 3 | import os 4 | import dataset_loader 5 | import torch 6 | import util 7 | import matplotlib 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | import torchvision.transforms as transforms 11 | 12 | ann_path = '/data/xiaobing.wang/xiangyu.zhu/FashionAI/data/warm_up_train/Annotations/annotations.csv' 13 | img_dir = '/data/xiaobing.wang/xiangyu.zhu/FashionAI/data/warm_up_train/' 14 | 15 | train_loader = torch.utils.data.DataLoader( 16 | dataset_loader.dataset_loader(img_dir, ann_path, 8, 17 | transforms.ToTensor()), 18 | batch_size=4, shuffle=True, 19 | num_workers=2, pin_memory=True) 20 | 21 | for i, (input, heatmap) in enumerate(train_loader): 22 | imgs = input.numpy() 23 | heats = heatmap.numpy() 24 | break -------------------------------------------------------------------------------- /evaluation/csv_evaluation.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import os 3 | import sys 4 | import numpy as np 5 | import cv2 6 | from scipy.ndimage.filters import gaussian_filter 7 | import math, time 8 | import torch 9 | import csv 10 | import util 11 | sys.path.append('../') 12 | def apply_model(oriImg, model, multiplier): 13 | stride = 8 14 | height, width, _ = oriImg.shape 15 | normed_img = np.array(oriImg, dtype=np.float32) 16 | heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 25), dtype=np.float32) 17 | for m in range(len(multiplier)): 18 | scale = multiplier[m] 19 | imageToTest = cv2.resize(normed_img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) 20 | imgToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, 128) 21 | 22 
| input_img = np.transpose(np.float32(imgToTest_padded[:, :, :, np.newaxis]), 23 | (3, 2, 0, 1)) / 255 - 0.5 # required shape (1, c, h, w) 24 | 25 | input_var = torch.autograd.Variable(torch.from_numpy(input_img).cuda()) 26 | 27 | # get the features 28 | heat1, heat2, heat3, heat4, heat5, heat6 = model(input_var) 29 | 30 | # get the heatmap 31 | heatmap = heat6.data.cpu().numpy() 32 | heatmap = np.transpose(np.squeeze(heatmap), (1, 2, 0)) # (h, w, c) 33 | heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) 34 | heatmap = heatmap[:imgToTest_padded.shape[0] - pad[2], :imgToTest_padded.shape[1] - pad[3], :] 35 | heatmap = cv2.resize(heatmap, (width, height), interpolation=cv2.INTER_CUBIC) 36 | heatmap_avg = heatmap_avg + heatmap / len(multiplier) 37 | 38 | all_peaks = [] # all of the possible points by classes. 39 | peak_counter = 0 40 | thre1 = 0.1 41 | for part in range(25 - 1): 42 | x_list = [] 43 | y_list = [] 44 | map_ori = heatmap_avg[:, :, part] 45 | map = gaussian_filter(map_ori, sigma=3) 46 | 47 | map_left = np.zeros(map.shape) 48 | map_left[1:, :] = map[:-1, :] 49 | map_right = np.zeros(map.shape) 50 | map_right[:-1, :] = map[1:, :] 51 | map_up = np.zeros(map.shape) 52 | map_up[:, 1:] = map[:, :-1] 53 | map_down = np.zeros(map.shape) 54 | map_down[:, :-1] = map[:, 1:] 55 | 56 | peaks_binary = np.logical_and.reduce( 57 | (map >= map_left, map >= map_right, map >= map_up, map >= map_down, map > thre1)) 58 | peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]) # note reverse 59 | peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks] 60 | id = range(peak_counter, peak_counter + len(peaks)) 61 | peaks_with_score_and_id = [peaks_with_score[i] + (id[i],) for i in range(len(id))] 62 | 63 | all_peaks.append(peaks_with_score_and_id) 64 | peak_counter += len(peaks) 65 | 66 | # sort by score 67 | for i in range(24): 68 | all_peaks[i] = sorted(all_peaks[i], key=lambda ele : ele[2],reverse = True) 69 | 70 | canvas = oriImg.copy() 71 | # draw points 72 | for i in range(24): 73 | for j in range(len(all_peaks[i])): 74 | if j is 0: 75 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, [0, 0, 255], thickness=-1) 76 | else: 77 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, [255, 0, 0], thickness=-1) 78 | 79 | keypoints = -1*np.ones((24, 3)) 80 | for i in range(24): 81 | if len(all_peaks[i]) == 0: 82 | continue 83 | else: 84 | keypoints[i,0], keypoints[i,1], keypoints[i,2] = all_peaks[i][0][0], all_peaks[i][0][1], 1 85 | 86 | return keypoints, canvas 87 | 88 | 89 | def write_csv(name, results): 90 | import csv 91 | with open(name, 'w') as f: 92 | writer = csv.writer(f) 93 | writer.writerows(results) 94 | 95 | def prepare_row(ann, keypoints): 96 | # cls 97 | image_name = ann[0] 98 | category = ann[1] 99 | keypoints_str = [] 100 | for i in range(24): 101 | cell_str = str(int(keypoints[i][0])) + '_' + str(int(keypoints[i][1])) + '_' + str(int(keypoints[i][2])) 102 | keypoints_str.append(cell_str) 103 | row = [image_name, category] + keypoints_str 104 | return row 105 | 106 | def read_csv(ann_file): 107 | info = [] 108 | anns = [] 109 | with open(ann_file, 'rb') as f: 110 | reader = csv.reader(f) 111 | for row in reader: 112 | anns.append(row) 113 | info = anns[0] 114 | anns = anns[1:] 115 | return info, anns 116 | 117 | def euclidean_distance(a, b): 118 | return math.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2) 119 | 120 | def criterion(ann_gt, ann_dt): 121 | category = ann_gt[1] 122 | gt_kpt = -1 * np.ones((24, 3)) 123 | for i in 
range(len(gt_kpt)): 124 | x_str, y_str, vis_str = ann_gt[i + 2].split('_') 125 | gt_kpt[i][0], gt_kpt[i][1], gt_kpt[i][2] = int(x_str), int(y_str), int(vis_str) 126 | 127 | dt_kpt = -1 * np.ones((24, 3)) 128 | for i in range(len(dt_kpt)): 129 | x_str, y_str, vis_str = ann_dt[i + 2].split('_') 130 | dt_kpt[i][0], dt_kpt[i][1], dt_kpt[i][2] = int(x_str), int(y_str), int(vis_str) 131 | 132 | if category in ['blouse','outwear','skirt']: # armpit distance 133 | thre = euclidean_distance(gt_kpt[5], gt_kpt[6]) 134 | elif category in ['trousers', 'dress']: # waistband distance 135 | thre = euclidean_distance(gt_kpt[7], gt_kpt[8]) 136 | if thre == 0: 137 | return [] 138 | score = [] 139 | for i in range(len(gt_kpt)): 140 | if gt_kpt[i][2] == 1: 141 | #if dt_kpt[i][2] == -1: 142 | # score.append(2) 143 | #else: 144 | score.append(1.0* euclidean_distance(gt_kpt[i],dt_kpt[i])/ thre) 145 | return score 146 | #print('score = {}'.format(score)) 147 | 148 | 149 | 150 | def evaluate(gt_file, dt_file, num_imgs): 151 | info_gt, anns_gt = read_csv(gt_file) 152 | info_dt, anns_dt = read_csv(dt_file) 153 | anns_gt = anns_gt[:num_imgs] 154 | assert len(anns_gt) == len(anns_dt) 155 | scores = [] 156 | for i in range(len(anns_gt)): 157 | ann_gt = anns_gt[i] 158 | ann_dt = anns_dt[i] 159 | score = criterion(ann_gt, ann_dt) 160 | scores += score 161 | value = sum(scores)/len(scores) 162 | print('score = {}'.format(value)) 163 | 164 | def eval(): 165 | gt_file = '../FashionAI/data/train/Annotations/val.csv' 166 | dt_file = 'val_result.csv' 167 | # dt_file = 'modify.csv' 168 | 169 | num_imgs = 100 170 | evaluate(gt_file, dt_file,num_imgs) 171 | 172 | 173 | def main(): 174 | os.environ['CUDA_VISIBLE_DEVICES'] = '2' 175 | 176 | #--------------------------- model ------------------------------------------------------------------------------- 177 | import models.CPM 178 | pytorch_model = '../FashionAI/Heatmap/experiments/CPM/20000.pth.tar' 179 | model = models.CPM.PoseModel(num_point=25, pretrained=False) 180 | #----------------------------------------------------------------------------------------------------------------- 181 | 182 | img_dir = '../FashionAI/data/train/' 183 | ann_path = '../FashionAI/data/train/Annotations/val.csv' 184 | result_name = 'val_result.csv' 185 | scale_search = [0.5, 0.7, 1.0, 1.3] #[0.5, 1.0, 1.5] 186 | boxsize = 384 187 | # -------------------------- pytorch model------------------ 188 | state_dict = torch.load(pytorch_model)['state_dict'] 189 | model.load_state_dict(state_dict) 190 | model = model.cuda() 191 | model.eval() 192 | # -------------------------------------------------------- 193 | anns = [] 194 | with open(ann_path, 'rb') as f: 195 | reader = csv.reader(f) 196 | for row in reader: 197 | anns.append(row) 198 | info=anns[0] 199 | anns = anns[1:] 200 | #--------------------------------------------------------- 201 | num_imgs = 100# len(anns) 202 | results = [] 203 | results.append(info) 204 | 205 | for i in range(num_imgs): 206 | print('{}/{}'.format(i, num_imgs)) 207 | ann = anns[i] 208 | image_path = os.path.join(img_dir,ann[0]) 209 | oriImg = cv2.imread(image_path) 210 | #multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search] 211 | multiplier = scale_search 212 | keypoints, canvas = apply_model(oriImg, model, multiplier) 213 | # cv2.imwrite(os.path.join('./result', ann[0].split('/')[-1]), canvas) 214 | row = prepare_row(ann, keypoints) 215 | results.append(row) 216 | write_csv(result_name, results) 217 | evaluate(ann_path, result_name,num_imgs) 218 | 219 | if 
__name__ == '__main__': 220 | main() 221 | # eval -------------------------------------------------------------------------------- /evaluation/csv_evaluation_FPN.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import os 3 | import sys 4 | import numpy as np 5 | import cv2 6 | from scipy.ndimage.filters import gaussian_filter 7 | import math, time 8 | import torch 9 | import csv 10 | import util 11 | sys.path.append('../') 12 | def apply_model(oriImg, model, multiplier): 13 | stride = 8 14 | height, width, _ = oriImg.shape 15 | normed_img = np.array(oriImg, dtype=np.float32) 16 | heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 25), dtype=np.float32) 17 | for m in range(len(multiplier)): 18 | scale = multiplier[m] 19 | imageToTest = cv2.resize(normed_img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) 20 | # imgToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, 128) 21 | imgToTest_padded, pad = util.padRightDownCorner(imageToTest, 64, 128) 22 | 23 | input_img = np.transpose(np.float32(imgToTest_padded[:, :, :, np.newaxis]), 24 | (3, 2, 0, 1)) / 255 - 0.5 # required shape (1, c, h, w) 25 | 26 | input_var = torch.autograd.Variable(torch.from_numpy(input_img).cuda()) 27 | 28 | # get the features 29 | heat = model(input_var) 30 | # heat = model(input_var) 31 | 32 | # get the heatmap 33 | heatmap = heat.data.cpu().numpy() 34 | heatmap = np.transpose(np.squeeze(heatmap), (1, 2, 0)) # (h, w, c) 35 | heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) 36 | heatmap = heatmap[:imgToTest_padded.shape[0] - pad[2], :imgToTest_padded.shape[1] - pad[3], :] 37 | heatmap = cv2.resize(heatmap, (width, height), interpolation=cv2.INTER_CUBIC) 38 | heatmap_avg = heatmap_avg + heatmap / len(multiplier) 39 | 40 | all_peaks = [] # all of the possible points by classes. 
41 | peak_counter = 0 42 | thre1 = 0.1 43 | for part in range(25 - 1): 44 | x_list = [] 45 | y_list = [] 46 | map_ori = heatmap_avg[:, :, part] 47 | map = gaussian_filter(map_ori, sigma=3) 48 | 49 | map_left = np.zeros(map.shape) 50 | map_left[1:, :] = map[:-1, :] 51 | map_right = np.zeros(map.shape) 52 | map_right[:-1, :] = map[1:, :] 53 | map_up = np.zeros(map.shape) 54 | map_up[:, 1:] = map[:, :-1] 55 | map_down = np.zeros(map.shape) 56 | map_down[:, :-1] = map[:, 1:] 57 | 58 | peaks_binary = np.logical_and.reduce( 59 | (map >= map_left, map >= map_right, map >= map_up, map >= map_down, map > thre1)) 60 | peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]) # note reverse 61 | peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks] 62 | id = range(peak_counter, peak_counter + len(peaks)) 63 | peaks_with_score_and_id = [peaks_with_score[i] + (id[i],) for i in range(len(id))] 64 | 65 | all_peaks.append(peaks_with_score_and_id) 66 | peak_counter += len(peaks) 67 | 68 | # sort by score 69 | for i in range(24): 70 | all_peaks[i] = sorted(all_peaks[i], key=lambda ele : ele[2],reverse = True) 71 | 72 | canvas = oriImg.copy() 73 | # draw points 74 | for i in range(24): 75 | for j in range(len(all_peaks[i])): 76 | if j is 0: 77 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, [0, 0, 255], thickness=-1) 78 | else: 79 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, [255, 0, 0], thickness=-1) 80 | 81 | keypoints = -1*np.ones((24, 3)) 82 | for i in range(24): 83 | if len(all_peaks[i]) == 0: 84 | continue 85 | else: 86 | keypoints[i,0], keypoints[i,1], keypoints[i,2] = all_peaks[i][0][0], all_peaks[i][0][1], 1 87 | 88 | return keypoints, canvas 89 | 90 | 91 | def write_csv(name, results): 92 | import csv 93 | with open(name, 'w') as f: 94 | writer = csv.writer(f) 95 | writer.writerows(results) 96 | 97 | def prepare_row(ann, keypoints): 98 | # cls 99 | image_name = ann[0] 100 | category = ann[1] 101 | keypoints_str = [] 102 | for i in range(24): 103 | cell_str = str(int(keypoints[i][0])) + '_' + str(int(keypoints[i][1])) + '_' + str(int(keypoints[i][2])) 104 | keypoints_str.append(cell_str) 105 | row = [image_name, category] + keypoints_str 106 | return row 107 | 108 | def read_csv(ann_file): 109 | info = [] 110 | anns = [] 111 | with open(ann_file, 'rb') as f: 112 | reader = csv.reader(f) 113 | for row in reader: 114 | anns.append(row) 115 | info = anns[0] 116 | anns = anns[1:] 117 | return info, anns 118 | 119 | def euclidean_distance(a, b): 120 | return math.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2) 121 | 122 | def criterion(ann_gt, ann_dt): 123 | category = ann_gt[1] 124 | gt_kpt = -1 * np.ones((24, 3)) 125 | for i in range(len(gt_kpt)): 126 | x_str, y_str, vis_str = ann_gt[i + 2].split('_') 127 | gt_kpt[i][0], gt_kpt[i][1], gt_kpt[i][2] = int(x_str), int(y_str), int(vis_str) 128 | 129 | dt_kpt = -1 * np.ones((24, 3)) 130 | for i in range(len(dt_kpt)): 131 | x_str, y_str, vis_str = ann_dt[i + 2].split('_') 132 | dt_kpt[i][0], dt_kpt[i][1], dt_kpt[i][2] = int(x_str), int(y_str), int(vis_str) 133 | 134 | if category in ['blouse','outwear','dress']: # armpit distance 135 | thre = euclidean_distance(gt_kpt[5], gt_kpt[6]) 136 | elif category in ['trousers', 'skirt']: # waistband distance 137 | thre = euclidean_distance(gt_kpt[15], gt_kpt[16]) 138 | if thre == 0: 139 | return [] 140 | score = [] 141 | for i in range(len(gt_kpt)): 142 | if gt_kpt[i][2] == 1: 143 | #if dt_kpt[i][2] == -1: 144 | # score.append(2) 145 | #else: 146 | score.append(1.0* 
euclidean_distance(gt_kpt[i],dt_kpt[i])/ thre) 147 | return score 148 | #print('score = {}'.format(score)) 149 | 150 | 151 | 152 | def evaluate(gt_file, dt_file, num_imgs): 153 | info_gt, anns_gt = read_csv(gt_file) 154 | info_dt, anns_dt = read_csv(dt_file) 155 | anns_gt = anns_gt[:num_imgs] 156 | assert len(anns_gt) == len(anns_dt) 157 | scores = [] 158 | for i in range(len(anns_gt)): 159 | ann_gt = anns_gt[i] 160 | ann_dt = anns_dt[i] 161 | score = criterion(ann_gt, ann_dt) 162 | scores += score 163 | value = sum(scores)/len(scores) 164 | print('score = {}'.format(value)) 165 | 166 | def eval(): 167 | gt_file = '../FashionAI/data/train/Annotations/val.csv' 168 | # dt_file = 'val_result.csv' 169 | dt_file = 'modify.csv' 170 | 171 | num_imgs = 500 172 | evaluate(gt_file, dt_file,num_imgs) 173 | 174 | 175 | def main(): 176 | os.environ['CUDA_VISIBLE_DEVICES'] = '3' 177 | 178 | # --------------------------- model ------------------------------------------------------------------------------- 179 | import models.CPM_FPN 180 | pytorch_model = '../FashionAI/Heatmap/experiments/CPM_FPN/160000.pth.tar' 181 | model = models.CPM_FPN.pose_estimation(class_num=25, pretrain=False) 182 | # ----------------------------------------------------------------------------------------------------------------- 183 | 184 | img_dir = '../FashionAI/data/train/' 185 | ann_path = '../FashionAI/data/train/Annotations/val.csv' 186 | # ann_path = '/data/xiaobing.wang/xiangyu.zhu/FashionAI/data/train/Annotations/trainminusval.csv' 187 | result_name = 'val_result.csv' 188 | # scale_search = [0.5, 0.7, 1.0, 1.3] # [0.5, 1.0, 1.5] 189 | scale_search = [0.5, 0.7, 1.0, 1.3] 190 | boxsize = 384 191 | # -------------------------- pytorch model------------------ 192 | state_dict = torch.load(pytorch_model)['state_dict'] 193 | model.load_state_dict(state_dict) 194 | model = model.cuda() 195 | model.eval() 196 | # -------------------------------------------------------- 197 | anns = [] 198 | with open(ann_path, 'rb') as f: 199 | reader = csv.reader(f) 200 | for row in reader: 201 | anns.append(row) 202 | info=anns[0] 203 | anns = anns[1:] 204 | #--------------------------------------------------------- 205 | num_imgs =100# len(anns) 206 | results = [] 207 | results.append(info) 208 | 209 | for i in range(num_imgs): 210 | print('{}/{}'.format(i, num_imgs)) 211 | ann = anns[i] 212 | image_path = os.path.join(img_dir, ann[0]) 213 | oriImg = cv2.imread(image_path) 214 | # multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search] 215 | multiplier = scale_search 216 | keypoints, canvas = apply_model(oriImg, model, multiplier) 217 | # cv2.imwrite(os.path.join('./result', ann[0].split('/')[-1]), canvas) 218 | row = prepare_row(ann, keypoints) 219 | results.append(row) 220 | write_csv(result_name, results) 221 | evaluate(ann_path, result_name,num_imgs) 222 | 223 | if __name__ == '__main__': 224 | main() 225 | # eval() 226 | -------------------------------------------------------------------------------- /evaluation/csv_evaluation_ResNet.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import os 3 | import sys 4 | import numpy as np 5 | import cv2 6 | from scipy.ndimage.filters import gaussian_filter 7 | import math, time 8 | import torch 9 | import csv 10 | import util 11 | sys.path.append('../') 12 | def apply_model(oriImg, model, multiplier): 13 | stride = 8 14 | height, width, _ = oriImg.shape 15 | normed_img = np.array(oriImg, dtype=np.float32) 16 | heatmap_avg = 
np.zeros((oriImg.shape[0], oriImg.shape[1], 25), dtype=np.float32) 17 | for m in range(len(multiplier)): 18 | scale = multiplier[m] 19 | imageToTest = cv2.resize(normed_img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) 20 | # imgToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, 128) 21 | imgToTest_padded, pad = util.padRightDownCorner(imageToTest, 64, 128) 22 | 23 | input_img = np.transpose(np.float32(imgToTest_padded[:, :, :, np.newaxis]), 24 | (3, 2, 0, 1)) / 255 - 0.5 # required shape (1, c, h, w) 25 | 26 | input_var = torch.autograd.Variable(torch.from_numpy(input_img).cuda()) 27 | 28 | # get the features 29 | heat = model(input_var) 30 | # heat = model(input_var) 31 | 32 | # get the heatmap 33 | heatmap = heat.data.cpu().numpy() 34 | heatmap = np.transpose(np.squeeze(heatmap), (1, 2, 0)) # (h, w, c) 35 | heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) 36 | heatmap = heatmap[:imgToTest_padded.shape[0] - pad[2], :imgToTest_padded.shape[1] - pad[3], :] 37 | heatmap = cv2.resize(heatmap, (width, height), interpolation=cv2.INTER_CUBIC) 38 | heatmap_avg = heatmap_avg + heatmap / len(multiplier) 39 | 40 | all_peaks = [] # all of the possible points by classes. 41 | peak_counter = 0 42 | thre1 = 0.1 43 | for part in range(25 - 1): 44 | x_list = [] 45 | y_list = [] 46 | map_ori = heatmap_avg[:, :, part] 47 | map = gaussian_filter(map_ori, sigma=3) 48 | 49 | map_left = np.zeros(map.shape) 50 | map_left[1:, :] = map[:-1, :] 51 | map_right = np.zeros(map.shape) 52 | map_right[:-1, :] = map[1:, :] 53 | map_up = np.zeros(map.shape) 54 | map_up[:, 1:] = map[:, :-1] 55 | map_down = np.zeros(map.shape) 56 | map_down[:, :-1] = map[:, 1:] 57 | 58 | peaks_binary = np.logical_and.reduce( 59 | (map >= map_left, map >= map_right, map >= map_up, map >= map_down, map > thre1)) 60 | peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]) # note reverse 61 | peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks] 62 | id = range(peak_counter, peak_counter + len(peaks)) 63 | peaks_with_score_and_id = [peaks_with_score[i] + (id[i],) for i in range(len(id))] 64 | 65 | all_peaks.append(peaks_with_score_and_id) 66 | peak_counter += len(peaks) 67 | 68 | # sort by score 69 | for i in range(24): 70 | all_peaks[i] = sorted(all_peaks[i], key=lambda ele : ele[2],reverse = True) 71 | 72 | canvas = oriImg.copy() 73 | # draw points 74 | for i in range(24): 75 | for j in range(len(all_peaks[i])): 76 | if j is 0: 77 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, [0, 0, 255], thickness=-1) 78 | else: 79 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, [255, 0, 0], thickness=-1) 80 | 81 | keypoints = -1*np.ones((24, 3)) 82 | for i in range(24): 83 | if len(all_peaks[i]) == 0: 84 | continue 85 | else: 86 | keypoints[i,0], keypoints[i,1], keypoints[i,2] = all_peaks[i][0][0], all_peaks[i][0][1], 1 87 | 88 | return keypoints, canvas 89 | 90 | 91 | def write_csv(name, results): 92 | import csv 93 | with open(name, 'w') as f: 94 | writer = csv.writer(f) 95 | writer.writerows(results) 96 | 97 | def prepare_row(ann, keypoints): 98 | # cls 99 | image_name = ann[0] 100 | category = ann[1] 101 | keypoints_str = [] 102 | for i in range(24): 103 | cell_str = str(int(keypoints[i][0])) + '_' + str(int(keypoints[i][1])) + '_' + str(int(keypoints[i][2])) 104 | keypoints_str.append(cell_str) 105 | row = [image_name, category] + keypoints_str 106 | return row 107 | 108 | def read_csv(ann_file): 109 | info = [] 110 | anns = [] 111 | with open(ann_file, 'rb') 
as f: 112 | reader = csv.reader(f) 113 | for row in reader: 114 | anns.append(row) 115 | info = anns[0] 116 | anns = anns[1:] 117 | return info, anns 118 | 119 | def euclidean_distance(a, b): 120 | return math.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2) 121 | 122 | def criterion(ann_gt, ann_dt): 123 | category = ann_gt[1] 124 | gt_kpt = -1 * np.ones((24, 3)) 125 | for i in range(len(gt_kpt)): 126 | x_str, y_str, vis_str = ann_gt[i + 2].split('_') 127 | gt_kpt[i][0], gt_kpt[i][1], gt_kpt[i][2] = int(x_str), int(y_str), int(vis_str) 128 | 129 | dt_kpt = -1 * np.ones((24, 3)) 130 | for i in range(len(dt_kpt)): 131 | x_str, y_str, vis_str = ann_dt[i + 2].split('_') 132 | dt_kpt[i][0], dt_kpt[i][1], dt_kpt[i][2] = int(x_str), int(y_str), int(vis_str) 133 | 134 | if category in ['blouse','outwear','dress']: # armpit distance 135 | thre = euclidean_distance(gt_kpt[5], gt_kpt[6]) 136 | elif category in ['trousers', 'skirt']: # waistband distance 137 | thre = euclidean_distance(gt_kpt[7], gt_kpt[8]) 138 | if thre == 0: 139 | return [] 140 | score = [] 141 | for i in range(len(gt_kpt)): 142 | if gt_kpt[i][2] == 1: 143 | #if dt_kpt[i][2] == -1: 144 | # score.append(2) 145 | #else: 146 | score.append(1.0* euclidean_distance(gt_kpt[i],dt_kpt[i])/ thre) 147 | return score 148 | #print('score = {}'.format(score)) 149 | 150 | 151 | 152 | def evaluate(gt_file, dt_file, num_imgs): 153 | info_gt, anns_gt = read_csv(gt_file) 154 | info_dt, anns_dt = read_csv(dt_file) 155 | anns_gt = anns_gt[:num_imgs] 156 | assert len(anns_gt) == len(anns_dt) 157 | scores = [] 158 | for i in range(len(anns_gt)): 159 | ann_gt = anns_gt[i] 160 | ann_dt = anns_dt[i] 161 | score = criterion(ann_gt, ann_dt) 162 | scores += score 163 | value = sum(scores)/len(scores) 164 | print('score = {}'.format(value)) 165 | 166 | def eval(): 167 | gt_file = '../FashionAI/data/train/Annotations/val.csv' 168 | # dt_file = 'val_result.csv' 169 | dt_file = 'modify.csv' 170 | 171 | num_imgs = 500 172 | evaluate(gt_file, dt_file,num_imgs) 173 | 174 | 175 | def main(): 176 | os.environ['CUDA_VISIBLE_DEVICES'] = '1' 177 | 178 | # --------------------------- model ------------------------------------------------------------------------------- 179 | import models.CPM_ResNet 180 | pytorch_model = '../FashionAI/Heatmap/experiments/CPM_ResNet/120000.pth.tar' 181 | model = models.CPM_ResNet.pose_estimation(class_num=25, pretrain=False) 182 | # ----------------------------------------------------------------------------------------------------------------- 183 | 184 | img_dir = '../FashionAI/data/train/' 185 | ann_path = '../FashionAI/data/train/Annotations/val.csv' 186 | result_name = 'val_result.csv' 187 | # scale_search = [0.5, 0.7, 1.0, 1.3] # [0.5, 1.0, 1.5] 188 | scale_search = [0.5, 0.7, 1.0, 1.3] 189 | boxsize = 384 190 | # -------------------------- pytorch model------------------ 191 | state_dict = torch.load(pytorch_model)['state_dict'] 192 | model.load_state_dict(state_dict) 193 | model = model.cuda() 194 | model.eval() 195 | # -------------------------------------------------------- 196 | anns = [] 197 | with open(ann_path, 'rb') as f: 198 | reader = csv.reader(f) 199 | for row in reader: 200 | anns.append(row) 201 | info=anns[0] 202 | anns = anns[1:] 203 | #--------------------------------------------------------- 204 | num_imgs =100# len(anns) 205 | results = [] 206 | results.append(info) 207 | 208 | for i in range(num_imgs): 209 | print('{}/{}'.format(i, num_imgs)) 210 | ann = anns[i] 211 | image_path = os.path.join(img_dir, ann[0]) 212 | 
oriImg = cv2.imread(image_path) 213 | # multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search] 214 | multiplier = scale_search 215 | keypoints, canvas = apply_model(oriImg, model, multiplier) 216 | # cv2.imwrite(os.path.join('./result', ann[0].split('/')[-1]), canvas) 217 | row = prepare_row(ann, keypoints) 218 | results.append(row) 219 | write_csv(result_name, results) 220 | evaluate(ann_path, result_name,num_imgs) 221 | 222 | if __name__ == '__main__': 223 | main() 224 | # eval() -------------------------------------------------------------------------------- /evaluation/generate_val.py: -------------------------------------------------------------------------------- 1 | # split train to trainminusval and val (500) 2 | import csv 3 | import os, random 4 | 5 | train_ann_path = '/data/xiaobing.wang/xiangyu.zhu/FashionAI/data/train/Annotations/train.csv' 6 | output_dir = '/data/xiaobing.wang/xiangyu.zhu/FashionAI/data/train/Annotations/' 7 | val_num = 500 8 | 9 | info = [] 10 | anns = [] 11 | with open(train_ann_path,'rb') as f: 12 | reader = csv.reader(f) 13 | for row in reader: 14 | anns.append(row) 15 | info = anns[0] 16 | anns = anns[1:] 17 | 18 | random.shuffle(anns) 19 | trainminusval_anns = [info] 20 | val_anns = [info] 21 | trainminusval_anns = trainminusval_anns + anns[:-500] 22 | val_anns = val_anns +anns[-500:] 23 | 24 | with open(os.path.join(output_dir,'trainminusval.csv'), 'w') as f: 25 | writer = csv.writer(f) 26 | writer.writerows(trainminusval_anns) 27 | 28 | with open(os.path.join(output_dir, 'val.csv'), 'w') as f: 29 | writer = csv.writer(f) 30 | writer.writerows(val_anns) 31 | 32 | 33 | -------------------------------------------------------------------------------- /evaluation/modify.py: -------------------------------------------------------------------------------- 1 | import csv 2 | 3 | 4 | def cell_str2init(cell): 5 | [x_str, y_str, vis_str] = cell.split('_') 6 | x, y, vis = int(x_str), int(y_str), int(vis_str) 7 | return [x,y,vis] 8 | 9 | csv_file = 'val_result.csv' 10 | # csv_file = 'result_0309_23.16%.csv' 11 | anns = [] 12 | with open(csv_file, 'rb') as f: 13 | reader = csv.reader(f) 14 | for row in reader: 15 | anns.append(row) 16 | info = anns[0] 17 | anns = anns[1:] 18 | 19 | center_pair = [[0,3,5,7,13,15,17], 20 | [1,4,6,8,14,16,18]] 21 | 22 | near_pair = [[ 9,11,20,22], 23 | [10,12,21,23]] 24 | 25 | for i in range(len(anns)): 26 | ann = anns[i] 27 | center_x = 0 28 | count = 0 29 | for j in range(2, len(ann)): 30 | cell = ann[j] 31 | [x, y, vis] = cell_str2init(cell) 32 | center_x += x 33 | count += 1 34 | center_x = int(1.0*center_x/count) 35 | for j in range(len(near_pair[0])): 36 | indexA = near_pair[0][j] + 2 37 | indexB = near_pair[1][j] + 2 38 | [x_str_A, y_str_A, vis_str_A] = ann[indexA].split('_') 39 | x_A, y_A, vis_A = int(x_str_A), int(y_str_A), int(vis_str_A) 40 | 41 | [x_str_B, y_str_B, vis_str_B] = ann[indexB].split('_') 42 | x_B, y_B, vis_B = int(x_str_B), int(y_str_B), int(vis_str_B) 43 | 44 | if (vis_A == -1 and vis_B == -1) or (vis_A == 1 and vis_B == 1): 45 | continue 46 | if (vis_A == 1 and vis_B == -1): 47 | vis_B = 1 48 | x_B = x_A 49 | y_B = y_A 50 | elif (vis_B == 1 and vis_A == -1): 51 | vis_A = 1 52 | x_A = x_B 53 | y_A = y_B 54 | anns[i][indexA] = str(x_A) + '_' + str(y_A) + '_' + str(vis_A) 55 | anns[i][indexB] = str(x_B) + '_' + str(y_B) + '_' + str(vis_B) 56 | 57 | for j in range(len(center_pair[0])): 58 | indexA = center_pair[0][j] + 2 59 | indexB = center_pair[1][j] + 2 60 | [x_str_A, y_str_A, vis_str_A] = 
ann[indexA].split('_') 61 | x_A, y_A, vis_A = int(x_str_A), int(y_str_A), int(vis_str_A) 62 | 63 | [x_str_B, y_str_B, vis_str_B] = ann[indexB].split('_') 64 | x_B, y_B, vis_B = int(x_str_B), int(y_str_B), int(vis_str_B) 65 | 66 | if (vis_A == -1 and vis_B== -1) or (vis_A == 1 and vis_B == 1): 67 | continue 68 | if (vis_A == 1 and vis_B == -1): 69 | vis_B = 1 70 | x_B = abs(2*center_x - x_A) 71 | y_B = y_A 72 | elif (vis_B == 1 and vis_A == -1): 73 | vis_A = 1 74 | x_A = abs(2*center_x - x_B) 75 | y_A = y_B 76 | anns[i][indexA] = str(x_A) + '_' + str(y_A) + '_' + str(vis_A) 77 | anns[i][indexB] = str(x_B) + '_' + str(y_B) + '_' + str(vis_B) 78 | 79 | results = [info] 80 | results = results + anns 81 | 82 | with open('modify.csv', 'w') as f: 83 | writer = csv.writer(f) 84 | writer.writerows(results) 85 | 86 | 87 | ''' 88 | 0'neckline_left', 89 | 1'neckline_right', 90 | 2 'center_front', 91 | 3'shoulder_left', 92 | 4 'shoulder_right', 93 | 5 'armpit_left', 94 | 6 'armpit_right', 95 | 7 'waistline_left', 96 | 8 'waistline_right', 97 | 9 'cuff_left_in', 98 | 10 'cuff_left_out', 99 | 11 'cuff_right_in', 100 | 12 'cuff_right_out', 101 | 13 'top_hem_left', 102 | 14 'top_hem_right', 103 | 15 'waistband_left', 104 | 16 'waistband_right', 105 | 17 'hemline_left', 106 | 18 'hemline_right', 107 | 19 'crotch', 108 | 20 'bottom_left_in', 109 | 21 'bottom_left_out', 110 | 22 'bottom_right_in', 111 | 23 'bottom_right_out 112 | ''' -------------------------------------------------------------------------------- /evaluation/submit.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import os 3 | import sys 4 | import numpy as np 5 | import cv2 6 | from scipy.ndimage.filters import gaussian_filter 7 | import math, time 8 | import torch 9 | import csv 10 | import util 11 | sys.path.append('../') 12 | def apply_model(oriImg, model, multiplier): 13 | stride = 8 14 | height, width, _ = oriImg.shape 15 | normed_img = np.array(oriImg, dtype=np.float32) 16 | heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 25), dtype=np.float32) 17 | for m in range(len(multiplier)): 18 | scale = multiplier[m] 19 | imageToTest = cv2.resize(normed_img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC) 20 | # imgToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, 128) 21 | imgToTest_padded, pad = util.padRightDownCorner(imageToTest, 32, 128) 22 | 23 | input_img = np.transpose(np.float32(imgToTest_padded[:, :, :, np.newaxis]), 24 | (3, 2, 0, 1)) / 255 - 0.5 # required shape (1, c, h, w) 25 | 26 | input_var = torch.autograd.Variable(torch.from_numpy(input_img).cuda()) 27 | 28 | # get the features 29 | # heat1, heat2, heat3, heat4, heat5, heat6 = model(input_var) 30 | heat = model(input_var) 31 | 32 | # get the heatmap 33 | heatmap = heat.data.cpu().numpy() 34 | heatmap = np.transpose(np.squeeze(heatmap), (1, 2, 0)) # (h, w, c) 35 | heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC) 36 | heatmap = heatmap[:imgToTest_padded.shape[0] - pad[2], :imgToTest_padded.shape[1] - pad[3], :] 37 | heatmap = cv2.resize(heatmap, (width, height), interpolation=cv2.INTER_CUBIC) 38 | heatmap_avg = heatmap_avg + heatmap / len(multiplier) 39 | 40 | all_peaks = [] # all of the possible points by classes. 
41 | peak_counter = 0 42 | thre1 = 0.1 43 | for part in range(25 - 1): 44 | x_list = [] 45 | y_list = [] 46 | map_ori = heatmap_avg[:, :, part] 47 | map = gaussian_filter(map_ori, sigma=3) 48 | 49 | map_left = np.zeros(map.shape) 50 | map_left[1:, :] = map[:-1, :] 51 | map_right = np.zeros(map.shape) 52 | map_right[:-1, :] = map[1:, :] 53 | map_up = np.zeros(map.shape) 54 | map_up[:, 1:] = map[:, :-1] 55 | map_down = np.zeros(map.shape) 56 | map_down[:, :-1] = map[:, 1:] 57 | 58 | peaks_binary = np.logical_and.reduce( 59 | (map >= map_left, map >= map_right, map >= map_up, map >= map_down, map > thre1)) 60 | peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]) # note reverse 61 | peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks] 62 | id = range(peak_counter, peak_counter + len(peaks)) 63 | peaks_with_score_and_id = [peaks_with_score[i] + (id[i],) for i in range(len(id))] 64 | 65 | all_peaks.append(peaks_with_score_and_id) 66 | peak_counter += len(peaks) 67 | 68 | # sort by score 69 | for i in range(24): 70 | all_peaks[i] = sorted(all_peaks[i], key=lambda ele : ele[2],reverse = True) 71 | 72 | canvas = oriImg.copy() 73 | # draw points 74 | for i in range(24): 75 | for j in range(len(all_peaks[i])): 76 | if j is 0: 77 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, [0, 0, 255], thickness=-1) 78 | else: 79 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, [255, 0, 0], thickness=-1) 80 | 81 | keypoints = -1*np.ones((24, 3)) 82 | for i in range(24): 83 | if len(all_peaks[i]) == 0: 84 | continue 85 | else: 86 | keypoints[i,0], keypoints[i,1], keypoints[i,2] = all_peaks[i][0][0], all_peaks[i][0][1], 1 87 | 88 | return keypoints, canvas 89 | 90 | 91 | def write_csv(name, results): 92 | import csv 93 | with open(name, 'w') as f: 94 | writer = csv.writer(f) 95 | writer.writerows(results) 96 | 97 | def prepare_row(ann, keypoints): 98 | # cls 99 | image_name = ann[0] 100 | category = ann[1] 101 | keypoints_str = [] 102 | for i in range(24): 103 | cell_str = str(int(keypoints[i][0])) + '_' + str(int(keypoints[i][1])) + '_' + str(int(keypoints[i][2])) 104 | keypoints_str.append(cell_str) 105 | row = [image_name, category] + keypoints_str 106 | return row 107 | 108 | def read_csv(ann_file): 109 | info = [] 110 | anns = [] 111 | with open(ann_file, 'rb') as f: 112 | reader = csv.reader(f) 113 | for row in reader: 114 | anns.append(row) 115 | info = anns[0] 116 | anns = anns[1:] 117 | return info, anns 118 | 119 | def euclidean_distance(a, b): 120 | return math.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2) 121 | 122 | def criterion(ann_gt, ann_dt): 123 | category = ann_gt[1] 124 | gt_kpt = -1 * np.ones((24, 3)) 125 | for i in range(len(gt_kpt)): 126 | x_str, y_str, vis_str = ann_gt[i + 2].split('_') 127 | gt_kpt[i][0], gt_kpt[i][1], gt_kpt[i][2] = int(x_str), int(y_str), int(vis_str) 128 | 129 | dt_kpt = -1 * np.ones((24, 3)) 130 | for i in range(len(dt_kpt)): 131 | x_str, y_str, vis_str = ann_dt[i + 2].split('_') 132 | dt_kpt[i][0], dt_kpt[i][1], dt_kpt[i][2] = int(x_str), int(y_str), int(vis_str) 133 | 134 | if category in ['blouse','outwear','dress']: # armpit distance 135 | thre = euclidean_distance(gt_kpt[5], gt_kpt[6]) 136 | elif category in ['trousers', 'skirt']: # waistband distance 137 | thre = euclidean_distance(gt_kpt[7], gt_kpt[8]) 138 | if thre == 0: 139 | return [] 140 | score = [] 141 | for i in range(len(gt_kpt)): 142 | if gt_kpt[i][2] == 1: 143 | #if dt_kpt[i][2] == -1: 144 | # score.append(2) 145 | #else: 146 | score.append(1.0* 
euclidean_distance(gt_kpt[i],dt_kpt[i])/ thre) 147 | return score 148 | #print('score = {}'.format(score)) 149 | 150 | 151 | 152 | def evaluate(gt_file, dt_file, num_imgs): 153 | info_gt, anns_gt = read_csv(gt_file) 154 | info_dt, anns_dt = read_csv(dt_file) 155 | anns_gt = anns_gt[:num_imgs] 156 | assert len(anns_gt) == len(anns_dt) 157 | scores = [] 158 | for i in range(len(anns_gt)): 159 | ann_gt = anns_gt[i] 160 | ann_dt = anns_dt[i] 161 | score = criterion(ann_gt, ann_dt) 162 | scores += score 163 | value = sum(scores)/len(scores) 164 | print('score = {}'.format(value)) 165 | 166 | def eval(): 167 | gt_file = '../FashionAI/data/train/Annotations/val.csv' 168 | dt_file = 'val_result.csv' 169 | # dt_file = 'modify.csv' 170 | 171 | num_imgs = 100 172 | evaluate(gt_file, dt_file,num_imgs) 173 | 174 | 175 | def main(): 176 | os.environ['CUDA_VISIBLE_DEVICES'] = '2' 177 | 178 | # --------------------------- model ------------------------------------------------------------------------------- 179 | import models.CPM_FPN 180 | pytorch_model = '../FashionAI/Heatmap/experiments/CPM_FPN3/120000_8%.pth.tar' 181 | model = models.CPM_FPN.pose_estimation(class_num=25, pretrain=False) 182 | # ----------------------------------------------------------------------------------------------------------------- 183 | 184 | img_dir = '../FashionAI/data/test/' 185 | ann_path = '../FashionAI/data/test/test.csv' 186 | result_name = 'result.csv' 187 | # scale_search = [0.5, 0.7, 1.0, 1.3] # [0.5, 1.0, 1.5] 188 | scale_search = [0.5, 0.7, 1.0] 189 | boxsize = 384 190 | # -------------------------- pytorch model------------------ 191 | state_dict = torch.load(pytorch_model)['state_dict'] 192 | model.load_state_dict(state_dict) 193 | model = model.cuda() 194 | model.eval() 195 | # -------------------------------------------------------- 196 | anns = [] 197 | with open(ann_path, 'rb') as f: 198 | reader = csv.reader(f) 199 | for row in reader: 200 | anns.append(row) 201 | info=anns[0] 202 | anns = anns[1:] 203 | #--------------------------------------------------------- 204 | num_imgs = len(anns) 205 | results = [] 206 | results.append(info) 207 | 208 | for i in range(num_imgs): 209 | print('{}/{}'.format(i, num_imgs)) 210 | ann = anns[i] 211 | image_path = os.path.join(img_dir, ann[0]) 212 | oriImg = cv2.imread(image_path) 213 | # multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search] 214 | multiplier = scale_search 215 | keypoints, canvas = apply_model(oriImg, model, multiplier) 216 | # cv2.imwrite(os.path.join('./result', ann[0].split('/')[-1]), canvas) 217 | row = prepare_row(ann, keypoints) 218 | results.append(row) 219 | write_csv(result_name, results) 220 | 221 | if __name__ == '__main__': 222 | main() -------------------------------------------------------------------------------- /experiments/CPM/config.yml: -------------------------------------------------------------------------------- 1 | workers: 6 2 | weight_decay: 0.0005 3 | momentum: 0.9 4 | display: 50 5 | max_iter: 160000 6 | batch_size: 10 7 | test_interval: 50 8 | topk: 3 9 | base_lr: 0.00004 10 | start_iters: 0 11 | best_model: 12345678.9 12 | #-------------lr_policy--------------------# 13 | lr_policy: 'multistep' 14 | policy_parameter: 15 | stepvalue: [50000, 100000, 120000] 16 | gamma: 0.33 -------------------------------------------------------------------------------- /experiments/CPM/train_net.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 
import os, sys 3 | sys.path.append('../../') 4 | import dataset_loader 5 | import torch 6 | import torch.nn as nn 7 | import torch.backends.cudnn as cudnn 8 | import util 9 | import cv2 10 | import argparse 11 | import models.CPM 12 | import torchvision.transforms as transforms 13 | import time 14 | 15 | def parse(): 16 | parser = argparse.ArgumentParser() 17 | return parser.parse_args() 18 | 19 | def construct_model(args): 20 | model = models.CPM .PoseModel(num_point=25, pretrained=True) 21 | model.cuda() 22 | return model 23 | 24 | def get_parameters(model, config, isdefault=True): 25 | if isdefault: 26 | return model.parameters(), [1.] 27 | lr_1 = [] 28 | lr_2 = [] 29 | lr_4 = [] 30 | lr_8 = [] 31 | params_dict = dict(model.named_parameters()) 32 | for key, value in params_dict.items(): 33 | if 'stage' in key: 34 | if key[-4:] == 'bias': 35 | lr_8.append(value) 36 | else: 37 | lr_4.append(value) 38 | elif key[-4:] == 'bias': 39 | lr_2.append(value) 40 | else: 41 | lr_1.append(value) 42 | 43 | 44 | params = [{'params': lr_1, 'lr': config.base_lr}, 45 | {'params': lr_2, 'lr': config.base_lr * 2.}, 46 | {'params': lr_4, 'lr': config.base_lr * 4.}, 47 | {'params': lr_8, 'lr': config.base_lr * 8.}] 48 | 49 | return params, [1., 2., 4., 8.] 50 | 51 | def train_net(model, args): 52 | ann_path = '../FashionAI/data/train/Annotations/trainminusval.csv' 53 | img_dir = '../FashionAI/data/train/' 54 | 55 | stride = 8 56 | cudnn.benchmark = True 57 | config = util.Config('./config.yml') 58 | 59 | train_loader = torch.utils.data.DataLoader( 60 | dataset_loader.dataset_loader(img_dir, ann_path, stride, 61 | transforms.ToTensor()), 62 | batch_size=config.batch_size, shuffle=True, 63 | num_workers=config.workers, pin_memory=True) 64 | 65 | criterion = nn.MSELoss().cuda() 66 | params, multiple = get_parameters(model, config, False) 67 | 68 | optimizer = torch.optim.SGD(params, config.base_lr, momentum=config.momentum, 69 | weight_decay=config.weight_decay) 70 | model.train() 71 | iters = 0 72 | batch_time = util.AverageMeter() 73 | data_time = util.AverageMeter() 74 | losses = util.AverageMeter() 75 | losses_list = [util.AverageMeter() for i in range(12)] 76 | end = time.time() 77 | 78 | heat_weight = 48 * 48 * 25 / 2.0 # for convenient to compare with origin code 79 | # heat_weight = 1 80 | 81 | while iters < config.max_iter: 82 | for i, (input, heatmap) in enumerate(train_loader): 83 | learning_rate = util.adjust_learning_rate(optimizer, iters, config.base_lr, policy=config.lr_policy,\ 84 | policy_parameter=config.policy_parameter, multiple=multiple) 85 | data_time.update(time.time() - end) 86 | 87 | input = input.cuda(async=True) 88 | heatmap = heatmap.cuda(async=True) 89 | input_var = torch.autograd.Variable(input) 90 | heatmap_var = torch.autograd.Variable(heatmap) 91 | 92 | heat1, heat2, heat3, heat4, heat5, heat6 = model(input_var) 93 | loss1 = criterion(heat1,heatmap_var) * heat_weight 94 | loss2 = criterion(heat2, heatmap_var) * heat_weight 95 | loss3 = criterion(heat3, heatmap_var) * heat_weight 96 | loss4 = criterion(heat4, heatmap_var) * heat_weight 97 | loss5 = criterion(heat5, heatmap_var) * heat_weight 98 | loss6 = criterion(heat6, heatmap_var) * heat_weight 99 | loss = loss1 + loss2 + loss3 + loss4 + loss5 + loss6 100 | losses.update(loss.data[0], input.size(0)) 101 | loss_list = [loss1 , loss2 , loss3 , loss4 , loss5 , loss6] 102 | for cnt, l in enumerate(loss_list): 103 | losses_list[cnt].update(l.data[0], input.size(0)) 104 | 105 | optimizer.zero_grad() 106 | loss.backward() 107 | 
optimizer.step() 108 | batch_time.update(time.time() - end) 109 | end = time.time() 110 | 111 | 112 | iters += 1 113 | if iters % config.display == 0: 114 | print('Train Iteration: {0}\t' 115 | 'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t' 116 | 'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:3f})\n' 117 | 'Learning rate = {2}\n' 118 | 'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format( 119 | iters, config.display, learning_rate, batch_time=batch_time, 120 | data_time=data_time, loss=losses)) 121 | for cnt in range(0, 6): 122 | print('Loss{0}_1 = {loss1.val:.8f} (ave = {loss1.avg:.8f})'.format(cnt + 1,loss1=losses_list[cnt])) 123 | print(time.strftime( 124 | '%Y-%m-%d %H:%M:%S -----------------------------------------------------------------------------------------------------------------\n', 125 | time.localtime())) 126 | 127 | batch_time.reset() 128 | data_time.reset() 129 | losses.reset() 130 | for cnt in range(12): 131 | losses_list[cnt].reset() 132 | 133 | if iters % 5000 == 0: 134 | torch.save({ 135 | 'iter': iters, 136 | 'state_dict': model.state_dict(), 137 | }, str(iters) + '.pth.tar') 138 | 139 | if iters == config.max_iter: 140 | break 141 | return 142 | 143 | if __name__ == '__main__': 144 | os.environ['CUDA_VISIBLE_DEVICES'] = '3' 145 | args = parse() 146 | model = construct_model(args) 147 | train_net(model, args) -------------------------------------------------------------------------------- /experiments/CPM_FPN/config.yml: -------------------------------------------------------------------------------- 1 | workers: 6 2 | weight_decay: 0.0005 3 | momentum: 0.9 4 | display: 50 5 | max_iter: 160000 6 | batch_size: 10 7 | test_interval: 10 8 | topk: 3 9 | base_lr: 0.00004 10 | start_iters: 0 11 | best_model: 12345678.9 12 | #-------------lr_policy--------------------# 13 | lr_policy: 'multistep' 14 | policy_parameter: 15 | stepvalue: [50000, 80000] 16 | gamma: 0.1 -------------------------------------------------------------------------------- /experiments/CPM_FPN/train_net.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os, sys 3 | sys.path.append('../../') 4 | import Mytransforms 5 | import dataset_loader 6 | import torch 7 | import torch.nn as nn 8 | import torch.backends.cudnn as cudnn 9 | import util 10 | import cv2 11 | import argparse 12 | import models.CPM_FPN 13 | import torchvision.transforms as transforms 14 | import time 15 | 16 | def parse(): 17 | parser = argparse.ArgumentParser() 18 | return parser.parse_args() 19 | 20 | def construct_model(args): 21 | model = models.CPM_FPN.pose_estimation(class_num=25, pretrain=True) 22 | model.cuda() 23 | print (model) 24 | return model 25 | 26 | 27 | 28 | def train_net(model, args): 29 | 30 | ann_path = '../FashionAI/data/train/Annotations/trainminusval.csv' 31 | img_dir = '../FashionAI/data/train/' 32 | 33 | stride = 8 34 | cudnn.benchmark = True 35 | config = util.Config('./config.yml') 36 | train_loader = torch.utils.data.DataLoader( 37 | dataset_loader.dataset_loader(img_dir, ann_path, stride, 38 | Mytransforms.Compose([Mytransforms.RandomResized(), 39 | Mytransforms.RandomRotate(40), 40 | Mytransforms.RandomCrop(384), 41 | ]), sigma=15), 42 | batch_size=config.batch_size, shuffle=True, 43 | num_workers=config.workers, pin_memory=True) 44 | 45 | criterion = nn.MSELoss().cuda() 46 | params = [] 47 | for key, value in model.named_parameters(): 48 | if value.requires_grad != False: 49 | 
params.append({'params': value, 'lr': config.base_lr}) 50 | 51 | optimizer = torch.optim.SGD(params, config.base_lr, momentum=config.momentum, 52 | weight_decay=config.weight_decay) 53 | # model.train() # only for bn and dropout 54 | model.eval() 55 | 56 | from matplotlib import pyplot as plt 57 | 58 | iters = 0 59 | batch_time = util.AverageMeter() 60 | data_time = util.AverageMeter() 61 | losses = util.AverageMeter() 62 | losses_list = [util.AverageMeter() for i in range(12)] 63 | end = time.time() 64 | 65 | heat_weight = 48 * 48 * 25 / 2.0 # for convenient to compare with origin code 66 | # heat_weight = 1 67 | 68 | while iters < config.max_iter: 69 | for i, (input, heatmap) in enumerate(train_loader): 70 | learning_rate = util.adjust_learning_rate(optimizer, iters, config.base_lr, policy=config.lr_policy,\ 71 | policy_parameter=config.policy_parameter) 72 | data_time.update(time.time() - end) 73 | 74 | input = input.cuda(async=True) 75 | heatmap = heatmap.cuda(async=True) 76 | input_var = torch.autograd.Variable(input) 77 | heatmap_var = torch.autograd.Variable(heatmap) 78 | 79 | heat = model(input_var) 80 | 81 | # feat = C4.cpu().data.numpy() 82 | # for n in range(100): 83 | # plt.subplot(10, 10, n + 1); 84 | # plt.imshow(feat[0, n, :, :], cmap='gray') 85 | # plt.xticks([]); 86 | # plt.yticks([]) 87 | # plt.show() 88 | 89 | loss1 = criterion(heat, heatmap_var) * heat_weight 90 | # loss2 = criterion(heat4, heatmap_var) * heat_weight 91 | # loss3 = criterion(heat5, heatmap_var) * heat_weight 92 | # loss4 = criterion(heat6, heatmap_var) * heat_weight 93 | # loss5 = criterion(heat, heatmap_var) 94 | # loss6 = criterion(heat, heatmap_var) 95 | 96 | loss = loss1 # + loss2 + loss3# + loss4# + loss5 + loss6 97 | losses.update(loss.data[0], input.size(0)) 98 | loss_list = [loss1]#, loss2, loss3]# , loss4 ]# , loss5 , loss6] 99 | for cnt, l in enumerate(loss_list): 100 | losses_list[cnt].update(l.data[0], input.size(0)) 101 | 102 | optimizer.zero_grad() 103 | loss.backward() 104 | optimizer.step() 105 | batch_time.update(time.time() - end) 106 | end = time.time() 107 | 108 | 109 | iters += 1 110 | if iters % config.display == 0: 111 | print('Train Iteration: {0}\t' 112 | 'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t' 113 | 'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:3f})\n' 114 | 'Learning rate = {2}\n' 115 | 'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format( 116 | iters, config.display, learning_rate, batch_time=batch_time, 117 | data_time=data_time, loss=losses)) 118 | for cnt in range(0, 1): 119 | print('Loss{0}_1 = {loss1.val:.8f} (ave = {loss1.avg:.8f})'.format(cnt + 1,loss1=losses_list[cnt])) 120 | print(time.strftime( 121 | '%Y-%m-%d %H:%M:%S -----------------------------------------------------------------------------------------------------------------\n', 122 | time.localtime())) 123 | 124 | batch_time.reset() 125 | data_time.reset() 126 | losses.reset() 127 | for cnt in range(12): 128 | losses_list[cnt].reset() 129 | 130 | if iters % 5000 == 0: 131 | torch.save({ 132 | 'iter': iters, 133 | 'state_dict': model.state_dict(), 134 | }, str(iters) + '.pth.tar') 135 | 136 | if iters == config.max_iter: 137 | break 138 | return 139 | 140 | if __name__ == '__main__': 141 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 142 | args = parse() 143 | model = construct_model(args) 144 | train_net(model, args) 145 | -------------------------------------------------------------------------------- /experiments/CPM_ResNet/config.yml: 
-------------------------------------------------------------------------------- 1 | workers: 6 2 | weight_decay: 0.0005 3 | momentum: 0.9 4 | display: 50 5 | max_iter: 70000 6 | batch_size: 10 7 | test_interval: 10 8 | topk: 3 9 | base_lr: 0.00004 10 | start_iters: 0 11 | best_model: 12345678.9 12 | #-------------lr_policy--------------------# 13 | lr_policy: 'multistep' 14 | policy_parameter: 15 | stepvalue: [30000, 50000] 16 | gamma: 0.1 -------------------------------------------------------------------------------- /experiments/CPM_ResNet/train_net.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os, sys 3 | sys.path.append('../../') 4 | import dataset_loader 5 | import torch 6 | import torch.nn as nn 7 | import torch.backends.cudnn as cudnn 8 | import util 9 | import cv2 10 | import argparse 11 | import models.CPM_ResNet 12 | import torchvision.transforms as transforms 13 | import time 14 | 15 | def parse(): 16 | parser = argparse.ArgumentParser() 17 | return parser.parse_args() 18 | 19 | def construct_model(args): 20 | model = models.CPM_ResNet.pose_estimation(class_num=25, pretrain=True) 21 | model.cuda() 22 | print (model) 23 | return model 24 | 25 | 26 | 27 | def train_net(model, args): 28 | 29 | ann_path = '../FashionAI/data/train/Annotations/trainminusval.csv' 30 | img_dir = '../FashionAI/data/train/' 31 | 32 | stride = 8 33 | cudnn.benchmark = True 34 | config = util.Config('./config.yml') 35 | 36 | train_loader = torch.utils.data.DataLoader( 37 | dataset_loader.dataset_loader(img_dir, ann_path, stride, 38 | None), # dataset_loader center-crops to 384 and normalizes internally; a custom transform must be a Mytransforms-style callable taking (img, kpt, center) 39 | batch_size=config.batch_size, shuffle=True, 40 | num_workers=config.workers, pin_memory=True) 41 | 42 | criterion = nn.MSELoss().cuda() 43 | params = [] 44 | for key, value in model.named_parameters(): 45 | if value.requires_grad: 46 | params.append({'params': value, 'lr': config.base_lr}) 47 | 48 | optimizer = torch.optim.SGD(params, config.base_lr, momentum=config.momentum, 49 | weight_decay=config.weight_decay) 50 | # model.train() # only for bn and dropout 51 | model.eval() # keep the pretrained BN statistics frozen during fine-tuning 52 | 53 | from matplotlib import pyplot as plt 54 | 55 | iters = 0 56 | batch_time = util.AverageMeter() 57 | data_time = util.AverageMeter() 58 | losses = util.AverageMeter() 59 | losses_list = [util.AverageMeter() for i in range(12)] 60 | end = time.time() 61 | 62 | heat_weight = 48 * 48 * 25 / 2.0 # rescale the element-averaged MSE to a Caffe-style summed Euclidean loss over the 48x48x25 heatmaps 63 | # heat_weight = 1 64 | 65 | while iters < config.max_iter: 66 | for i, (input, heatmap) in enumerate(train_loader): 67 | learning_rate = util.adjust_learning_rate(optimizer, iters, config.base_lr, policy=config.lr_policy,\ 68 | policy_parameter=config.policy_parameter) 69 | data_time.update(time.time() - end) 70 | 71 | input = input.cuda(async=True) 72 | heatmap = heatmap.cuda(async=True) 73 | input_var = torch.autograd.Variable(input) 74 | heatmap_var = torch.autograd.Variable(heatmap) 75 | 76 | heat = model(input_var) 77 | 78 | # feat = C4.cpu().data.numpy() 79 | # for n in range(100): 80 | # plt.subplot(10, 10, n + 1); 81 | # plt.imshow(feat[0, n, :, :], cmap='gray') 82 | # plt.xticks([]); 83 | # plt.yticks([]) 84 | # plt.show() 85 | 86 | loss1 = criterion(heat, heatmap_var) * heat_weight 87 | # loss2 = criterion(heat, heatmap_var) 88 | # loss3 = criterion(heat, heatmap_var) 89 | # loss4 = criterion(heat, heatmap_var) 90 | # loss5 = criterion(heat, heatmap_var) 91 | # loss6 = criterion(heat, heatmap_var) 92 | 
93 | loss = loss1 # + loss2 + loss3 + loss4 + loss5 + loss6 94 | losses.update(loss.data[0], input.size(0)) 95 | loss_list = [loss1] # , loss2 , loss3 , loss4 , loss5 , loss6] 96 | for cnt, l in enumerate(loss_list): 97 | losses_list[cnt].update(l.data[0], input.size(0)) 98 | 99 | optimizer.zero_grad() 100 | loss.backward() 101 | optimizer.step() 102 | batch_time.update(time.time() - end) 103 | end = time.time() 104 | 105 | 106 | iters += 1 107 | if iters % config.display == 0: 108 | print('Train Iteration: {0}\t' 109 | 'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t' 110 | 'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n' 111 | 'Learning rate = {2}\n' 112 | 'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format( 113 | iters, config.display, learning_rate, batch_time=batch_time, 114 | data_time=data_time, loss=losses)) 115 | for cnt in range(0, 1): 116 | print('Loss{0}_1 = {loss1.val:.8f} (ave = {loss1.avg:.8f})'.format(cnt + 1,loss1=losses_list[cnt])) 117 | print(time.strftime( 118 | '%Y-%m-%d %H:%M:%S -----------------------------------------------------------------------------------------------------------------\n', 119 | time.localtime())) 120 | 121 | batch_time.reset() 122 | data_time.reset() 123 | losses.reset() 124 | for cnt in range(12): 125 | losses_list[cnt].reset() 126 | 127 | if iters % 5000 == 0: 128 | torch.save({ 129 | 'iter': iters, 130 | 'state_dict': model.state_dict(), 131 | }, str(iters) + '.pth.tar') 132 | 133 | if iters == config.max_iter: 134 | break 135 | return 136 | 137 | if __name__ == '__main__': 138 | os.environ['CUDA_VISIBLE_DEVICES'] = '1' 139 | args = parse() 140 | model = construct_model(args) 141 | train_net(model, args) -------------------------------------------------------------------------------- /experiments/FPN/config.yml: -------------------------------------------------------------------------------- 1 | workers: 6 2 | weight_decay: 0.0005 3 | momentum: 0.9 4 | display: 50 5 | max_iter: 160000 6 | batch_size: 10 7 | test_interval: 10 8 | topk: 3 9 | base_lr: 0.0001 10 | start_iters: 0 11 | best_model: 12345678.9 12 | #-------------lr_policy--------------------# 13 | lr_policy: 'multistep' 14 | policy_parameter: 15 | stepvalue: [50000, 100000, 120000] 16 | gamma: 0.33 -------------------------------------------------------------------------------- /experiments/FPN/train_net.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os, sys 3 | sys.path.append('../../') 4 | import dataset_loader 5 | import torch 6 | import torch.nn as nn 7 | import torch.backends.cudnn as cudnn 8 | import util 9 | import cv2 10 | import argparse 11 | import models.FPN 12 | import torchvision.transforms as transforms 13 | import time 14 | 15 | def parse(): 16 | parser = argparse.ArgumentParser() 17 | return parser.parse_args() 18 | 19 | def construct_model(args): 20 | model = models.FPN.pose_estimation(class_num=25, pretrain=True) 21 | model.cuda() 22 | return model 23 | 24 | 25 | 26 | def train_net(model, args): 27 | 28 | ann_path = '/disk/data/fashionAI/train/Annotations/train.csv' 29 | img_dir = '/disk/data/fashionAI/train/' 30 | 31 | stride = 8 32 | cudnn.benchmark = True 33 | config = util.Config('./config.yml') 34 | 35 | train_loader = torch.utils.data.DataLoader( 36 | dataset_loader.dataset_loader(img_dir, ann_path, stride, 37 | None), # dataset_loader center-crops to 384 and normalizes internally; a custom transform must be a Mytransforms-style callable taking (img, kpt, center) 38 | batch_size=config.batch_size, shuffle=True, 39 | num_workers=config.workers, pin_memory=True) 40 | 
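# Explanatory note (added; the 25-channel figure is inferred from class_num=25 used
# across models/): with 384x384 crops at stride 8 the targets from dataset_loader are
# 25 heatmaps of 48x48 (24 keypoint Gaussians plus one extra channel). nn.MSELoss()
# averages the squared error over all elements; multiplying by
# heat_weight = 48 * 48 * 25 / 2 = 28800, as the sibling training scripts do,
# converts that mean into a Caffe-style summed Euclidean loss.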
41 | criterion = nn.MSELoss().cuda() 42 | params = [] 43 | for key, value in model.named_parameters(): 44 | if value.requires_grad: 45 | params.append({'params': value, 'lr': config.base_lr}) 46 | 47 | optimizer = torch.optim.SGD(params, config.base_lr, momentum=config.momentum, 48 | weight_decay=config.weight_decay) 49 | model.train() # only for bn and dropout 50 | # model.eval() 51 | 52 | 53 | iters = 0 54 | batch_time = util.AverageMeter() 55 | data_time = util.AverageMeter() 56 | losses = util.AverageMeter() 57 | losses_list = [util.AverageMeter() for i in range(12)] 58 | end = time.time() 59 | 60 | heat_weight = 48 * 48 * 25 / 2.0 # rescale the element-averaged MSE to a Caffe-style summed Euclidean loss over the 48x48x25 heatmaps 61 | # heat_weight = 1 62 | 63 | while iters < config.max_iter: 64 | for i, (input, heatmap) in enumerate(train_loader): 65 | learning_rate = util.adjust_learning_rate(optimizer, iters, config.base_lr, policy=config.lr_policy,\ 66 | policy_parameter=config.policy_parameter) 67 | data_time.update(time.time() - end) 68 | 69 | input = input.cuda(async=True) 70 | heatmap = heatmap.cuda(async=True) 71 | input_var = torch.autograd.Variable(input) 72 | heatmap_var = torch.autograd.Variable(heatmap) 73 | 74 | heat2, heat3, heat4, heat5, heat6 = model(input_var) 75 | # supervise every pyramid output against the same ground-truth heatmaps 76 | loss1 = criterion(heat2, heatmap_var) 77 | loss2 = criterion(heat3, heatmap_var) 78 | loss3 = criterion(heat4, heatmap_var) 79 | loss4 = criterion(heat5, heatmap_var) 80 | loss5 = criterion(heat6, heatmap_var) 81 | 82 | loss = loss1 + loss2 + loss3 + loss4 + loss5 83 | losses.update(loss.data[0], input.size(0)) 84 | loss_list = [loss1, loss2, loss3, loss4, loss5] 85 | for cnt, l in enumerate(loss_list): 86 | losses_list[cnt].update(l.data[0], input.size(0)) 87 | 88 | optimizer.zero_grad() 89 | loss.backward() 90 | optimizer.step() 91 | batch_time.update(time.time() - end) 92 | end = time.time() 93 | 94 | 95 | iters += 1 96 | if iters % config.display == 0: 97 | print('Train Iteration: {0}\t' 98 | 'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t' 99 | 'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n' 100 | 'Learning rate = {2}\n' 101 | 'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format( 102 | iters, config.display, learning_rate, batch_time=batch_time, 103 | data_time=data_time, loss=losses)) 104 | for cnt in range(0, 5): 105 | print('Loss{0}_1 = {loss1.val:.8f} (ave = {loss1.avg:.8f})'.format(cnt + 1,loss1=losses_list[cnt])) 106 | print(time.strftime( 107 | '%Y-%m-%d %H:%M:%S -----------------------------------------------------------------------------------------------------------------\n', 108 | time.localtime())) 109 | 110 | batch_time.reset() 111 | data_time.reset() 112 | losses.reset() 113 | for cnt in range(12): 114 | losses_list[cnt].reset() 115 | 116 | if iters % 5000 == 0: 117 | torch.save({ 118 | 'iter': iters, 119 | 'state_dict': model.state_dict(), 120 | }, str(iters) + '.pth.tar') 121 | 122 | if iters == config.max_iter: 123 | break 124 | return 125 | 126 | if __name__ == '__main__': 127 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 128 | args = parse() 129 | model = construct_model(args) 130 | train_net(model, args) -------------------------------------------------------------------------------- /experiments/hourglass/config.yml: -------------------------------------------------------------------------------- 1 | workers: 6 2 | weight_decay: 0.0005 3 | momentum: 0.9 4 | display: 50 5 | max_iter: 160000 6 | batch_size: 10 7 | test_interval: 50
8 | topk: 3 9 | base_lr: 0.00025 10 | start_iters: 0 11 | best_model: 12345678.9 12 | #-------------lr_policy--------------------# 13 | lr_policy: 'multistep' 14 | policy_parameter: 15 | stepvalue: [100000, 150000, 200000] 16 | gamma: 0.1 -------------------------------------------------------------------------------- /experiments/hourglass/train_net.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os, sys 3 | sys.path.append('../../') 4 | import dataset_loader 5 | import torch 6 | import torch.nn as nn 7 | import torch.backends.cudnn as cudnn 8 | import util 9 | import cv2 10 | import argparse 11 | import models.hourglass 12 | import torchvision.transforms as transforms 13 | import time 14 | 15 | def parse(): 16 | parser = argparse.ArgumentParser() 17 | return parser.parse_args() 18 | 19 | def construct_model(args): 20 | model = models.hourglass.hg(num_stacks=2, num_blocks=1, num_classes=25) # 25 output channels to match the heatmaps produced by dataset_loader 21 | model.cuda() 22 | return model 23 | 24 | def train_net(model, args): 25 | 26 | ann_path = '../FashionAI/data/train/Annotations/train.csv' 27 | img_dir = '../FashionAI/data/train/' 28 | 29 | stride = 8 30 | cudnn.benchmark = True 31 | config = util.Config('./config.yml') 32 | 33 | train_loader = torch.utils.data.DataLoader( 34 | dataset_loader.dataset_loader(img_dir, ann_path, stride, 35 | None), # dataset_loader center-crops to 384 and normalizes internally; a custom transform must be a Mytransforms-style callable taking (img, kpt, center) 36 | batch_size=config.batch_size, shuffle=True, 37 | num_workers=config.workers, pin_memory=True) 38 | 39 | criterion = nn.MSELoss().cuda() 40 | 41 | optimizer = torch.optim.SGD(model.parameters(), config.base_lr, momentum=config.momentum, 42 | weight_decay=config.weight_decay) 43 | model.train() 44 | iters = 0 45 | batch_time = util.AverageMeter() 46 | data_time = util.AverageMeter() 47 | losses = util.AverageMeter() 48 | losses_list = [util.AverageMeter() for i in range(12)] 49 | end = time.time() 50 | 51 | heat_weight = 48 * 48 * 25 / 2.0 # rescale the element-averaged MSE to a Caffe-style summed Euclidean loss over the 48x48x25 heatmaps 52 | # heat_weight = 1 53 | 54 | while iters < config.max_iter: 55 | for i, (input, heatmap) in enumerate(train_loader): 56 | learning_rate = util.adjust_learning_rate(optimizer, iters, config.base_lr, policy=config.lr_policy,\ 57 | policy_parameter=config.policy_parameter) 58 | data_time.update(time.time() - end) 59 | 60 | input = input.cuda(async=True) 61 | heatmap = heatmap.cuda(async=True) 62 | input_var = torch.autograd.Variable(input) 63 | heatmap_var = torch.autograd.Variable(heatmap) 64 | 65 | output = model(input_var) # one predicted heatmap per hourglass stack 66 | loss_list = [criterion(out, heatmap_var) * heat_weight for out in output] 67 | loss = sum(loss_list) 68 | 69 | 70 | losses.update(loss.data[0], input.size(0)) 71 | # track each stack's loss separately so the intermediate supervision can be monitored 72 | for cnt, l in enumerate(loss_list): 73 | losses_list[cnt].update(l.data[0], input.size(0)) 74 | 75 | optimizer.zero_grad() 76 | loss.backward() 77 | optimizer.step() 78 | batch_time.update(time.time() - end) 79 | end = time.time() 80 | 81 | 82 | iters += 1 83 | if iters % config.display == 0: 84 | print('Train Iteration: {0}\t' 85 | 'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t' 86 | 'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n' 87 | 'Learning rate = {2}\n' 88 | 'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format( 89 | iters, config.display, learning_rate, batch_time=batch_time, 90 | data_time=data_time, loss=losses)) 91 | for cnt in range(0, 2): # one line per hourglass stack 92 | print('Loss{0}_1 = {loss1.val:.8f} (ave = {loss1.avg:.8f})'.format(cnt + 
1,loss1=losses_list[cnt])) 93 | print(time.strftime( 94 | '%Y-%m-%d %H:%M:%S -----------------------------------------------------------------------------------------------------------------\n', 95 | time.localtime())) 96 | 97 | batch_time.reset() 98 | data_time.reset() 99 | losses.reset() 100 | for cnt in range(12): 101 | losses_list[cnt].reset() 102 | 103 | if iters % 5000 == 0: 104 | torch.save({ 105 | 'iter': iters, 106 | 'state_dict': model.state_dict(), 107 | }, str(iters) + '.pth.tar') 108 | 109 | if iters == config.max_iter: 110 | break 111 | return 112 | 113 | if __name__ == '__main__': 114 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 115 | args = parse() 116 | model = construct_model(args) 117 | train_net(model, args) -------------------------------------------------------------------------------- /models/CPM.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | import os 8 | import sys 9 | import math 10 | import torchvision.models as models 11 | 12 | 13 | 14 | def make_net_dict(): 15 | 16 | feature = [{'conv1_1': [3, 64, 3, 1, 1]}, {'conv1_2': [64, 64, 3, 1, 1]}, {'pool1': [2, 2, 0]}, 17 | {'conv2_1': [64, 128, 3, 1, 1]}, {'conv2_2': [128, 128, 3, 1, 1]}, {'pool2': [2, 2, 0]}, 18 | {'conv3_1': [128, 256, 3, 1, 1]}, {'conv3_2': [256, 256, 3, 1, 1]}, {'conv3_3': [256, 256, 3, 1, 1]}, {'conv3_4': [256, 256, 3, 1, 1]}, {'pool3': [2, 2, 0]}, 19 | {'conv4_1': [256, 512, 3, 1, 1]}, {'conv4_2': [512, 512, 3, 1, 1]}, {'conv4_3_cpm': [512, 256, 3, 1, 1]}, {'conv4_4_cpm': [256, 128, 3, 1, 1]}] 20 | 21 | 22 | block1 = [{'conv5_1_CPM': [128, 128, 3, 1, 1]},{'conv5_2_CPM': [128, 128, 3, 1, 1]},{'conv5_3_CPM': [128, 128, 3, 1, 1]}, 23 | {'conv5_4_CPM': [128, 512, 1, 1, 0]}] 24 | 25 | 26 | block2 = [{'Mconv1': [128+25, 128, 7, 1, 3]}, {'Mconv2': [128, 128, 7, 1, 3]}, 27 | {'Mconv3': [128, 128, 7, 1, 3]},{'Mconv4': [128, 128, 7, 1, 3]}, 28 | {'Mconv5': [128, 128, 7, 1, 3]}, 29 | {'Mconv6': [128, 128, 1, 1, 0]} 30 | ] 31 | predict_layers_stage1 = [{'predict_L1': [512, 25, 1, 1, 0]}] 32 | 33 | predict_layers_stageN = [{'predict_L1': [128, 25, 1, 1, 0]}] 34 | 35 | net_dict = [feature,block1,predict_layers_stage1,block2,predict_layers_stageN] 36 | 37 | return net_dict 38 | 39 | 40 | class CPM(nn.Module): 41 | 42 | def __init__(self, net_dict, batch_norm=False): 43 | 44 | super(CPM, self).__init__() 45 | 46 | self.feature = self._make_layer(net_dict[0]) 47 | 48 | self.block = self._make_layer(net_dict[1]) 49 | 50 | self.predict = self._make_layer(net_dict[2]) 51 | 52 | # repeate 53 | self.block_stage2 = self._make_layer(net_dict[3]) 54 | 55 | self.predict_stage2 = self._make_layer(net_dict[4]) 56 | 57 | self.block_stage3 = self._make_layer(net_dict[3]) 58 | 59 | self.predict_stage3 = self._make_layer(net_dict[4]) 60 | 61 | self.block_stage4 = self._make_layer(net_dict[3]) 62 | 63 | self.predict_stage4 = self._make_layer(net_dict[4]) 64 | 65 | self.block_stage5 = self._make_layer(net_dict[3]) 66 | 67 | self.predict_stage5 = self._make_layer(net_dict[4]) 68 | 69 | self.block_stage6 = self._make_layer(net_dict[3]) 70 | 71 | self.predict_stage6 = self._make_layer(net_dict[4]) 72 | 73 | self._init_weights() 74 | 75 | def _init_weights(self): 76 | for m in self.modules(): 77 | if isinstance(m, nn.Conv2d): 78 | m.weight.data.normal_(0, 0.01) 79 | if m.bias is not None: 80 | m.bias.data.zero_() 81 | 82 | def 
_make_layer(self, net_dict, batch_norm=False): 83 | layers = [] 84 | length = len(net_dict) 85 | for i in range(length): 86 | one_layer = net_dict[i] 87 | key = one_layer.keys()[0] 88 | v = one_layer[key] 89 | 90 | if 'pool' in key: 91 | layers += [nn.MaxPool2d(kernel_size=v[0], stride=v[1], padding=v[2])] 92 | elif 'predict' in key: 93 | conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3], padding=v[4]) 94 | layers += [conv2d] 95 | else: 96 | conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3], padding=v[4]) 97 | if batch_norm: 98 | layers += [conv2d, nn.BatchNorm2d(v[1]), nn.ReLU(inplace=True)] 99 | else: 100 | layers += [conv2d, nn.ReLU(inplace=True)] 101 | 102 | return nn.Sequential(*layers) 103 | 104 | def forward(self, x): 105 | # define forward flow 106 | feature = self.feature(x) 107 | 108 | out_stage1 = self.block(feature) 109 | L1_stage1 = self.predict(out_stage1) 110 | 111 | 112 | concat_stage2 = torch.cat([L1_stage1, feature], 1) 113 | out_stage2 = self.block_stage2(concat_stage2) 114 | L1_stage2 = self.predict_stage2(out_stage2) 115 | 116 | concat_stage3 = torch.cat([L1_stage2, feature], 1) 117 | out_stage3 = self.block_stage3(concat_stage3) 118 | L1_stage3 = self.predict_stage3(out_stage3) 119 | 120 | 121 | concat_stage4 = torch.cat([L1_stage3, feature], 1) 122 | out_stage4 = self.block_stage4(concat_stage4) 123 | L1_stage4 = self.predict_stage4(out_stage4) 124 | 125 | concat_stage5 = torch.cat([L1_stage4, feature], 1) 126 | out_stage5 = self.block_stage5(concat_stage5) 127 | L1_stage5 = self.predict_stage5(out_stage5) 128 | 129 | concat_stage6 = torch.cat([L1_stage5, feature], 1) 130 | out_stage6 = self.block_stage6(concat_stage6) 131 | L1_stage6 = self.predict_stage6(out_stage6) 132 | 133 | return L1_stage1, L1_stage2, L1_stage3, L1_stage4, L1_stage5, L1_stage6 134 | 135 | def PoseModel(num_point, num_stages=6, batch_norm=False, pretrained=False): 136 | net_dict = make_net_dict() 137 | model = CPM(net_dict, batch_norm) 138 | 139 | if pretrained: 140 | parameter_num = 10 141 | if batch_norm: 142 | vgg19 = models.vgg19_bn(pretrained=True) 143 | parameter_num *= 6 144 | else: 145 | vgg19 = models.vgg19(pretrained=True) 146 | parameter_num *= 2 147 | 148 | vgg19_state_dict = vgg19.state_dict() 149 | vgg19_keys = vgg19_state_dict.keys() 150 | 151 | model_dict = model.state_dict() 152 | from collections import OrderedDict 153 | weights_load = OrderedDict() 154 | 155 | for i in range(parameter_num): 156 | weights_load[model.state_dict().keys()[i]] = vgg19_state_dict[vgg19_keys[i]] 157 | model_dict.update(weights_load) 158 | model.load_state_dict(model_dict) 159 | 160 | return model 161 | 162 | 163 | if __name__ == '__main__': 164 | print(PoseModel(25, 6, batch_norm=False)) 165 | -------------------------------------------------------------------------------- /models/CPM_FPN.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | import math 8 | import torch.nn.functional as F 9 | 10 | ############################################################ 11 | # ResNet 12 | ############################################################ 13 | def conv3x3(in_planes, out_planes, stride=1): 14 | "3x3 convolution with padding" 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 16 | padding=1, bias=False) 17 | 18 
| class BasicBlock(nn.Module): 19 | expansion = 1 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(inplanes, planes, stride) 24 | self.bn1 = nn.BatchNorm2d(planes) 25 | self.relu = nn.ReLU(inplace=True) 26 | self.conv2 = conv3x3(planes, planes) 27 | self.bn2 = nn.BatchNorm2d(planes) 28 | self.downsample = downsample 29 | self.stride = stride 30 | 31 | def forward(self, x): 32 | residual = x 33 | 34 | out = self.conv1(x) 35 | out = self.bn1(out) 36 | out = self.relu(out) 37 | 38 | out = self.conv2(out) 39 | out = self.bn2(out) 40 | 41 | if self.downsample is not None: 42 | residual = self.downsample(x) 43 | 44 | out += residual 45 | out = self.relu(out) 46 | 47 | return out 48 | 49 | 50 | class Bottleneck(nn.Module): 51 | expansion = 4 52 | 53 | def __init__(self, inplanes, planes, stride=1, downsample=None): 54 | super(Bottleneck, self).__init__() 55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 56 | self.bn1 = nn.BatchNorm2d(planes) 57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 58 | padding=1, bias=False) 59 | self.bn2 = nn.BatchNorm2d(planes) 60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 61 | self.bn3 = nn.BatchNorm2d(planes * 4) 62 | self.relu = nn.ReLU(inplace=True) 63 | self.downsample = downsample 64 | self.stride = stride 65 | 66 | def forward(self, x): 67 | residual = x 68 | 69 | out = self.conv1(x) 70 | out = self.bn1(out) 71 | out = self.relu(out) 72 | 73 | out = self.conv2(out) 74 | out = self.bn2(out) 75 | out = self.relu(out) 76 | 77 | out = self.conv3(out) 78 | out = self.bn3(out) 79 | 80 | if self.downsample is not None: 81 | residual = self.downsample(x) 82 | 83 | out += residual 84 | out = self.relu(out) 85 | 86 | return out 87 | 88 | 89 | class ResNet(nn.Module): 90 | 91 | def __init__(self, block, layers, num_classes=1000): 92 | self.inplanes = 64 93 | super(ResNet, self).__init__() 94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 95 | bias=False) 96 | self.bn1 = nn.BatchNorm2d(64) 97 | self.relu = nn.ReLU(inplace=True) 98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 99 | self.layer1 = self._make_layer(block, 64, layers[0]) 100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 103 | 104 | for m in self.modules(): 105 | if isinstance(m, nn.Conv2d): 106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 107 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 108 | elif isinstance(m, nn.BatchNorm2d): 109 | m.weight.data.fill_(1) 110 | m.bias.data.zero_() 111 | 112 | def _make_layer(self, block, planes, blocks, stride=1): 113 | downsample = None 114 | if stride != 1 or self.inplanes != planes * block.expansion: 115 | downsample = nn.Sequential( 116 | nn.Conv2d(self.inplanes, planes * block.expansion, 117 | kernel_size=1, stride=stride, bias=False), 118 | nn.BatchNorm2d(planes * block.expansion), 119 | ) 120 | 121 | layers = [] 122 | layers.append(block(self.inplanes, planes, stride, downsample)) 123 | self.inplanes = planes * block.expansion 124 | for i in range(1, blocks): 125 | layers.append(block(self.inplanes, planes)) 126 | 127 | return nn.Sequential(*layers) 128 | 129 | def load_weights(self, path): 130 | model_dict = self.state_dict() 131 | print('loading model from {}'.format(path)) 132 | try: 133 | #state_dict = torch.load(self.path) 134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()}) 135 | pretrained_dict = torch.load(path) 136 | from collections import OrderedDict 137 | tmp = OrderedDict() 138 | for k,v in pretrained_dict.items(): 139 | if k in model_dict: 140 | tmp[k] = v 141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 142 | # model_dict.update(pretrained_dict) 143 | model_dict.update(tmp) 144 | self.load_state_dict(model_dict) 145 | except: 146 | print ('loading model failed, {} may not exist'.format(path)) 147 | 148 | def forward(self, x): 149 | x = self.conv1(x) 150 | x = self.bn1(x) 151 | x = self.relu(x) 152 | C1 = self.maxpool(x) 153 | 154 | C2 = self.layer1(C1) 155 | C3 = self.layer2(C2) 156 | C4 = self.layer3(C3) 157 | C5 = self.layer4(C4) 158 | 159 | return C1, C2, C3, C4, C5 160 | 161 | ############################################################ 162 | # FPN Graph 163 | ############################################################ 164 | 165 | class FPN(nn.Module): # xavier_fill as default 166 | def __init__(self, out_channels): 167 | super(FPN, self).__init__() 168 | self.out_channels = out_channels 169 | self.P6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0, ceil_mode=False) 170 | self.P5_conv1 = nn.Conv2d(2048, self.out_channels, kernel_size=1, stride=1) 171 | 172 | self.P4_conv1 = nn.Conv2d(1024, self.out_channels, kernel_size=1, stride=1) 173 | self.P4_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1) 174 | 175 | self.P3_conv1 = nn.Conv2d(512, self.out_channels, kernel_size=1, stride=1) 176 | self.P3_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1) 177 | self.P2_conv1 = nn.Conv2d(256, self.out_channels, kernel_size=1, stride=1) 178 | self.P2_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1) 179 | 180 | def forward(self, C1, C2, C3 ,C4, C5): 181 | 182 | p5_out = self.P5_conv1(C5) 183 | 184 | p4_out = torch.add(self.P4_conv1(C4), F.upsample_nearest(p5_out, scale_factor=2)) 185 | p3_out = torch.add(self.P3_conv1(C3), F.upsample_nearest(p4_out, scale_factor=2)) 186 | p2_out = torch.add(self.P2_conv1(C2), F.upsample_nearest(p3_out, scale_factor=2)) 187 | 188 | p4_out = self.P4_conv2(p4_out) 189 | p3_out = self.P3_conv2(p3_out) 190 | p2_out = self.P2_conv2(p2_out) 191 | 192 | # P6 is used for the 5th anchor scale in RPN. Generated by 193 | # subsampling from P5 with stride of 2. 
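# Shape bookkeeping for the top-down pathway above (illustrative, assuming
# the 384x384 training crops used by the experiments): C2..C5 arrive at
# strides 4/8/16/32, i.e. 96/48/24/12 px, so each
# F.upsample_nearest(..., scale_factor=2) aligns a coarser map with the
# next finer lateral 1x1 projection before the element-wise add; P6 below
# is simply P5 subsampled once more to stride 64 (6x6).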
194 | p6_out = self.P6(p5_out) 195 | 196 | return p2_out, p3_out, p4_out, p5_out, p6_out 197 | 198 | 199 | ############################################################ 200 | # Pose Estimation Graph 201 | ############################################################ 202 | 203 | class pose_estimation(nn.Module): 204 | def __init__(self, class_num, pretrain=True): 205 | super(pose_estimation, self).__init__() 206 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50 207 | if pretrain == True: 208 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth' 209 | self.resnet.load_weights(self.model_path) 210 | self.apply_fix() 211 | self.out_channels = 256 212 | self.fpn = FPN(self.out_channels) 213 | 214 | # self.block = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0), nn.ReLU(inplace=True), 215 | # nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 216 | # nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 217 | # nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 218 | # nn.Conv2d(128, 256, 3, 1, 1), nn.ReLU(inplace=True) 219 | # ) 220 | # self._init_weights(self.block) 221 | 222 | self.predict = nn.Sequential(nn.Conv2d(768, 128, 3, 1, 1), nn.ReLU(inplace=True), 223 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 224 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 225 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 226 | nn.Conv2d(128, 25, 3, 1, 1)) 227 | self._init_weights(self.predict) 228 | 229 | 230 | 231 | def _gaussian_init_conv(self, conv): 232 | if isinstance(conv, nn.Conv2d): 233 | conv.weight.data.normal_(0, 0.01) 234 | if conv.bias is not None: 235 | conv.bias.data.zero_() 236 | 237 | def _init_weights(self, model): 238 | for m in model: 239 | if isinstance(m, nn.Conv2d): 240 | m.weight.data.normal_(0, 0.01) 241 | if m.bias is not None: 242 | m.bias.data.zero_() 243 | 244 | 245 | def apply_fix(self): 246 | # 1. fix bn 247 | # 2. 
fix conv1 conv2 248 | for param in self.resnet.conv1.parameters(): 249 | param.requires_grad = False 250 | for param in self.resnet.layer1.parameters(): 251 | param.requires_grad = False 252 | 253 | def forward(self, x): 254 | C1, C2, C3, C4, C5 = self.resnet(x) 255 | P2, P3, P4, P5, P6 = self.fpn(C1, C2, C3, C4, C5) 256 | 257 | P4_x2 = F.upsample(P4, scale_factor=2) 258 | P5_x4 = F.upsample(P5, scale_factor=4) 259 | featuer_cat = torch.cat([P3, P4_x2, P5_x4],1) 260 | out = self.predict(featuer_cat) 261 | return out 262 | -------------------------------------------------------------------------------- /models/CPM_ResNet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | import math 8 | import torch.nn.functional as F 9 | 10 | ############################################################ 11 | # ResNet 12 | ############################################################ 13 | def conv3x3(in_planes, out_planes, stride=1): 14 | "3x3 convolution with padding" 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 16 | padding=1, bias=False) 17 | 18 | class BasicBlock(nn.Module): 19 | expansion = 1 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(inplanes, planes, stride) 24 | self.bn1 = nn.BatchNorm2d(planes) 25 | self.relu = nn.ReLU(inplace=True) 26 | self.conv2 = conv3x3(planes, planes) 27 | self.bn2 = nn.BatchNorm2d(planes) 28 | self.downsample = downsample 29 | self.stride = stride 30 | 31 | def forward(self, x): 32 | residual = x 33 | 34 | out = self.conv1(x) 35 | out = self.bn1(out) 36 | out = self.relu(out) 37 | 38 | out = self.conv2(out) 39 | out = self.bn2(out) 40 | 41 | if self.downsample is not None: 42 | residual = self.downsample(x) 43 | 44 | out += residual 45 | out = self.relu(out) 46 | 47 | return out 48 | 49 | 50 | class Bottleneck(nn.Module): 51 | expansion = 4 52 | 53 | def __init__(self, inplanes, planes, stride=1, downsample=None): 54 | super(Bottleneck, self).__init__() 55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 56 | self.bn1 = nn.BatchNorm2d(planes) 57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 58 | padding=1, bias=False) 59 | self.bn2 = nn.BatchNorm2d(planes) 60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 61 | self.bn3 = nn.BatchNorm2d(planes * 4) 62 | self.relu = nn.ReLU(inplace=True) 63 | self.downsample = downsample 64 | self.stride = stride 65 | 66 | def forward(self, x): 67 | residual = x 68 | 69 | out = self.conv1(x) 70 | out = self.bn1(out) 71 | out = self.relu(out) 72 | 73 | out = self.conv2(out) 74 | out = self.bn2(out) 75 | out = self.relu(out) 76 | 77 | out = self.conv3(out) 78 | out = self.bn3(out) 79 | 80 | if self.downsample is not None: 81 | residual = self.downsample(x) 82 | 83 | out += residual 84 | out = self.relu(out) 85 | 86 | return out 87 | 88 | 89 | class ResNet(nn.Module): 90 | 91 | def __init__(self, block, layers, num_classes=1000): 92 | self.inplanes = 64 93 | super(ResNet, self).__init__() 94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 95 | bias=False) 96 | self.bn1 = nn.BatchNorm2d(64) 97 | self.relu = nn.ReLU(inplace=True) 98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 99 | self.layer1 = 
self._make_layer(block, 64, layers[0]) 100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 103 | 104 | for m in self.modules(): 105 | if isinstance(m, nn.Conv2d): 106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 107 | m.weight.data.normal_(0, math.sqrt(2. / n)) 108 | elif isinstance(m, nn.BatchNorm2d): 109 | m.weight.data.fill_(1) 110 | m.bias.data.zero_() 111 | 112 | def _make_layer(self, block, planes, blocks, stride=1): 113 | downsample = None 114 | if stride != 1 or self.inplanes != planes * block.expansion: 115 | downsample = nn.Sequential( 116 | nn.Conv2d(self.inplanes, planes * block.expansion, 117 | kernel_size=1, stride=stride, bias=False), 118 | nn.BatchNorm2d(planes * block.expansion), 119 | ) 120 | 121 | layers = [] 122 | layers.append(block(self.inplanes, planes, stride, downsample)) 123 | self.inplanes = planes * block.expansion 124 | for i in range(1, blocks): 125 | layers.append(block(self.inplanes, planes)) 126 | 127 | return nn.Sequential(*layers) 128 | 129 | def load_weights(self, path): 130 | model_dict = self.state_dict() 131 | print('loading model from {}'.format(path)) 132 | try: 133 | #state_dict = torch.load(self.path) 134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()}) 135 | pretrained_dict = torch.load(path) 136 | from collections import OrderedDict 137 | tmp = OrderedDict() 138 | for k,v in pretrained_dict.items(): 139 | if k in model_dict: 140 | tmp[k] = v 141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 142 | # model_dict.update(pretrained_dict) 143 | model_dict.update(tmp) 144 | self.load_state_dict(model_dict) 145 | except: 146 | print ('loading model failed, {} may not exist'.format(path)) 147 | 148 | def forward(self, x): 149 | x = self.conv1(x) 150 | x = self.bn1(x) 151 | x = self.relu(x) 152 | C1 = self.maxpool(x) 153 | 154 | C2 = self.layer1(C1) 155 | C3 = self.layer2(C2) 156 | C4 = self.layer3(C3) 157 | C5 = self.layer4(C4) 158 | 159 | return C1, C2, C3, C4, C5 160 | 161 | ############################################################ 162 | # Pose Estimation Graph 163 | ############################################################ 164 | 165 | class pose_estimation(nn.Module): 166 | def __init__(self, class_num, pretrain=True): 167 | super(pose_estimation, self).__init__() 168 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50 169 | if pretrain == True: 170 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth' 171 | self.resnet.load_weights(self.model_path) 172 | self.apply_fix() 173 | self.reduce_C4 = nn.Sequential(nn.Conv2d(1024, 256, 1, 1, 0)) 174 | self.reduce_C5 = nn.Sequential(nn.Conv2d(2048, 256, 1, 1, 0)) 175 | 176 | self.block = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0),nn.ReLU(inplace=True), 177 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 178 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 179 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 180 | nn.Conv2d(128, 512, 3, 1, 1), nn.ReLU(inplace=True) 181 | ) 182 | self._init_weights(self.block) 183 | self.predict = nn.Conv2d(512, 25, 1, 1, 0) 184 | 185 | def _init_weights(self, model): 186 | for m in model: 187 | if isinstance(m, nn.Conv2d): 188 | m.weight.data.normal_(0, 0.01) 189 | if m.bias is not None: 190 | m.bias.data.zero_() 191 | def apply_fix(self): 192 | # 1. fix bn 193 | # 2. 
fix conv1 conv2 194 | for param in self.resnet.conv1.parameters(): 195 | param.requires_grad = False 196 | for param in self.resnet.layer1.parameters(): 197 | param.requires_grad = False 198 | 199 | 200 | def forward(self, x): 201 | C1, C2, C3, C4, C5 = self.resnet(x) 202 | C4 = self.reduce_C4(C4) 203 | C4 = F.upsample(C4, scale_factor=2) 204 | 205 | C5 = self.reduce_C5(C5) 206 | C5 = F.upsample(C5, scale_factor=4) 207 | 208 | P4 = C5 + C4 209 | 210 | out = self.block(P4) 211 | predict = self.predict(out) 212 | return predict -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xiangyu-CAS/FashionAI_Keypoints/dab6cbd975ba6071b070fb7da2fb163d01e2e2e4/models/__init__.py -------------------------------------------------------------------------------- /models/bk/CPM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import os 4 | import sys 5 | import math 6 | import torchvision.models as models 7 | 8 | def make_net_dict(): 9 | 10 | feature = [{'conv1_1': [3, 64, 3, 1, 1]}, {'conv1_2': [64, 64, 3, 1, 1]}, {'pool1': [2, 2, 0]}, 11 | {'conv2_1': [64, 128, 3, 1, 1]}, {'conv2_2': [128, 128, 3, 1, 1]}, {'pool2': [2, 2, 0]}, 12 | {'conv3_1': [128, 256, 3, 1, 1]}, {'conv3_2': [256, 256, 3, 1, 1]}, {'conv3_3': [256, 256, 3, 1, 1]}, {'conv3_4': [256, 256, 3, 1, 1]}, {'pool3': [2, 2, 0]}, 13 | {'conv4_1': [256, 512, 3, 1, 1]}, {'conv4_2': [512, 512, 3, 1, 1]}, {'conv4_3_cpm': [512, 256, 3, 1, 1]}, {'conv4_4_cpm': [256, 128, 3, 1, 1]}] 14 | 15 | 16 | block1 = [{'conv5_1_CPM': [128, 128, 3, 1, 1]},{'conv5_2_CPM': [128, 128, 3, 1, 1]},{'conv5_3_CPM': [128, 128, 3, 1, 1]}, 17 | {'conv5_4_CPM': [128, 512, 1, 1, 0]}] 18 | 19 | 20 | block2 = [{'Mconv1': [128+25, 128, 7, 1, 3]}, {'Mconv2': [128, 128, 7, 1, 3]}, 21 | {'Mconv3': [128, 128, 7, 1, 3]},{'Mconv4': [128, 128, 7, 1, 3]}, 22 | {'Mconv5': [128, 128, 7, 1, 3]}, 23 | {'Mconv6': [128, 128, 1, 1, 0]} 24 | ] 25 | predict_layers_stage1 = [{'predict_L1': [512, 25, 1, 1, 0]}] 26 | 27 | predict_layers_stageN = [{'predict_L1': [128, 25, 1, 1, 0]}] 28 | 29 | net_dict = [feature,block1,predict_layers_stage1,block2,predict_layers_stageN] 30 | 31 | return net_dict 32 | 33 | 34 | class CPM(nn.Module): 35 | 36 | def __init__(self, net_dict, batch_norm=False): 37 | 38 | super(CPM, self).__init__() 39 | 40 | self.feature = self._make_layer(net_dict[0]) 41 | 42 | self.block = self._make_layer(net_dict[1]) 43 | 44 | self.predict = self._make_layer(net_dict[2]) 45 | 46 | # repeate 47 | self.block_stage2 = self._make_layer(net_dict[3]) 48 | 49 | self.predict_stage2 = self._make_layer(net_dict[4]) 50 | 51 | self.block_stage3 = self._make_layer(net_dict[3]) 52 | 53 | self.predict_stage3 = self._make_layer(net_dict[4]) 54 | 55 | self.block_stage4 = self._make_layer(net_dict[3]) 56 | 57 | self.predict_stage4 = self._make_layer(net_dict[4]) 58 | 59 | self.block_stage5 = self._make_layer(net_dict[3]) 60 | 61 | self.predict_stage5 = self._make_layer(net_dict[4]) 62 | 63 | self.block_stage6 = self._make_layer(net_dict[3]) 64 | 65 | self.predict_stage6 = self._make_layer(net_dict[4]) 66 | 67 | self._init_weights() 68 | 69 | def _init_weights(self): 70 | for m in self.modules(): 71 | if isinstance(m, nn.Conv2d): 72 | m.weight.data.normal_(0, 0.01) 73 | if m.bias is not None: 74 | m.bias.data.zero_() 75 | 76 | def _make_layer(self, net_dict, 
batch_norm=False): 77 | layers = [] 78 | length = len(net_dict) 79 | for i in range(length): 80 | one_layer = net_dict[i] 81 | key = one_layer.keys()[0] 82 | v = one_layer[key] 83 | 84 | if 'pool' in key: 85 | layers += [nn.MaxPool2d(kernel_size=v[0], stride=v[1], padding=v[2])] 86 | elif 'predict' in key: 87 | conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3], padding=v[4]) 88 | layers += [conv2d] 89 | else: 90 | conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3], padding=v[4]) 91 | if batch_norm: 92 | layers += [conv2d, nn.BatchNorm2d(v[1]), nn.ReLU(inplace=True)] 93 | else: 94 | layers += [conv2d, nn.ReLU(inplace=True)] 95 | 96 | return nn.Sequential(*layers) 97 | 98 | def forward(self, x): 99 | # define forward flow 100 | feature = self.feature(x) 101 | 102 | out_stage1 = self.block(feature) 103 | L1_stage1 = self.predict(out_stage1) 104 | 105 | 106 | concat_stage2 = torch.cat([L1_stage1, feature], 1) 107 | out_stage2 = self.block_stage2(concat_stage2) 108 | L1_stage2 = self.predict_stage2(out_stage2) 109 | 110 | concat_stage3 = torch.cat([L1_stage2, feature], 1) 111 | out_stage3 = self.block_stage3(concat_stage3) 112 | L1_stage3 = self.predict_stage3(out_stage3) 113 | 114 | 115 | concat_stage4 = torch.cat([L1_stage3, feature], 1) 116 | out_stage4 = self.block_stage4(concat_stage4) 117 | L1_stage4 = self.predict_stage4(out_stage4) 118 | 119 | concat_stage5 = torch.cat([L1_stage4, feature], 1) 120 | out_stage5 = self.block_stage5(concat_stage5) 121 | L1_stage5 = self.predict_stage5(out_stage5) 122 | 123 | concat_stage6 = torch.cat([L1_stage5, feature], 1) 124 | out_stage6 = self.block_stage6(concat_stage6) 125 | L1_stage6 = self.predict_stage6(out_stage6) 126 | 127 | return L1_stage1, L1_stage2, L1_stage3, L1_stage4, L1_stage5, L1_stage6 128 | 129 | def PoseModel(num_point, num_stages=6, batch_norm=False, pretrained=False): 130 | net_dict = make_net_dict() 131 | model = CPM(net_dict, batch_norm) 132 | 133 | if pretrained: 134 | parameter_num = 10 135 | if batch_norm: 136 | vgg19 = models.vgg19_bn(pretrained=True) 137 | parameter_num *= 6 138 | else: 139 | vgg19 = models.vgg19(pretrained=True) 140 | parameter_num *= 2 141 | 142 | vgg19_state_dict = vgg19.state_dict() 143 | vgg19_keys = vgg19_state_dict.keys() 144 | 145 | model_dict = model.state_dict() 146 | from collections import OrderedDict 147 | weights_load = OrderedDict() 148 | 149 | for i in range(parameter_num): 150 | weights_load[model.state_dict().keys()[i]] = vgg19_state_dict[vgg19_keys[i]] 151 | model_dict.update(weights_load) 152 | model.load_state_dict(model_dict) 153 | 154 | return model 155 | 156 | 157 | if __name__ == '__main__': 158 | print PoseModel(25, 6, batch_norm=False) 159 | -------------------------------------------------------------------------------- /models/bk/CPM_FPN.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | import math 8 | import torch.nn.functional as F 9 | 10 | ############################################################ 11 | # ResNet 12 | ############################################################ 13 | def conv3x3(in_planes, out_planes, stride=1): 14 | "3x3 convolution with padding" 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 16 | padding=1, bias=False) 17 | 18 | class BasicBlock(nn.Module): 
19 | expansion = 1 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(inplanes, planes, stride) 24 | self.bn1 = nn.BatchNorm2d(planes) 25 | self.relu = nn.ReLU(inplace=True) 26 | self.conv2 = conv3x3(planes, planes) 27 | self.bn2 = nn.BatchNorm2d(planes) 28 | self.downsample = downsample 29 | self.stride = stride 30 | 31 | def forward(self, x): 32 | residual = x 33 | 34 | out = self.conv1(x) 35 | out = self.bn1(out) 36 | out = self.relu(out) 37 | 38 | out = self.conv2(out) 39 | out = self.bn2(out) 40 | 41 | if self.downsample is not None: 42 | residual = self.downsample(x) 43 | 44 | out += residual 45 | out = self.relu(out) 46 | 47 | return out 48 | 49 | 50 | class Bottleneck(nn.Module): 51 | expansion = 4 52 | 53 | def __init__(self, inplanes, planes, stride=1, downsample=None): 54 | super(Bottleneck, self).__init__() 55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 56 | self.bn1 = nn.BatchNorm2d(planes) 57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 58 | padding=1, bias=False) 59 | self.bn2 = nn.BatchNorm2d(planes) 60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 61 | self.bn3 = nn.BatchNorm2d(planes * 4) 62 | self.relu = nn.ReLU(inplace=True) 63 | self.downsample = downsample 64 | self.stride = stride 65 | 66 | def forward(self, x): 67 | residual = x 68 | 69 | out = self.conv1(x) 70 | out = self.bn1(out) 71 | out = self.relu(out) 72 | 73 | out = self.conv2(out) 74 | out = self.bn2(out) 75 | out = self.relu(out) 76 | 77 | out = self.conv3(out) 78 | out = self.bn3(out) 79 | 80 | if self.downsample is not None: 81 | residual = self.downsample(x) 82 | 83 | out += residual 84 | out = self.relu(out) 85 | 86 | return out 87 | 88 | 89 | class ResNet(nn.Module): 90 | 91 | def __init__(self, block, layers, num_classes=1000): 92 | self.inplanes = 64 93 | super(ResNet, self).__init__() 94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 95 | bias=False) 96 | self.bn1 = nn.BatchNorm2d(64) 97 | self.relu = nn.ReLU(inplace=True) 98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 99 | self.layer1 = self._make_layer(block, 64, layers[0]) 100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 103 | 104 | for m in self.modules(): 105 | if isinstance(m, nn.Conv2d): 106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 107 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 108 | elif isinstance(m, nn.BatchNorm2d): 109 | m.weight.data.fill_(1) 110 | m.bias.data.zero_() 111 | 112 | def _make_layer(self, block, planes, blocks, stride=1): 113 | downsample = None 114 | if stride != 1 or self.inplanes != planes * block.expansion: 115 | downsample = nn.Sequential( 116 | nn.Conv2d(self.inplanes, planes * block.expansion, 117 | kernel_size=1, stride=stride, bias=False), 118 | nn.BatchNorm2d(planes * block.expansion), 119 | ) 120 | 121 | layers = [] 122 | layers.append(block(self.inplanes, planes, stride, downsample)) 123 | self.inplanes = planes * block.expansion 124 | for i in range(1, blocks): 125 | layers.append(block(self.inplanes, planes)) 126 | 127 | return nn.Sequential(*layers) 128 | 129 | def load_weights(self, path): 130 | model_dict = self.state_dict() 131 | print('loading model from {}'.format(path)) 132 | try: 133 | #state_dict = torch.load(self.path) 134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()}) 135 | pretrained_dict = torch.load(path) 136 | from collections import OrderedDict 137 | tmp = OrderedDict() 138 | for k,v in pretrained_dict.items(): 139 | if k in model_dict: 140 | tmp[k] = v 141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 142 | # model_dict.update(pretrained_dict) 143 | model_dict.update(tmp) 144 | self.load_state_dict(model_dict) 145 | except: 146 | print ('loading model failed, {} may not exist'.format(path)) 147 | 148 | def forward(self, x): 149 | x = self.conv1(x) 150 | x = self.bn1(x) 151 | x = self.relu(x) 152 | C1 = self.maxpool(x) 153 | 154 | C2 = self.layer1(C1) 155 | C3 = self.layer2(C2) 156 | C4 = self.layer3(C3) 157 | C5 = self.layer4(C4) 158 | 159 | return C1, C2, C3, C4, C5 160 | 161 | ############################################################ 162 | # FPN Graph 163 | ############################################################ 164 | 165 | class FPN(nn.Module): 166 | def __init__(self, out_channels): 167 | super(FPN, self).__init__() 168 | self.out_channels = out_channels 169 | self.P6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0, ceil_mode=False) 170 | self.P5_conv1 = nn.Conv2d(2048, self.out_channels, kernel_size=1, stride=1) 171 | self.P5_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1) 172 | self.P4_conv1 = nn.Conv2d(1024, self.out_channels, kernel_size=1, stride=1) 173 | self.P4_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1) 174 | 175 | self.P3_conv1 = nn.Conv2d(512, self.out_channels, kernel_size=1, stride=1) 176 | self.P3_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1) 177 | self.P2_conv1 = nn.Conv2d(256, self.out_channels, kernel_size=1, stride=1) 178 | self.P2_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1) 179 | 180 | self.init_weights() 181 | 182 | def init_weights(self): 183 | for m in self.modules(): 184 | if isinstance(m, nn.Conv2d): 185 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 186 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 187 | if m.bias is not None: 188 | m.bias.data.zero_() 189 | elif isinstance(m, nn.BatchNorm2d): 190 | m.weight.data.fill_(1) 191 | m.bias.data.zero_() 192 | elif isinstance(m, nn.Linear): 193 | m.weight.data.normal_(0, 0.01) 194 | m.bias.data.zero_() 195 | 196 | def forward(self, C1, C2, C3, C4, C5): 197 | 198 | p5_out = self.P5_conv1(C5) 199 | p4_out = torch.add(self.P4_conv1(C4), F.upsample(p5_out, scale_factor=2)) 200 | p3_out = torch.add(self.P3_conv1(C3), F.upsample(p4_out, scale_factor=2)) 201 | p2_out = torch.add(self.P2_conv1(C2), F.upsample(p3_out, scale_factor=2)) 202 | 203 | p5_out = self.P5_conv2(p5_out) 204 | p4_out = self.P4_conv2(p4_out) 205 | p3_out = self.P3_conv2(p3_out) 206 | p2_out = self.P2_conv2(p2_out) 207 | 208 | # P6 is used for the 5th anchor scale in RPN. Generated by 209 | # subsampling from P5 with stride of 2. 210 | p6_out = self.P6(p5_out) 211 | 212 | return p2_out, p3_out, p4_out, p5_out, p6_out 213 | ############################################################ 214 | # Pose Estimation Graph 215 | ############################################################ 216 | 217 | class pose_estimation(nn.Module): 218 | def __init__(self, class_num, pretrain=True): 219 | super(pose_estimation, self).__init__() 220 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50 221 | if pretrain == True: 222 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth' 223 | self.resnet.load_weights(self.model_path) 224 | self.apply_fix() 225 | self.out_channels = 256 226 | self.P5_conv1 = nn.Conv2d(2048, self.out_channels, kernel_size=1, stride=1) 227 | self.P5_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1) 228 | self.P4_conv1 = nn.Conv2d(1024, self.out_channels, kernel_size=1, stride=1) 229 | self.P4_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1) 230 | 231 | self.P3_conv1 = nn.Conv2d(512, self.out_channels, kernel_size=1, stride=1) 232 | self.P3_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1) 233 | self.P2_conv1 = nn.Conv2d(256, self.out_channels, kernel_size=1, stride=1) 234 | self.P2_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1) 235 | 236 | 237 | self.block = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0),nn.ReLU(inplace=True), 238 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 239 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 240 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 241 | nn.Conv2d(128, 512, 3, 1, 1), nn.ReLU(inplace=True) 242 | ) 243 | self._init_weights(self.block) 244 | self.predict = nn.Conv2d(512, 25, 1, 1, 0) 245 | 246 | def _init_weights(self, model): 247 | for m in model: 248 | if isinstance(m, nn.Conv2d): 249 | m.weight.data.normal_(0, 0.01) 250 | if m.bias is not None: 251 | m.bias.data.zero_() 252 | def apply_fix(self): 253 | # 1. fix bn 254 | # 2. 
fix conv1 conv2 255 | for param in self.resnet.conv1.parameters(): 256 | param.requires_grad = False 257 | for param in self.resnet.layer1.parameters(): 258 | param.requires_grad = False 259 | 260 | 261 | def forward(self, x): 262 | C1, C2, C3, C4, C5 = self.resnet(x) 263 | 264 | p5_out = self.P5_conv1(C5) 265 | p4_out = torch.add(self.P4_conv1(C4), F.upsample(p5_out, scale_factor=2)) 266 | p3_out = torch.add(self.P3_conv1(C3), F.upsample(p4_out, scale_factor=2)) 267 | p2_out = torch.add(self.P2_conv1(C2), F.upsample(p3_out, scale_factor=2)) 268 | 269 | p5_out = self.P5_conv2(p5_out) 270 | p4_out = self.P4_conv2(p4_out) 271 | p3_out = self.P3_conv2(p3_out) 272 | p2_out = self.P2_conv2(p2_out) 273 | 274 | 275 | out = self.block(p3_out) 276 | predict = self.predict(out) 277 | return predict -------------------------------------------------------------------------------- /models/bk/CPM_FPN3.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | import math 8 | import torch.nn.functional as F 9 | 10 | ############################################################ 11 | # ResNet 12 | ############################################################ 13 | def conv3x3(in_planes, out_planes, stride=1): 14 | "3x3 convolution with padding" 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 16 | padding=1, bias=False) 17 | 18 | class BasicBlock(nn.Module): 19 | expansion = 1 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(inplanes, planes, stride) 24 | self.bn1 = nn.BatchNorm2d(planes) 25 | self.relu = nn.ReLU(inplace=True) 26 | self.conv2 = conv3x3(planes, planes) 27 | self.bn2 = nn.BatchNorm2d(planes) 28 | self.downsample = downsample 29 | self.stride = stride 30 | 31 | def forward(self, x): 32 | residual = x 33 | 34 | out = self.conv1(x) 35 | out = self.bn1(out) 36 | out = self.relu(out) 37 | 38 | out = self.conv2(out) 39 | out = self.bn2(out) 40 | 41 | if self.downsample is not None: 42 | residual = self.downsample(x) 43 | 44 | out += residual 45 | out = self.relu(out) 46 | 47 | return out 48 | 49 | 50 | class Bottleneck(nn.Module): 51 | expansion = 4 52 | 53 | def __init__(self, inplanes, planes, stride=1, downsample=None): 54 | super(Bottleneck, self).__init__() 55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 56 | self.bn1 = nn.BatchNorm2d(planes) 57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 58 | padding=1, bias=False) 59 | self.bn2 = nn.BatchNorm2d(planes) 60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 61 | self.bn3 = nn.BatchNorm2d(planes * 4) 62 | self.relu = nn.ReLU(inplace=True) 63 | self.downsample = downsample 64 | self.stride = stride 65 | 66 | def forward(self, x): 67 | residual = x 68 | 69 | out = self.conv1(x) 70 | out = self.bn1(out) 71 | out = self.relu(out) 72 | 73 | out = self.conv2(out) 74 | out = self.bn2(out) 75 | out = self.relu(out) 76 | 77 | out = self.conv3(out) 78 | out = self.bn3(out) 79 | 80 | if self.downsample is not None: 81 | residual = self.downsample(x) 82 | 83 | out += residual 84 | out = self.relu(out) 85 | 86 | return out 87 | 88 | 89 | class ResNet(nn.Module): 90 | 91 | def __init__(self, block, layers, num_classes=1000): 92 | self.inplanes = 64 93 | super(ResNet, 
self).__init__() 94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 95 | bias=False) 96 | self.bn1 = nn.BatchNorm2d(64) 97 | self.relu = nn.ReLU(inplace=True) 98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 99 | self.layer1 = self._make_layer(block, 64, layers[0]) 100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 103 | 104 | for m in self.modules(): 105 | if isinstance(m, nn.Conv2d): 106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 107 | m.weight.data.normal_(0, math.sqrt(2. / n)) 108 | elif isinstance(m, nn.BatchNorm2d): 109 | m.weight.data.fill_(1) 110 | m.bias.data.zero_() 111 | 112 | def _make_layer(self, block, planes, blocks, stride=1): 113 | downsample = None 114 | if stride != 1 or self.inplanes != planes * block.expansion: 115 | downsample = nn.Sequential( 116 | nn.Conv2d(self.inplanes, planes * block.expansion, 117 | kernel_size=1, stride=stride, bias=False), 118 | nn.BatchNorm2d(planes * block.expansion), 119 | ) 120 | 121 | layers = [] 122 | layers.append(block(self.inplanes, planes, stride, downsample)) 123 | self.inplanes = planes * block.expansion 124 | for i in range(1, blocks): 125 | layers.append(block(self.inplanes, planes)) 126 | 127 | return nn.Sequential(*layers) 128 | 129 | def load_weights(self, path): 130 | model_dict = self.state_dict() 131 | print('loading model from {}'.format(path)) 132 | try: 133 | #state_dict = torch.load(self.path) 134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()}) 135 | pretrained_dict = torch.load(path) 136 | from collections import OrderedDict 137 | tmp = OrderedDict() 138 | for k,v in pretrained_dict.items(): 139 | if k in model_dict: 140 | tmp[k] = v 141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 142 | # model_dict.update(pretrained_dict) 143 | model_dict.update(tmp) 144 | self.load_state_dict(model_dict) 145 | except: 146 | print ('loading model failed, {} may not exist'.format(path)) 147 | 148 | def forward(self, x): 149 | x = self.conv1(x) 150 | x = self.bn1(x) 151 | x = self.relu(x) 152 | C1 = self.maxpool(x) 153 | 154 | C2 = self.layer1(C1) 155 | C3 = self.layer2(C2) 156 | C4 = self.layer3(C3) 157 | C5 = self.layer4(C4) 158 | 159 | return C1, C2, C3, C4, C5 160 | 161 | ############################################################ 162 | # FPN Graph 163 | ############################################################ 164 | 165 | class FPN(nn.Module): # xavier_fill as default 166 | def __init__(self, out_channels): 167 | super(FPN, self).__init__() 168 | self.out_channels = out_channels 169 | self.P6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0, ceil_mode=False) 170 | self.P5_conv1 = nn.Conv2d(2048, self.out_channels, kernel_size=1, stride=1) 171 | 172 | self.P4_conv1 = nn.Conv2d(1024, self.out_channels, kernel_size=1, stride=1) 173 | self.P4_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1) 174 | 175 | self.P3_conv1 = nn.Conv2d(512, self.out_channels, kernel_size=1, stride=1) 176 | self.P3_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1) 177 | self.P2_conv1 = nn.Conv2d(256, self.out_channels, kernel_size=1, stride=1) 178 | self.P2_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1) 179 | 180 | def forward(self, C1, 
C2, C3 ,C4, C5): 181 | 182 | p5_out = self.P5_conv1(C5) 183 | 184 | p4_out = torch.add(self.P4_conv1(C4), F.upsample_nearest(p5_out, scale_factor=2)) 185 | p3_out = torch.add(self.P3_conv1(C3), F.upsample_nearest(p4_out, scale_factor=2)) 186 | p2_out = torch.add(self.P2_conv1(C2), F.upsample_nearest(p3_out, scale_factor=2)) 187 | 188 | p4_out = self.P4_conv2(p4_out) 189 | p3_out = self.P3_conv2(p3_out) 190 | p2_out = self.P2_conv2(p2_out) 191 | 192 | # P6 is used for the 5th anchor scale in RPN. Generated by 193 | # subsampling from P5 with stride of 2. 194 | p6_out = self.P6(p5_out) 195 | 196 | return p2_out, p3_out, p4_out, p5_out, p6_out 197 | 198 | 199 | ############################################################ 200 | # Pose Estimation Graph 201 | ############################################################ 202 | 203 | class pose_estimation(nn.Module): 204 | def __init__(self, class_num, pretrain=True): 205 | super(pose_estimation, self).__init__() 206 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50 207 | if pretrain == True: 208 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth' 209 | self.resnet.load_weights(self.model_path) 210 | self.apply_fix() 211 | self.out_channels = 256 212 | self.fpn = FPN(self.out_channels) 213 | 214 | # self.block = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0), nn.ReLU(inplace=True), 215 | # nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 216 | # nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 217 | # nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 218 | # nn.Conv2d(128, 256, 3, 1, 1), nn.ReLU(inplace=True) 219 | # ) 220 | # self._init_weights(self.block) 221 | 222 | self.predict = nn.Sequential(nn.Conv2d(768, 128, 3, 1, 1), nn.ReLU(inplace=True), 223 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 224 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 225 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 226 | nn.Conv2d(128, 25, 3, 1, 1)) 227 | self._init_weights(self.predict) 228 | 229 | 230 | 231 | def _gaussian_init_conv(self, conv): 232 | if isinstance(conv, nn.Conv2d): 233 | conv.weight.data.normal_(0, 0.01) 234 | if conv.bias is not None: 235 | conv.bias.data.zero_() 236 | 237 | def _init_weights(self, model): 238 | for m in model: 239 | if isinstance(m, nn.Conv2d): 240 | m.weight.data.normal_(0, 0.01) 241 | if m.bias is not None: 242 | m.bias.data.zero_() 243 | 244 | 245 | def apply_fix(self): 246 | # 1. fix bn 247 | # 2. 
fix conv1 conv2 248 | for param in self.resnet.conv1.parameters(): 249 | param.requires_grad = False 250 | for param in self.resnet.layer1.parameters(): 251 | param.requires_grad = False 252 | 253 | def forward(self, x): 254 | C1, C2, C3, C4, C5 = self.resnet(x) 255 | P2, P3, P4, P5, P6 = self.fpn(C1, C2, C3, C4, C5) 256 | 257 | P4_x2 = F.upsample(P4, scale_factor=2) 258 | P5_x4 = F.upsample(P5, scale_factor=4) 259 | feature_cat = torch.cat([P3, P4_x2, P5_x4], 1) 260 | out = self.predict(feature_cat) 261 | return out 262 | -------------------------------------------------------------------------------- /models/bk/CPM_FPN4.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | import math 8 | import torch.nn.functional as F 9 | 10 | ############################################################ 11 | # ResNet 12 | ############################################################ 13 | def conv3x3(in_planes, out_planes, stride=1): 14 | "3x3 convolution with padding" 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 16 | padding=1, bias=False) 17 | 18 | class BasicBlock(nn.Module): 19 | expansion = 1 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(inplanes, planes, stride) 24 | self.bn1 = nn.BatchNorm2d(planes) 25 | self.relu = nn.ReLU(inplace=True) 26 | self.conv2 = conv3x3(planes, planes) 27 | self.bn2 = nn.BatchNorm2d(planes) 28 | self.downsample = downsample 29 | self.stride = stride 30 | 31 | def forward(self, x): 32 | residual = x 33 | 34 | out = self.conv1(x) 35 | out = self.bn1(out) 36 | out = self.relu(out) 37 | 38 | out = self.conv2(out) 39 | out = self.bn2(out) 40 | 41 | if self.downsample is not None: 42 | residual = self.downsample(x) 43 | 44 | out += residual 45 | out = self.relu(out) 46 | 47 | return out 48 | 49 | 50 | class Bottleneck(nn.Module): 51 | expansion = 4 52 | 53 | def __init__(self, inplanes, planes, stride=1, downsample=None): 54 | super(Bottleneck, self).__init__() 55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 56 | self.bn1 = nn.BatchNorm2d(planes) 57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 58 | padding=1, bias=False) 59 | self.bn2 = nn.BatchNorm2d(planes) 60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 61 | self.bn3 = nn.BatchNorm2d(planes * 4) 62 | self.relu = nn.ReLU(inplace=True) 63 | self.downsample = downsample 64 | self.stride = stride 65 | 66 | def forward(self, x): 67 | residual = x 68 | 69 | out = self.conv1(x) 70 | out = self.bn1(out) 71 | out = self.relu(out) 72 | 73 | out = self.conv2(out) 74 | out = self.bn2(out) 75 | out = self.relu(out) 76 | 77 | out = self.conv3(out) 78 | out = self.bn3(out) 79 | 80 | if self.downsample is not None: 81 | residual = self.downsample(x) 82 | 83 | out += residual 84 | out = self.relu(out) 85 | 86 | return out 87 | 88 | 89 | class ResNet(nn.Module): 90 | 91 | def __init__(self, block, layers, num_classes=1000): 92 | self.inplanes = 64 93 | super(ResNet, self).__init__() 94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 95 | bias=False) 96 | self.bn1 = nn.BatchNorm2d(64) 97 | self.relu = nn.ReLU(inplace=True) 98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 99 | self.layer1 =
self._make_layer(block, 64, layers[0]) 100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 103 | 104 | for m in self.modules(): 105 | if isinstance(m, nn.Conv2d): 106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 107 | m.weight.data.normal_(0, math.sqrt(2. / n)) 108 | elif isinstance(m, nn.BatchNorm2d): 109 | m.weight.data.fill_(1) 110 | m.bias.data.zero_() 111 | 112 | def _make_layer(self, block, planes, blocks, stride=1): 113 | downsample = None 114 | if stride != 1 or self.inplanes != planes * block.expansion: 115 | downsample = nn.Sequential( 116 | nn.Conv2d(self.inplanes, planes * block.expansion, 117 | kernel_size=1, stride=stride, bias=False), 118 | nn.BatchNorm2d(planes * block.expansion), 119 | ) 120 | 121 | layers = [] 122 | layers.append(block(self.inplanes, planes, stride, downsample)) 123 | self.inplanes = planes * block.expansion 124 | for i in range(1, blocks): 125 | layers.append(block(self.inplanes, planes)) 126 | 127 | return nn.Sequential(*layers) 128 | 129 | def load_weights(self, path): 130 | model_dict = self.state_dict() 131 | print('loading model from {}'.format(path)) 132 | try: 133 | #state_dict = torch.load(self.path) 134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()}) 135 | pretrained_dict = torch.load(path) 136 | from collections import OrderedDict 137 | tmp = OrderedDict() 138 | for k,v in pretrained_dict.items(): 139 | if k in model_dict: 140 | tmp[k] = v 141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 142 | # model_dict.update(pretrained_dict) 143 | model_dict.update(tmp) 144 | self.load_state_dict(model_dict) 145 | except: 146 | print ('loading model failed, {} may not exist'.format(path)) 147 | 148 | def forward(self, x): 149 | x = self.conv1(x) 150 | x = self.bn1(x) 151 | x = self.relu(x) 152 | C1 = self.maxpool(x) 153 | 154 | C2 = self.layer1(C1) 155 | C3 = self.layer2(C2) 156 | C4 = self.layer3(C3) 157 | C5 = self.layer4(C4) 158 | 159 | return C1, C2, C3, C4, C5 160 | 161 | ############################################################ 162 | # FPN Graph 163 | ############################################################ 164 | 165 | class FPN(nn.Module): # xavier_fill as default 166 | def __init__(self, out_channels): 167 | super(FPN, self).__init__() 168 | self.out_channels = out_channels 169 | self.P6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0, ceil_mode=False) 170 | self.P5_conv1 = nn.Conv2d(2048, self.out_channels, kernel_size=1, stride=1) 171 | 172 | self.P4_conv1 = nn.Conv2d(1024, self.out_channels, kernel_size=1, stride=1) 173 | self.P4_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1) 174 | 175 | self.P3_conv1 = nn.Conv2d(512, self.out_channels, kernel_size=1, stride=1) 176 | self.P3_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1) 177 | self.P2_conv1 = nn.Conv2d(256, self.out_channels, kernel_size=1, stride=1) 178 | self.P2_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1) 179 | 180 | def forward(self, C1, C2, C3 ,C4, C5): 181 | 182 | p5_out = self.P5_conv1(C5) 183 | 184 | p4_out = torch.add(self.P4_conv1(C4), F.upsample_nearest(p5_out, scale_factor=2)) 185 | p3_out = torch.add(self.P3_conv1(C3), F.upsample_nearest(p4_out, scale_factor=2)) 186 | p2_out = 
torch.add(self.P2_conv1(C2), F.upsample_nearest(p3_out, scale_factor=2)) 187 | 188 | p4_out = self.P4_conv2(p4_out) 189 | p3_out = self.P3_conv2(p3_out) 190 | p2_out = self.P2_conv2(p2_out) 191 | 192 | # P6 is used for the 5th anchor scale in RPN. Generated by 193 | # subsampling from P5 with stride of 2. 194 | p6_out = self.P6(p5_out) 195 | 196 | return p2_out, p3_out, p4_out, p5_out, p6_out 197 | 198 | 199 | ############################################################ 200 | # Pose Estimation Graph 201 | ############################################################ 202 | 203 | class pose_estimation(nn.Module): 204 | def __init__(self, class_num, pretrain=True): 205 | super(pose_estimation, self).__init__() 206 | # self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50 207 | self.resnet = ResNet(Bottleneck, [3, 4, 23, 3]) #resnet101 208 | if pretrain == True: 209 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet101-5d3b4d8f.pth' 210 | self.resnet.load_weights(self.model_path) 211 | self.apply_fix() 212 | self.out_channels = 256 213 | self.fpn = FPN(self.out_channels) 214 | 215 | # self.block = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0), nn.ReLU(inplace=True), 216 | # nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 217 | # nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 218 | # nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 219 | # nn.Conv2d(128, 256, 3, 1, 1), nn.ReLU(inplace=True) 220 | # ) 221 | # self._init_weights(self.block) 222 | 223 | self.predict = nn.Sequential(nn.Conv2d(768, 128, 3, 1, 1), nn.ReLU(inplace=True), 224 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 225 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 226 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 227 | nn.Conv2d(128, 25, 3, 1, 1)) 228 | self._init_weights(self.predict) 229 | 230 | 231 | 232 | def _gaussian_init_conv(self, conv): 233 | if isinstance(conv, nn.Conv2d): 234 | conv.weight.data.normal_(0, 0.01) 235 | if conv.bias is not None: 236 | conv.bias.data.zero_() 237 | 238 | def _init_weights(self, model): 239 | for m in model: 240 | if isinstance(m, nn.Conv2d): 241 | m.weight.data.normal_(0, 0.01) 242 | if m.bias is not None: 243 | m.bias.data.zero_() 244 | 245 | 246 | def apply_fix(self): 247 | # 1. fix bn 248 | # 2. 
fix conv1 conv2 249 | for param in self.resnet.conv1.parameters(): 250 | param.requires_grad = False 251 | for param in self.resnet.layer1.parameters(): 252 | param.requires_grad = False 253 | 254 | def forward(self, x): 255 | C1, C2, C3, C4, C5 = self.resnet(x) 256 | P2, P3, P4, P5, P6 = self.fpn(C1, C2, C3, C4, C5) 257 | 258 | P4_x2 = F.upsample(P4, scale_factor=2) 259 | P5_x4 = F.upsample(P5, scale_factor=4) 260 | feature_cat = torch.cat([P3, P4_x2, P5_x4], 1) 261 | out = self.predict(feature_cat) 262 | return out 263 | -------------------------------------------------------------------------------- /models/bk/CPM_ResNet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | import math 8 | import torch.nn.functional as F 9 | 10 | ############################################################ 11 | # ResNet 12 | ############################################################ 13 | def conv3x3(in_planes, out_planes, stride=1): 14 | "3x3 convolution with padding" 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 16 | padding=1, bias=False) 17 | 18 | class BasicBlock(nn.Module): 19 | expansion = 1 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(inplanes, planes, stride) 24 | self.bn1 = nn.BatchNorm2d(planes) 25 | self.relu = nn.ReLU(inplace=True) 26 | self.conv2 = conv3x3(planes, planes) 27 | self.bn2 = nn.BatchNorm2d(planes) 28 | self.downsample = downsample 29 | self.stride = stride 30 | 31 | def forward(self, x): 32 | residual = x 33 | 34 | out = self.conv1(x) 35 | out = self.bn1(out) 36 | out = self.relu(out) 37 | 38 | out = self.conv2(out) 39 | out = self.bn2(out) 40 | 41 | if self.downsample is not None: 42 | residual = self.downsample(x) 43 | 44 | out += residual 45 | out = self.relu(out) 46 | 47 | return out 48 | 49 | 50 | class Bottleneck(nn.Module): 51 | expansion = 4 52 | 53 | def __init__(self, inplanes, planes, stride=1, downsample=None): 54 | super(Bottleneck, self).__init__() 55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 56 | self.bn1 = nn.BatchNorm2d(planes) 57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 58 | padding=1, bias=False) 59 | self.bn2 = nn.BatchNorm2d(planes) 60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 61 | self.bn3 = nn.BatchNorm2d(planes * 4) 62 | self.relu = nn.ReLU(inplace=True) 63 | self.downsample = downsample 64 | self.stride = stride 65 | 66 | def forward(self, x): 67 | residual = x 68 | 69 | out = self.conv1(x) 70 | out = self.bn1(out) 71 | out = self.relu(out) 72 | 73 | out = self.conv2(out) 74 | out = self.bn2(out) 75 | out = self.relu(out) 76 | 77 | out = self.conv3(out) 78 | out = self.bn3(out) 79 | 80 | if self.downsample is not None: 81 | residual = self.downsample(x) 82 | 83 | out += residual 84 | out = self.relu(out) 85 | 86 | return out 87 | 88 | 89 | class ResNet(nn.Module): 90 | 91 | def __init__(self, block, layers, num_classes=1000): 92 | self.inplanes = 64 93 | super(ResNet, self).__init__() 94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 95 | bias=False) 96 | self.bn1 = nn.BatchNorm2d(64) 97 | self.relu = nn.ReLU(inplace=True) 98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 99 | self.layer1 =
self._make_layer(block, 64, layers[0]) 100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 103 | 104 | for m in self.modules(): 105 | if isinstance(m, nn.Conv2d): 106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 107 | m.weight.data.normal_(0, math.sqrt(2. / n)) 108 | elif isinstance(m, nn.BatchNorm2d): 109 | m.weight.data.fill_(1) 110 | m.bias.data.zero_() 111 | 112 | def _make_layer(self, block, planes, blocks, stride=1): 113 | downsample = None 114 | if stride != 1 or self.inplanes != planes * block.expansion: 115 | downsample = nn.Sequential( 116 | nn.Conv2d(self.inplanes, planes * block.expansion, 117 | kernel_size=1, stride=stride, bias=False), 118 | nn.BatchNorm2d(planes * block.expansion), 119 | ) 120 | 121 | layers = [] 122 | layers.append(block(self.inplanes, planes, stride, downsample)) 123 | self.inplanes = planes * block.expansion 124 | for i in range(1, blocks): 125 | layers.append(block(self.inplanes, planes)) 126 | 127 | return nn.Sequential(*layers) 128 | 129 | def load_weights(self, path): 130 | model_dict = self.state_dict() 131 | print('loading model from {}'.format(path)) 132 | try: 133 | #state_dict = torch.load(self.path) 134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()}) 135 | pretrained_dict = torch.load(path) 136 | from collections import OrderedDict 137 | tmp = OrderedDict() 138 | for k,v in pretrained_dict.items(): 139 | if k in model_dict: 140 | tmp[k] = v 141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 142 | # model_dict.update(pretrained_dict) 143 | model_dict.update(tmp) 144 | self.load_state_dict(model_dict) 145 | except: 146 | print ('loading model failed, {} may not exist'.format(path)) 147 | 148 | def forward(self, x): 149 | x = self.conv1(x) 150 | x = self.bn1(x) 151 | x = self.relu(x) 152 | C1 = self.maxpool(x) 153 | 154 | C2 = self.layer1(C1) 155 | C3 = self.layer2(C2) 156 | C4 = self.layer3(C3) 157 | C5 = self.layer4(C4) 158 | 159 | return C1, C2, C3, C4, C5 160 | 161 | ############################################################ 162 | # Pose Estimation Graph 163 | ############################################################ 164 | 165 | class pose_estimation(nn.Module): 166 | def __init__(self, class_num, pretrain=True): 167 | super(pose_estimation, self).__init__() 168 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50 169 | if pretrain == True: 170 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth' 171 | self.resnet.load_weights(self.model_path) 172 | self.apply_fix() 173 | self.block = nn.Sequential(nn.Conv2d(512, 128, 1, 1, 0),nn.ReLU(inplace=True), 174 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 175 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 176 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 177 | nn.Conv2d(128, 512, 3, 1, 1), nn.ReLU(inplace=True) 178 | ) 179 | self._init_weights(self.block) 180 | self.predict = nn.Conv2d(512, 25, 1, 1, 0) 181 | 182 | def _init_weights(self, model): 183 | for m in model: 184 | if isinstance(m, nn.Conv2d): 185 | m.weight.data.normal_(0, 0.01) 186 | if m.bias is not None: 187 | m.bias.data.zero_() 188 | def apply_fix(self): 189 | # 1. fix bn 190 | # 2. 
fix conv1 conv2 191 | for param in self.resnet.conv1.parameters(): 192 | param.requires_grad = False 193 | for param in self.resnet.layer1.parameters(): 194 | param.requires_grad = False 195 | 196 | 197 | def forward(self, x): 198 | C1, C2, C3, C4, C5 = self.resnet(x) 199 | 200 | out = self.block(C3) 201 | predict = self.predict(out) 202 | return predict -------------------------------------------------------------------------------- /models/bk/CPM_ResNet2.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | import math 8 | import torch.nn.functional as F 9 | 10 | ############################################################ 11 | # ResNet 12 | ############################################################ 13 | def conv3x3(in_planes, out_planes, stride=1): 14 | "3x3 convolution with padding" 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 16 | padding=1, bias=False) 17 | 18 | class BasicBlock(nn.Module): 19 | expansion = 1 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(inplanes, planes, stride) 24 | self.bn1 = nn.BatchNorm2d(planes) 25 | self.relu = nn.ReLU(inplace=True) 26 | self.conv2 = conv3x3(planes, planes) 27 | self.bn2 = nn.BatchNorm2d(planes) 28 | self.downsample = downsample 29 | self.stride = stride 30 | 31 | def forward(self, x): 32 | residual = x 33 | 34 | out = self.conv1(x) 35 | out = self.bn1(out) 36 | out = self.relu(out) 37 | 38 | out = self.conv2(out) 39 | out = self.bn2(out) 40 | 41 | if self.downsample is not None: 42 | residual = self.downsample(x) 43 | 44 | out += residual 45 | out = self.relu(out) 46 | 47 | return out 48 | 49 | 50 | class Bottleneck(nn.Module): 51 | expansion = 4 52 | 53 | def __init__(self, inplanes, planes, stride=1, downsample=None): 54 | super(Bottleneck, self).__init__() 55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 56 | self.bn1 = nn.BatchNorm2d(planes) 57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 58 | padding=1, bias=False) 59 | self.bn2 = nn.BatchNorm2d(planes) 60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 61 | self.bn3 = nn.BatchNorm2d(planes * 4) 62 | self.relu = nn.ReLU(inplace=True) 63 | self.downsample = downsample 64 | self.stride = stride 65 | 66 | def forward(self, x): 67 | residual = x 68 | 69 | out = self.conv1(x) 70 | out = self.bn1(out) 71 | out = self.relu(out) 72 | 73 | out = self.conv2(out) 74 | out = self.bn2(out) 75 | out = self.relu(out) 76 | 77 | out = self.conv3(out) 78 | out = self.bn3(out) 79 | 80 | if self.downsample is not None: 81 | residual = self.downsample(x) 82 | 83 | out += residual 84 | out = self.relu(out) 85 | 86 | return out 87 | 88 | 89 | class ResNet(nn.Module): 90 | 91 | def __init__(self, block, layers, num_classes=1000): 92 | self.inplanes = 64 93 | super(ResNet, self).__init__() 94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 95 | bias=False) 96 | self.bn1 = nn.BatchNorm2d(64) 97 | self.relu = nn.ReLU(inplace=True) 98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 99 | self.layer1 = self._make_layer(block, 64, layers[0]) 100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 102 | 
self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 103 | 104 | for m in self.modules(): 105 | if isinstance(m, nn.Conv2d): 106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 107 | m.weight.data.normal_(0, math.sqrt(2. / n)) 108 | elif isinstance(m, nn.BatchNorm2d): 109 | m.weight.data.fill_(1) 110 | m.bias.data.zero_() 111 | 112 | def _make_layer(self, block, planes, blocks, stride=1): 113 | downsample = None 114 | if stride != 1 or self.inplanes != planes * block.expansion: 115 | downsample = nn.Sequential( 116 | nn.Conv2d(self.inplanes, planes * block.expansion, 117 | kernel_size=1, stride=stride, bias=False), 118 | nn.BatchNorm2d(planes * block.expansion), 119 | ) 120 | 121 | layers = [] 122 | layers.append(block(self.inplanes, planes, stride, downsample)) 123 | self.inplanes = planes * block.expansion 124 | for i in range(1, blocks): 125 | layers.append(block(self.inplanes, planes)) 126 | 127 | return nn.Sequential(*layers) 128 | 129 | def load_weights(self, path): 130 | model_dict = self.state_dict() 131 | print('loading model from {}'.format(path)) 132 | try: 133 | #state_dict = torch.load(self.path) 134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()}) 135 | pretrained_dict = torch.load(path) 136 | from collections import OrderedDict 137 | tmp = OrderedDict() 138 | for k,v in pretrained_dict.items(): 139 | if k in model_dict: 140 | tmp[k] = v 141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 142 | # model_dict.update(pretrained_dict) 143 | model_dict.update(tmp) 144 | self.load_state_dict(model_dict) 145 | except: 146 | print ('loading model failed, {} may not exist'.format(path)) 147 | 148 | def forward(self, x): 149 | x = self.conv1(x) 150 | x = self.bn1(x) 151 | x = self.relu(x) 152 | C1 = self.maxpool(x) 153 | 154 | C2 = self.layer1(C1) 155 | C3 = self.layer2(C2) 156 | C4 = self.layer3(C3) 157 | C5 = self.layer4(C4) 158 | 159 | return C1, C2, C3, C4, C5 160 | 161 | ############################################################ 162 | # Pose Estimation Graph 163 | ############################################################ 164 | 165 | class pose_estimation(nn.Module): 166 | def __init__(self, class_num, pretrain=True): 167 | super(pose_estimation, self).__init__() 168 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50 169 | if pretrain == True: 170 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth' 171 | self.resnet.load_weights(self.model_path) 172 | self.apply_fix() 173 | self.block = nn.Sequential(nn.Conv2d(512, 128, 1, 1, 0),nn.ReLU(inplace=True), 174 | ) 175 | self._init_weights(self.block) 176 | self.predict = nn.Conv2d(128, 25, 1, 1, 0) 177 | 178 | def _init_weights(self, model): 179 | for m in model: 180 | if isinstance(m, nn.Conv2d): 181 | m.weight.data.normal_(0, 0.01) 182 | if m.bias is not None: 183 | m.bias.data.zero_() 184 | def apply_fix(self): 185 | # 1. fix bn 186 | # 2. 
fix conv1 conv2 187 | for param in self.resnet.conv1.parameters(): 188 | param.requires_grad = False 189 | for param in self.resnet.layer1.parameters(): 190 | param.requires_grad = False 191 | 192 | 193 | def forward(self, x): 194 | C1, C2, C3, C4, C5 = self.resnet(x) 195 | 196 | out = self.block(C3) 197 | predict = self.predict(out) 198 | return predict -------------------------------------------------------------------------------- /models/bk/CPM_ResNet3.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | import math 8 | import torch.nn.functional as F 9 | 10 | ############################################################ 11 | # ResNet 12 | ############################################################ 13 | def conv3x3(in_planes, out_planes, stride=1): 14 | "3x3 convolution with padding" 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 16 | padding=1, bias=False) 17 | 18 | class BasicBlock(nn.Module): 19 | expansion = 1 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(inplanes, planes, stride) 24 | self.bn1 = nn.BatchNorm2d(planes) 25 | self.relu = nn.ReLU(inplace=True) 26 | self.conv2 = conv3x3(planes, planes) 27 | self.bn2 = nn.BatchNorm2d(planes) 28 | self.downsample = downsample 29 | self.stride = stride 30 | 31 | def forward(self, x): 32 | residual = x 33 | 34 | out = self.conv1(x) 35 | out = self.bn1(out) 36 | out = self.relu(out) 37 | 38 | out = self.conv2(out) 39 | out = self.bn2(out) 40 | 41 | if self.downsample is not None: 42 | residual = self.downsample(x) 43 | 44 | out += residual 45 | out = self.relu(out) 46 | 47 | return out 48 | 49 | 50 | class Bottleneck(nn.Module): 51 | expansion = 4 52 | 53 | def __init__(self, inplanes, planes, stride=1, downsample=None): 54 | super(Bottleneck, self).__init__() 55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 56 | self.bn1 = nn.BatchNorm2d(planes) 57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 58 | padding=1, bias=False) 59 | self.bn2 = nn.BatchNorm2d(planes) 60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 61 | self.bn3 = nn.BatchNorm2d(planes * 4) 62 | self.relu = nn.ReLU(inplace=True) 63 | self.downsample = downsample 64 | self.stride = stride 65 | 66 | def forward(self, x): 67 | residual = x 68 | 69 | out = self.conv1(x) 70 | out = self.bn1(out) 71 | out = self.relu(out) 72 | 73 | out = self.conv2(out) 74 | out = self.bn2(out) 75 | out = self.relu(out) 76 | 77 | out = self.conv3(out) 78 | out = self.bn3(out) 79 | 80 | if self.downsample is not None: 81 | residual = self.downsample(x) 82 | 83 | out += residual 84 | out = self.relu(out) 85 | 86 | return out 87 | 88 | 89 | class ResNet(nn.Module): 90 | 91 | def __init__(self, block, layers, num_classes=1000): 92 | self.inplanes = 64 93 | super(ResNet, self).__init__() 94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 95 | bias=False) 96 | self.bn1 = nn.BatchNorm2d(64) 97 | self.relu = nn.ReLU(inplace=True) 98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 99 | self.layer1 = self._make_layer(block, 64, layers[0]) 100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 102 | 
self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 103 | 104 | for m in self.modules(): 105 | if isinstance(m, nn.Conv2d): 106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 107 | m.weight.data.normal_(0, math.sqrt(2. / n)) 108 | elif isinstance(m, nn.BatchNorm2d): 109 | m.weight.data.fill_(1) 110 | m.bias.data.zero_() 111 | 112 | def _make_layer(self, block, planes, blocks, stride=1): 113 | downsample = None 114 | if stride != 1 or self.inplanes != planes * block.expansion: 115 | downsample = nn.Sequential( 116 | nn.Conv2d(self.inplanes, planes * block.expansion, 117 | kernel_size=1, stride=stride, bias=False), 118 | nn.BatchNorm2d(planes * block.expansion), 119 | ) 120 | 121 | layers = [] 122 | layers.append(block(self.inplanes, planes, stride, downsample)) 123 | self.inplanes = planes * block.expansion 124 | for i in range(1, blocks): 125 | layers.append(block(self.inplanes, planes)) 126 | 127 | return nn.Sequential(*layers) 128 | 129 | def load_weights(self, path): 130 | model_dict = self.state_dict() 131 | print('loading model from {}'.format(path)) 132 | try: 133 | #state_dict = torch.load(self.path) 134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()}) 135 | pretrained_dict = torch.load(path) 136 | from collections import OrderedDict 137 | tmp = OrderedDict() 138 | for k,v in pretrained_dict.items(): 139 | if k in model_dict: 140 | tmp[k] = v 141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 142 | # model_dict.update(pretrained_dict) 143 | model_dict.update(tmp) 144 | self.load_state_dict(model_dict) 145 | except: 146 | print ('loading model failed, {} may not exist'.format(path)) 147 | 148 | def forward(self, x): 149 | x = self.conv1(x) 150 | x = self.bn1(x) 151 | x = self.relu(x) 152 | C1 = self.maxpool(x) 153 | 154 | C2 = self.layer1(C1) 155 | C3 = self.layer2(C2) 156 | C4 = self.layer3(C3) 157 | C5 = self.layer4(C4) 158 | 159 | return C1, C2, C3, C4, C5 160 | 161 | ############################################################ 162 | # Pose Estimation Graph 163 | ############################################################ 164 | 165 | class pose_estimation(nn.Module): 166 | def __init__(self, class_num, pretrain=True): 167 | super(pose_estimation, self).__init__() 168 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50 169 | if pretrain == True: 170 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth' 171 | self.resnet.load_weights(self.model_path) 172 | self.apply_fix() 173 | self.reduce = nn.Sequential(nn.Conv2d(1024, 256, 1, 1, 0)) 174 | 175 | self.block = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0),nn.ReLU(inplace=True), 176 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 177 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 178 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 179 | nn.Conv2d(128, 512, 3, 1, 1), nn.ReLU(inplace=True) 180 | ) 181 | self._init_weights(self.block) 182 | self.predict = nn.Conv2d(512, 25, 1, 1, 0) 183 | 184 | def _init_weights(self, model): 185 | for m in model: 186 | if isinstance(m, nn.Conv2d): 187 | m.weight.data.normal_(0, 0.01) 188 | if m.bias is not None: 189 | m.bias.data.zero_() 190 | def apply_fix(self): 191 | # 1. fix bn 192 | # 2. 
fix conv1 conv2 193 | for param in self.resnet.conv1.parameters(): 194 | param.requires_grad = False 195 | for param in self.resnet.layer1.parameters(): 196 | param.requires_grad = False 197 | 198 | 199 | def forward(self, x): 200 | C1, C2, C3, C4, C5 = self.resnet(x) 201 | C4 = self.reduce(C4) 202 | C4 = F.upsample(C4, scale_factor=2) 203 | 204 | out = self.block(C4) 205 | predict = self.predict(out) 206 | return predict -------------------------------------------------------------------------------- /models/bk/CPM_ResNet4.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | import math 8 | import torch.nn.functional as F 9 | 10 | ############################################################ 11 | # ResNet 12 | ############################################################ 13 | def conv3x3(in_planes, out_planes, stride=1): 14 | "3x3 convolution with padding" 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 16 | padding=1, bias=False) 17 | 18 | class BasicBlock(nn.Module): 19 | expansion = 1 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(inplanes, planes, stride) 24 | self.bn1 = nn.BatchNorm2d(planes) 25 | self.relu = nn.ReLU(inplace=True) 26 | self.conv2 = conv3x3(planes, planes) 27 | self.bn2 = nn.BatchNorm2d(planes) 28 | self.downsample = downsample 29 | self.stride = stride 30 | 31 | def forward(self, x): 32 | residual = x 33 | 34 | out = self.conv1(x) 35 | out = self.bn1(out) 36 | out = self.relu(out) 37 | 38 | out = self.conv2(out) 39 | out = self.bn2(out) 40 | 41 | if self.downsample is not None: 42 | residual = self.downsample(x) 43 | 44 | out += residual 45 | out = self.relu(out) 46 | 47 | return out 48 | 49 | 50 | class Bottleneck(nn.Module): 51 | expansion = 4 52 | 53 | def __init__(self, inplanes, planes, stride=1, downsample=None): 54 | super(Bottleneck, self).__init__() 55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 56 | self.bn1 = nn.BatchNorm2d(planes) 57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 58 | padding=1, bias=False) 59 | self.bn2 = nn.BatchNorm2d(planes) 60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 61 | self.bn3 = nn.BatchNorm2d(planes * 4) 62 | self.relu = nn.ReLU(inplace=True) 63 | self.downsample = downsample 64 | self.stride = stride 65 | 66 | def forward(self, x): 67 | residual = x 68 | 69 | out = self.conv1(x) 70 | out = self.bn1(out) 71 | out = self.relu(out) 72 | 73 | out = self.conv2(out) 74 | out = self.bn2(out) 75 | out = self.relu(out) 76 | 77 | out = self.conv3(out) 78 | out = self.bn3(out) 79 | 80 | if self.downsample is not None: 81 | residual = self.downsample(x) 82 | 83 | out += residual 84 | out = self.relu(out) 85 | 86 | return out 87 | 88 | 89 | class ResNet(nn.Module): 90 | 91 | def __init__(self, block, layers, num_classes=1000): 92 | self.inplanes = 64 93 | super(ResNet, self).__init__() 94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 95 | bias=False) 96 | self.bn1 = nn.BatchNorm2d(64) 97 | self.relu = nn.ReLU(inplace=True) 98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 99 | self.layer1 = self._make_layer(block, 64, layers[0]) 100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 101 | 
self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 103 | 104 | for m in self.modules(): 105 | if isinstance(m, nn.Conv2d): 106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 107 | m.weight.data.normal_(0, math.sqrt(2. / n)) 108 | elif isinstance(m, nn.BatchNorm2d): 109 | m.weight.data.fill_(1) 110 | m.bias.data.zero_() 111 | 112 | def _make_layer(self, block, planes, blocks, stride=1): 113 | downsample = None 114 | if stride != 1 or self.inplanes != planes * block.expansion: 115 | downsample = nn.Sequential( 116 | nn.Conv2d(self.inplanes, planes * block.expansion, 117 | kernel_size=1, stride=stride, bias=False), 118 | nn.BatchNorm2d(planes * block.expansion), 119 | ) 120 | 121 | layers = [] 122 | layers.append(block(self.inplanes, planes, stride, downsample)) 123 | self.inplanes = planes * block.expansion 124 | for i in range(1, blocks): 125 | layers.append(block(self.inplanes, planes)) 126 | 127 | return nn.Sequential(*layers) 128 | 129 | def load_weights(self, path): 130 | model_dict = self.state_dict() 131 | print('loading model from {}'.format(path)) 132 | try: 133 | #state_dict = torch.load(self.path) 134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()}) 135 | pretrained_dict = torch.load(path) 136 | from collections import OrderedDict 137 | tmp = OrderedDict() 138 | for k,v in pretrained_dict.items(): 139 | if k in model_dict: 140 | tmp[k] = v 141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 142 | # model_dict.update(pretrained_dict) 143 | model_dict.update(tmp) 144 | self.load_state_dict(model_dict) 145 | except: 146 | print ('loading model failed, {} may not exist'.format(path)) 147 | 148 | def forward(self, x): 149 | x = self.conv1(x) 150 | x = self.bn1(x) 151 | x = self.relu(x) 152 | C1 = self.maxpool(x) 153 | 154 | C2 = self.layer1(C1) 155 | C3 = self.layer2(C2) 156 | C4 = self.layer3(C3) 157 | C5 = self.layer4(C4) 158 | 159 | return C1, C2, C3, C4, C5 160 | 161 | ############################################################ 162 | # Pose Estimation Graph 163 | ############################################################ 164 | 165 | class pose_estimation(nn.Module): 166 | def __init__(self, class_num, pretrain=True): 167 | super(pose_estimation, self).__init__() 168 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50 169 | if pretrain == True: 170 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth' 171 | self.resnet.load_weights(self.model_path) 172 | self.apply_fix() 173 | self.reduce = nn.Sequential(nn.Conv2d(1024, 256, 1, 1, 0)) 174 | 175 | self.block = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0),nn.ReLU(inplace=True), 176 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 177 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 178 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 179 | nn.Conv2d(128, 512, 3, 1, 1), nn.ReLU(inplace=True) 180 | ) 181 | self._init_weights(self.block) 182 | self.predict = nn.Conv2d(512, 25, 1, 1, 0) 183 | 184 | def _init_weights(self, model): 185 | for m in model: 186 | if isinstance(m, nn.Conv2d): 187 | m.weight.data.normal_(0, 0.01) 188 | if m.bias is not None: 189 | m.bias.data.zero_() 190 | def apply_fix(self): 191 | # 1. fix bn 192 | # 2. 
fix conv1 conv2 193 | for param in self.resnet.conv1.parameters(): 194 | param.requires_grad = False 195 | for param in self.resnet.layer1.parameters(): 196 | param.requires_grad = False 197 | 198 | 199 | def forward(self, x): 200 | C1, C2, C3, C4, C5 = self.resnet(x) 201 | C4 = self.reduce(C4) 202 | C4 = F.upsample(C4, scale_factor=2) 203 | 204 | out = self.block(C4) 205 | predict = self.predict(out) 206 | return predict -------------------------------------------------------------------------------- /models/bk/CPM_ResNet5.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | import math 8 | import torch.nn.functional as F 9 | 10 | ############################################################ 11 | # ResNet 12 | ############################################################ 13 | def conv3x3(in_planes, out_planes, stride=1): 14 | "3x3 convolution with padding" 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 16 | padding=1, bias=False) 17 | 18 | class BasicBlock(nn.Module): 19 | expansion = 1 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(inplanes, planes, stride) 24 | self.bn1 = nn.BatchNorm2d(planes) 25 | self.relu = nn.ReLU(inplace=True) 26 | self.conv2 = conv3x3(planes, planes) 27 | self.bn2 = nn.BatchNorm2d(planes) 28 | self.downsample = downsample 29 | self.stride = stride 30 | 31 | def forward(self, x): 32 | residual = x 33 | 34 | out = self.conv1(x) 35 | out = self.bn1(out) 36 | out = self.relu(out) 37 | 38 | out = self.conv2(out) 39 | out = self.bn2(out) 40 | 41 | if self.downsample is not None: 42 | residual = self.downsample(x) 43 | 44 | out += residual 45 | out = self.relu(out) 46 | 47 | return out 48 | 49 | 50 | class Bottleneck(nn.Module): 51 | expansion = 4 52 | 53 | def __init__(self, inplanes, planes, stride=1, downsample=None): 54 | super(Bottleneck, self).__init__() 55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 56 | self.bn1 = nn.BatchNorm2d(planes) 57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 58 | padding=1, bias=False) 59 | self.bn2 = nn.BatchNorm2d(planes) 60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 61 | self.bn3 = nn.BatchNorm2d(planes * 4) 62 | self.relu = nn.ReLU(inplace=True) 63 | self.downsample = downsample 64 | self.stride = stride 65 | 66 | def forward(self, x): 67 | residual = x 68 | 69 | out = self.conv1(x) 70 | out = self.bn1(out) 71 | out = self.relu(out) 72 | 73 | out = self.conv2(out) 74 | out = self.bn2(out) 75 | out = self.relu(out) 76 | 77 | out = self.conv3(out) 78 | out = self.bn3(out) 79 | 80 | if self.downsample is not None: 81 | residual = self.downsample(x) 82 | 83 | out += residual 84 | out = self.relu(out) 85 | 86 | return out 87 | 88 | 89 | class ResNet(nn.Module): 90 | 91 | def __init__(self, block, layers, num_classes=1000): 92 | self.inplanes = 64 93 | super(ResNet, self).__init__() 94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 95 | bias=False) 96 | self.bn1 = nn.BatchNorm2d(64) 97 | self.relu = nn.ReLU(inplace=True) 98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 99 | self.layer1 = self._make_layer(block, 64, layers[0]) 100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 101 | 
self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 103 | 104 | for m in self.modules(): 105 | if isinstance(m, nn.Conv2d): 106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 107 | m.weight.data.normal_(0, math.sqrt(2. / n)) 108 | elif isinstance(m, nn.BatchNorm2d): 109 | m.weight.data.fill_(1) 110 | m.bias.data.zero_() 111 | 112 | def _make_layer(self, block, planes, blocks, stride=1): 113 | downsample = None 114 | if stride != 1 or self.inplanes != planes * block.expansion: 115 | downsample = nn.Sequential( 116 | nn.Conv2d(self.inplanes, planes * block.expansion, 117 | kernel_size=1, stride=stride, bias=False), 118 | nn.BatchNorm2d(planes * block.expansion), 119 | ) 120 | 121 | layers = [] 122 | layers.append(block(self.inplanes, planes, stride, downsample)) 123 | self.inplanes = planes * block.expansion 124 | for i in range(1, blocks): 125 | layers.append(block(self.inplanes, planes)) 126 | 127 | return nn.Sequential(*layers) 128 | 129 | def load_weights(self, path): 130 | model_dict = self.state_dict() 131 | print('loading model from {}'.format(path)) 132 | try: 133 | #state_dict = torch.load(self.path) 134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()}) 135 | pretrained_dict = torch.load(path) 136 | from collections import OrderedDict 137 | tmp = OrderedDict() 138 | for k,v in pretrained_dict.items(): 139 | if k in model_dict: 140 | tmp[k] = v 141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 142 | # model_dict.update(pretrained_dict) 143 | model_dict.update(tmp) 144 | self.load_state_dict(model_dict) 145 | except: 146 | print ('loading model failed, {} may not exist'.format(path)) 147 | 148 | def forward(self, x): 149 | x = self.conv1(x) 150 | x = self.bn1(x) 151 | x = self.relu(x) 152 | C1 = self.maxpool(x) 153 | 154 | C2 = self.layer1(C1) 155 | C3 = self.layer2(C2) 156 | C4 = self.layer3(C3) 157 | C5 = self.layer4(C4) 158 | 159 | return C1, C2, C3, C4, C5 160 | 161 | ############################################################ 162 | # Pose Estimation Graph 163 | ############################################################ 164 | 165 | class pose_estimation(nn.Module): 166 | def __init__(self, class_num, pretrain=True): 167 | super(pose_estimation, self).__init__() 168 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50 169 | if pretrain == True: 170 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth' 171 | self.resnet.load_weights(self.model_path) 172 | self.apply_fix() 173 | self.reduce_C4 = nn.Sequential(nn.Conv2d(1024, 256, 1, 1, 0)) 174 | self.reduce_C5 = nn.Sequential(nn.Conv2d(2048, 256, 1, 1, 0)) 175 | 176 | self.block = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0),nn.ReLU(inplace=True), 177 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 178 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 179 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 180 | nn.Conv2d(128, 512, 3, 1, 1), nn.ReLU(inplace=True) 181 | ) 182 | self._init_weights(self.block) 183 | self.predict = nn.Conv2d(512, 25, 1, 1, 0) 184 | 185 | def _init_weights(self, model): 186 | for m in model: 187 | if isinstance(m, nn.Conv2d): 188 | m.weight.data.normal_(0, 0.01) 189 | if m.bias is not None: 190 | m.bias.data.zero_() 191 | def apply_fix(self): 192 | # 1. fix bn 193 | # 2. 
fix conv1 conv2 194 | for param in self.resnet.conv1.parameters(): 195 | param.requires_grad = False 196 | for param in self.resnet.layer1.parameters(): 197 | param.requires_grad = False 198 | 199 | 200 | def forward(self, x): 201 | C1, C2, C3, C4, C5 = self.resnet(x) 202 | # C4 = self.reduce_C4(C4) 203 | # C4 = F.upsample(C4, scale_factor=2) 204 | 205 | C5 = self.reduce_C5(C5) 206 | C5 = F.upsample(C5, scale_factor=4) 207 | 208 | out = self.block(C5) 209 | predict = self.predict(out) 210 | return predict -------------------------------------------------------------------------------- /models/bk/CPM_ResNet6.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | import math 8 | import torch.nn.functional as F 9 | 10 | ############################################################ 11 | # ResNet 12 | ############################################################ 13 | def conv3x3(in_planes, out_planes, stride=1): 14 | "3x3 convolution with padding" 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 16 | padding=1, bias=False) 17 | 18 | class BasicBlock(nn.Module): 19 | expansion = 1 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(inplanes, planes, stride) 24 | self.bn1 = nn.BatchNorm2d(planes) 25 | self.relu = nn.ReLU(inplace=True) 26 | self.conv2 = conv3x3(planes, planes) 27 | self.bn2 = nn.BatchNorm2d(planes) 28 | self.downsample = downsample 29 | self.stride = stride 30 | 31 | def forward(self, x): 32 | residual = x 33 | 34 | out = self.conv1(x) 35 | out = self.bn1(out) 36 | out = self.relu(out) 37 | 38 | out = self.conv2(out) 39 | out = self.bn2(out) 40 | 41 | if self.downsample is not None: 42 | residual = self.downsample(x) 43 | 44 | out += residual 45 | out = self.relu(out) 46 | 47 | return out 48 | 49 | 50 | class Bottleneck(nn.Module): 51 | expansion = 4 52 | 53 | def __init__(self, inplanes, planes, stride=1, downsample=None): 54 | super(Bottleneck, self).__init__() 55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 56 | self.bn1 = nn.BatchNorm2d(planes) 57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 58 | padding=1, bias=False) 59 | self.bn2 = nn.BatchNorm2d(planes) 60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 61 | self.bn3 = nn.BatchNorm2d(planes * 4) 62 | self.relu = nn.ReLU(inplace=True) 63 | self.downsample = downsample 64 | self.stride = stride 65 | 66 | def forward(self, x): 67 | residual = x 68 | 69 | out = self.conv1(x) 70 | out = self.bn1(out) 71 | out = self.relu(out) 72 | 73 | out = self.conv2(out) 74 | out = self.bn2(out) 75 | out = self.relu(out) 76 | 77 | out = self.conv3(out) 78 | out = self.bn3(out) 79 | 80 | if self.downsample is not None: 81 | residual = self.downsample(x) 82 | 83 | out += residual 84 | out = self.relu(out) 85 | 86 | return out 87 | 88 | 89 | class ResNet(nn.Module): 90 | 91 | def __init__(self, block, layers, num_classes=1000): 92 | self.inplanes = 64 93 | super(ResNet, self).__init__() 94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 95 | bias=False) 96 | self.bn1 = nn.BatchNorm2d(64) 97 | self.relu = nn.ReLU(inplace=True) 98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 99 | self.layer1 = self._make_layer(block, 64, 
layers[0]) 100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 103 | 104 | for m in self.modules(): 105 | if isinstance(m, nn.Conv2d): 106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 107 | m.weight.data.normal_(0, math.sqrt(2. / n)) 108 | elif isinstance(m, nn.BatchNorm2d): 109 | m.weight.data.fill_(1) 110 | m.bias.data.zero_() 111 | 112 | def _make_layer(self, block, planes, blocks, stride=1): 113 | downsample = None 114 | if stride != 1 or self.inplanes != planes * block.expansion: 115 | downsample = nn.Sequential( 116 | nn.Conv2d(self.inplanes, planes * block.expansion, 117 | kernel_size=1, stride=stride, bias=False), 118 | nn.BatchNorm2d(planes * block.expansion), 119 | ) 120 | 121 | layers = [] 122 | layers.append(block(self.inplanes, planes, stride, downsample)) 123 | self.inplanes = planes * block.expansion 124 | for i in range(1, blocks): 125 | layers.append(block(self.inplanes, planes)) 126 | 127 | return nn.Sequential(*layers) 128 | 129 | def load_weights(self, path): 130 | model_dict = self.state_dict() 131 | print('loading model from {}'.format(path)) 132 | try: 133 | #state_dict = torch.load(self.path) 134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()}) 135 | pretrained_dict = torch.load(path) 136 | from collections import OrderedDict 137 | tmp = OrderedDict() 138 | for k,v in pretrained_dict.items(): 139 | if k in model_dict: 140 | tmp[k] = v 141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 142 | # model_dict.update(pretrained_dict) 143 | model_dict.update(tmp) 144 | self.load_state_dict(model_dict) 145 | except: 146 | print ('loading model failed, {} may not exist'.format(path)) 147 | 148 | def forward(self, x): 149 | x = self.conv1(x) 150 | x = self.bn1(x) 151 | x = self.relu(x) 152 | C1 = self.maxpool(x) 153 | 154 | C2 = self.layer1(C1) 155 | C3 = self.layer2(C2) 156 | C4 = self.layer3(C3) 157 | C5 = self.layer4(C4) 158 | 159 | return C1, C2, C3, C4, C5 160 | 161 | ############################################################ 162 | # Pose Estimation Graph 163 | ############################################################ 164 | 165 | class pose_estimation(nn.Module): 166 | def __init__(self, class_num, pretrain=True): 167 | super(pose_estimation, self).__init__() 168 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50 169 | if pretrain == True: 170 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth' 171 | self.resnet.load_weights(self.model_path) 172 | self.apply_fix() 173 | self.reduce_C4 = nn.Sequential(nn.Conv2d(1024, 256, 1, 1, 0)) 174 | self.reduce_C5 = nn.Sequential(nn.Conv2d(2048, 256, 1, 1, 0)) 175 | 176 | self.block = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0),nn.ReLU(inplace=True), 177 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 178 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 179 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 180 | nn.Conv2d(128, 512, 3, 1, 1), nn.ReLU(inplace=True) 181 | ) 182 | self._init_weights(self.block) 183 | self.predict = nn.Conv2d(512, 25, 1, 1, 0) 184 | 185 | def _init_weights(self, model): 186 | for m in model: 187 | if isinstance(m, nn.Conv2d): 188 | m.weight.data.normal_(0, 0.01) 189 | if m.bias is not None: 190 | m.bias.data.zero_() 191 | def apply_fix(self): 192 | # 1. fix bn 193 | # 2. 
fix conv1 conv2 194 | for param in self.resnet.conv1.parameters(): 195 | param.requires_grad = False 196 | for param in self.resnet.layer1.parameters(): 197 | param.requires_grad = False 198 | 199 | 200 | def forward(self, x): 201 | C1, C2, C3, C4, C5 = self.resnet(x) 202 | C4 = self.reduce_C4(C4) 203 | C4 = F.upsample(C4, scale_factor=2) 204 | 205 | C5 = self.reduce_C5(C5) 206 | C5 = F.upsample(C5, scale_factor=4) 207 | 208 | P4 = C5 + C4 209 | 210 | out = self.block(P4) 211 | predict = self.predict(out) 212 | return predict -------------------------------------------------------------------------------- /models/bk/CPM_ResNet7.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import torch.nn as nn 7 | import math 8 | import torch.nn.functional as F 9 | 10 | ############################################################ 11 | # ResNet 12 | ############################################################ 13 | def conv3x3(in_planes, out_planes, stride=1): 14 | "3x3 convolution with padding" 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 16 | padding=1, bias=False) 17 | 18 | class BasicBlock(nn.Module): 19 | expansion = 1 20 | 21 | def __init__(self, inplanes, planes, stride=1, downsample=None): 22 | super(BasicBlock, self).__init__() 23 | self.conv1 = conv3x3(inplanes, planes, stride) 24 | self.bn1 = nn.BatchNorm2d(planes) 25 | self.relu = nn.ReLU(inplace=True) 26 | self.conv2 = conv3x3(planes, planes) 27 | self.bn2 = nn.BatchNorm2d(planes) 28 | self.downsample = downsample 29 | self.stride = stride 30 | 31 | def forward(self, x): 32 | residual = x 33 | 34 | out = self.conv1(x) 35 | out = self.bn1(out) 36 | out = self.relu(out) 37 | 38 | out = self.conv2(out) 39 | out = self.bn2(out) 40 | 41 | if self.downsample is not None: 42 | residual = self.downsample(x) 43 | 44 | out += residual 45 | out = self.relu(out) 46 | 47 | return out 48 | 49 | 50 | class Bottleneck(nn.Module): 51 | expansion = 4 52 | 53 | def __init__(self, inplanes, planes, stride=1, downsample=None): 54 | super(Bottleneck, self).__init__() 55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 56 | self.bn1 = nn.BatchNorm2d(planes) 57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 58 | padding=1, bias=False) 59 | self.bn2 = nn.BatchNorm2d(planes) 60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 61 | self.bn3 = nn.BatchNorm2d(planes * 4) 62 | self.relu = nn.ReLU(inplace=True) 63 | self.downsample = downsample 64 | self.stride = stride 65 | 66 | def forward(self, x): 67 | residual = x 68 | 69 | out = self.conv1(x) 70 | out = self.bn1(out) 71 | out = self.relu(out) 72 | 73 | out = self.conv2(out) 74 | out = self.bn2(out) 75 | out = self.relu(out) 76 | 77 | out = self.conv3(out) 78 | out = self.bn3(out) 79 | 80 | if self.downsample is not None: 81 | residual = self.downsample(x) 82 | 83 | out += residual 84 | out = self.relu(out) 85 | 86 | return out 87 | 88 | 89 | class ResNet(nn.Module): 90 | 91 | def __init__(self, block, layers, num_classes=1000): 92 | self.inplanes = 64 93 | super(ResNet, self).__init__() 94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 95 | bias=False) 96 | self.bn1 = nn.BatchNorm2d(64) 97 | self.relu = nn.ReLU(inplace=True) 98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 99 | self.layer1 = 
self._make_layer(block, 64, layers[0]) 100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 103 | 104 | for m in self.modules(): 105 | if isinstance(m, nn.Conv2d): 106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 107 | m.weight.data.normal_(0, math.sqrt(2. / n)) 108 | elif isinstance(m, nn.BatchNorm2d): 109 | m.weight.data.fill_(1) 110 | m.bias.data.zero_() 111 | 112 | def _make_layer(self, block, planes, blocks, stride=1): 113 | downsample = None 114 | if stride != 1 or self.inplanes != planes * block.expansion: 115 | downsample = nn.Sequential( 116 | nn.Conv2d(self.inplanes, planes * block.expansion, 117 | kernel_size=1, stride=stride, bias=False), 118 | nn.BatchNorm2d(planes * block.expansion), 119 | ) 120 | 121 | layers = [] 122 | layers.append(block(self.inplanes, planes, stride, downsample)) 123 | self.inplanes = planes * block.expansion 124 | for i in range(1, blocks): 125 | layers.append(block(self.inplanes, planes)) 126 | 127 | return nn.Sequential(*layers) 128 | 129 | def load_weights(self, path): 130 | model_dict = self.state_dict() 131 | print('loading model from {}'.format(path)) 132 | try: 133 | #state_dict = torch.load(self.path) 134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()}) 135 | pretrained_dict = torch.load(path) 136 | from collections import OrderedDict 137 | tmp = OrderedDict() 138 | for k,v in pretrained_dict.items(): 139 | if k in model_dict: 140 | tmp[k] = v 141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} 142 | # model_dict.update(pretrained_dict) 143 | model_dict.update(tmp) 144 | self.load_state_dict(model_dict) 145 | except: 146 | print ('loading model failed, {} may not exist'.format(path)) 147 | 148 | def forward(self, x): 149 | x = self.conv1(x) 150 | x = self.bn1(x) 151 | x = self.relu(x) 152 | C1 = self.maxpool(x) 153 | 154 | C2 = self.layer1(C1) 155 | C3 = self.layer2(C2) 156 | C4 = self.layer3(C3) 157 | C5 = self.layer4(C4) 158 | 159 | return C1, C2, C3, C4, C5 160 | 161 | ############################################################ 162 | # Pose Estimation Graph 163 | ############################################################ 164 | 165 | class pose_estimation(nn.Module): 166 | def __init__(self, class_num, pretrain=True): 167 | super(pose_estimation, self).__init__() 168 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50 169 | if pretrain == True: 170 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth' 171 | self.resnet.load_weights(self.model_path) 172 | self.apply_fix() 173 | self.reduce_C4 = nn.Sequential(nn.Conv2d(1024, 256, 1, 1, 0)) 174 | self.reduce_C5 = nn.Sequential(nn.Conv2d(2048, 256, 1, 1, 0)) 175 | 176 | self.block = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0),nn.ReLU(inplace=True), 177 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 178 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 179 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True), 180 | nn.Conv2d(128, 512, 3, 1, 1), nn.ReLU(inplace=True) 181 | ) 182 | self._init_weights(self.block) 183 | self.predict = nn.Conv2d(512, 25, 1, 1, 0) 184 | 185 | def _init_weights(self, model): 186 | for m in model: 187 | if isinstance(m, nn.Conv2d): 188 | m.weight.data.normal_(0, 0.01) 189 | if m.bias is not None: 190 | m.bias.data.zero_() 191 | def apply_fix(self): 192 | # 1. fix bn 193 | # 2. 
fix conv1 conv2 194 | for param in self.resnet.conv1.parameters(): 195 | param.requires_grad = False 196 | for param in self.resnet.layer1.parameters(): 197 | param.requires_grad = False 198 | 199 | 200 | def forward(self, x): 201 | C1, C2, C3, C4, C5 = self.resnet(x) 202 | C4 = self.reduce_C4(C4) 203 | C4 = F.upsample(C4, scale_factor=2) 204 | 205 | C5 = self.reduce_C5(C5) 206 | C5 = F.upsample(C5, scale_factor=4) 207 | 208 | P4 = C5 + C4 209 | 210 | out = self.block(P4) 211 | predict = self.predict(out) 212 | return predict -------------------------------------------------------------------------------- /models/bk/FPN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | import torch.nn.functional as F 5 | 6 | ############################################################ 7 | # ResNet 8 | ############################################################ 9 | def conv3x3(in_planes, out_planes, stride=1): 10 | "3x3 convolution with padding" 11 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 12 | padding=1, bias=False) 13 | 14 | class BasicBlock(nn.Module): 15 | expansion = 1 16 | 17 | def __init__(self, inplanes, planes, stride=1, downsample=None): 18 | super(BasicBlock, self).__init__() 19 | self.conv1 = conv3x3(inplanes, planes, stride) 20 | self.bn1 = nn.BatchNorm2d(planes) 21 | self.relu = nn.ReLU(inplace=True) 22 | self.conv2 = conv3x3(planes, planes) 23 | self.bn2 = nn.BatchNorm2d(planes) 24 | self.downsample = downsample 25 | self.stride = stride 26 | 27 | def forward(self, x): 28 | residual = x 29 | 30 | out = self.conv1(x) 31 | out = self.bn1(out) 32 | out = self.relu(out) 33 | 34 | out = self.conv2(out) 35 | out = self.bn2(out) 36 | 37 | if self.downsample is not None: 38 | residual = self.downsample(x) 39 | 40 | out += residual 41 | out = self.relu(out) 42 | 43 | return out 44 | 45 | 46 | class Bottleneck(nn.Module): 47 | expansion = 4 48 | 49 | def __init__(self, inplanes, planes, stride=1, downsample=None): 50 | super(Bottleneck, self).__init__() 51 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 52 | self.bn1 = nn.BatchNorm2d(planes) 53 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 54 | padding=1, bias=False) 55 | self.bn2 = nn.BatchNorm2d(planes) 56 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 57 | self.bn3 = nn.BatchNorm2d(planes * 4) 58 | self.relu = nn.ReLU(inplace=True) 59 | self.downsample = downsample 60 | self.stride = stride 61 | 62 | def forward(self, x): 63 | residual = x 64 | 65 | out = self.conv1(x) 66 | out = self.bn1(out) 67 | out = self.relu(out) 68 | 69 | out = self.conv2(out) 70 | out = self.bn2(out) 71 | out = self.relu(out) 72 | 73 | out = self.conv3(out) 74 | out = self.bn3(out) 75 | 76 | if self.downsample is not None: 77 | residual = self.downsample(x) 78 | 79 | out += residual 80 | out = self.relu(out) 81 | 82 | return out 83 | 84 | 85 | class ResNet(nn.Module): 86 | 87 | def __init__(self, block, layers, num_classes=1000): 88 | self.inplanes = 64 89 | super(ResNet, self).__init__() 90 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 91 | bias=False) 92 | self.bn1 = nn.BatchNorm2d(64) 93 | self.relu = nn.ReLU(inplace=True) 94 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 95 | self.layer1 = self._make_layer(block, 64, layers[0]) 96 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 97 | self.layer3 = 
self._make_layer(block, 256, layers[2], stride=2) 98 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 99 | 100 | for m in self.modules(): 101 | if isinstance(m, nn.Conv2d): 102 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 103 | m.weight.data.normal_(0, math.sqrt(2. / n)) 104 | elif isinstance(m, nn.BatchNorm2d): 105 | m.weight.data.fill_(1) 106 | m.bias.data.zero_() 107 | 108 | def _make_layer(self, block, planes, blocks, stride=1): 109 | downsample = None 110 | if stride != 1 or self.inplanes != planes * block.expansion: 111 | downsample = nn.Sequential( 112 | nn.Conv2d(self.inplanes, planes * block.expansion, 113 | kernel_size=1, stride=stride, bias=False), 114 | nn.BatchNorm2d(planes * block.expansion), 115 | ) 116 | 117 | layers = [] 118 | layers.append(block(self.inplanes, planes, stride, downsample)) 119 | self.inplanes = planes * block.expansion 120 | for i in range(1, blocks): 121 | layers.append(block(self.inplanes, planes)) 122 | 123 | return nn.Sequential(*layers) 124 | 125 | def forward(self, x): 126 | x = self.conv1(x) 127 | x = self.bn1(x) 128 | x = self.relu(x) 129 | C1 = self.maxpool(x) 130 | 131 | C2 = self.layer1(C1) 132 | C3 = self.layer2(C2) 133 | C4 = self.layer3(C3) 134 | C5 = self.layer4(C4) 135 | 136 | return C1, C2, C3, C4, C5 137 | 138 | ############################################################ 139 | # FPN Graph 140 | ############################################################ 141 | 142 | class FPN(nn.Module): 143 | def __init__(self, out_channels): 144 | super(FPN, self).__init__() 145 | self.out_channels = out_channels 146 | self.P6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0, ceil_mode=False) 147 | self.P5_conv1 = nn.Conv2d(2048, self.out_channels, kernel_size=1, stride=1) 148 | self.P5_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1) 149 | self.P4_conv1 = nn.Conv2d(1024, self.out_channels, kernel_size=1, stride=1) 150 | self.P4_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1) 151 | 152 | self.P3_conv1 = nn.Conv2d(512, self.out_channels, kernel_size=1, stride=1) 153 | self.P3_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1) 154 | self.P2_conv1 = nn.Conv2d(256, self.out_channels, kernel_size=1, stride=1) 155 | self.P2_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1) 156 | 157 | 158 | def forward(self, C1, C2, C3, C4, C5): 159 | 160 | p5_out = self.P5_conv1(C5) 161 | p4_out = self.P4_conv1(C4) + F.upsample(p5_out, scale_factor=2) 162 | p3_out = self.P3_conv1(C3) + F.upsample(p4_out, scale_factor=2) 163 | p2_out = self.P2_conv1(C2) + F.upsample(p3_out, scale_factor=2) 164 | 165 | p5_out = self.P5_conv2(p5_out) 166 | p4_out = self.P4_conv2(p4_out) 167 | p3_out = self.P3_conv2(p3_out) 168 | p2_out = self.P2_conv2(p2_out) 169 | 170 | # P6 is used for the 5th anchor scale in RPN. Generated by 171 | # subsampling from P5 with stride of 2.
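# The top-down pathway above follows Lin et al., "Feature Pyramid Networks for
# Object Detection" (CVPR 2017): each P_i adds a 1x1 lateral projection of C_i
# to the 2x-upsampled coarser map, and the trailing 3x3 convolutions smooth the
# merged maps to reduce the aliasing introduced by upsampling.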
172 | p6_out = self.P6(p5_out) 173 | 174 | return p2_out, p3_out, p4_out, p5_out, p6_out 175 | 176 | ############################################################ 177 | # Pose Estimation Graph 178 | ############################################################ 179 | 180 | class pose_estimation(nn.Module): 181 | def __init__(self, out_channels=256, pretrain=True): # 256 = standard FPN width (assumed default) 182 | super(pose_estimation, self).__init__() 183 | self.resnet50 = ResNet(Bottleneck, [3, 4, 6, 3]) 184 | if pretrain == True: 185 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth' 186 | state_dict = torch.load(self.model_path) 187 | # keep only checkpoint keys present in this headless backbone (no avgpool/fc) 188 | state_dict = {k: v for k, v in state_dict.items() if k in self.resnet50.state_dict()} 189 | self.resnet50.load_state_dict(state_dict) 190 | self.fpn = FPN(out_channels) 191 | # NOTE: backup draft, no prediction head or forward() was implemented 192 | 193 | -------------------------------------------------------------------------------- /models/bk/hourglass.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision.models as models 4 | import torch.nn.functional as F 5 | 6 | 7 | __all__ = ['HourglassNet', 'hg'] 8 | 9 | class Bottleneck(nn.Module): 10 | expansion = 2 11 | 12 | def __init__(self, inplanes, planes, stride=1, downsample=None): 13 | super(Bottleneck, self).__init__() 14 | 15 | self.bn1 = nn.BatchNorm2d(inplanes) 16 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=True) 17 | self.bn2 = nn.BatchNorm2d(planes) 18 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 19 | padding=1, bias=True) 20 | self.bn3 = nn.BatchNorm2d(planes) 21 | self.conv3 = nn.Conv2d(planes, planes * 2, kernel_size=1, bias=True) 22 | self.relu = nn.ReLU(inplace=True) 23 | self.downsample = downsample 24 | self.stride = stride 25 | 26 | def forward(self, x): 27 | residual = x 28 | 29 | out = self.bn1(x) 30 | out = self.relu(out) 31 | out = self.conv1(out) 32 | 33 | out = self.bn2(out) 34 | out = self.relu(out) 35 | out = self.conv2(out) 36 | 37 | out = self.bn3(out) 38 | out = self.relu(out) 39 | out = self.conv3(out) 40 | 41 | if self.downsample is not None: 42 | residual = self.downsample(x) 43 | 44 | out += residual 45 | 46 | return out 47 | 48 | 49 | class Hourglass(nn.Module): 50 | def __init__(self, block, num_blocks, planes, depth): 51 | super(Hourglass, self).__init__() 52 | self.depth = depth 53 | self.block = block 54 | self.upsample = nn.Upsample(scale_factor=2) 55 | self.hg = self._make_hour_glass(block, num_blocks, planes, depth) 56 | 57 | def _make_residual(self, block, num_blocks, planes): 58 | layers = [] 59 | for i in range(0, num_blocks): 60 | layers.append(block(planes*block.expansion, planes)) 61 | return nn.Sequential(*layers) 62 | 63 | def _make_hour_glass(self, block, num_blocks, planes, depth): 64 | hg = [] 65 | for i in range(depth): 66 | res = [] 67 | for j in range(3): 68 | res.append(self._make_residual(block, num_blocks, planes)) 69 | if i == 0: 70 | res.append(self._make_residual(block, num_blocks, planes)) 71 | hg.append(nn.ModuleList(res)) 72 | return nn.ModuleList(hg) 73 | 74 | def _hour_glass_forward(self, n, x): 75 | up1 = self.hg[n-1][0](x) 76 | low1 = F.max_pool2d(x, 2, stride=2) 77 | low1 = self.hg[n-1][1](low1) 78 | 79 | if n > 1: 80 | low2 = self._hour_glass_forward(n-1, low1) 81 | else: 82 | low2 = self.hg[n-1][3](low1) 83 | low3 = self.hg[n-1][2](low2) 84 | up2 = self.upsample(low3) 85 | out = up1 + up2 86 | return out 87 | 88 | def forward(self, x): 89 | return self._hour_glass_forward(self.depth, x) 90 | 91 | 92 | class HourglassNet(nn.Module): 93 | '''Hourglass model from Newell et al ECCV 2016''' 94 | def __init__(self, block, num_stacks=2, 
num_blocks=4, num_classes=16): 95 | super(HourglassNet, self).__init__() 96 | 97 | self.inplanes = 64 98 | self.num_feats = 128 99 | self.num_stacks = num_stacks 100 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, 101 | bias=True) 102 | self.bn1 = nn.BatchNorm2d(self.inplanes) 103 | self.relu = nn.ReLU(inplace=True) 104 | self.layer1 = self._make_residual(block, self.inplanes, 1) 105 | self.layer2 = self._make_residual(block, self.inplanes, 1) 106 | self.layer3 = self._make_residual(block, self.num_feats, 1) 107 | self.maxpool = nn.MaxPool2d(2, stride=2) 108 | 109 | # build hourglass modules 110 | ch = self.num_feats*block.expansion 111 | hg, res, fc, score, fc_, score_ = [], [], [], [], [], [] 112 | for i in range(num_stacks): 113 | hg.append(Hourglass(block, num_blocks, self.num_feats, 4)) 114 | res.append(self._make_residual(block, self.num_feats, num_blocks)) 115 | fc.append(self._make_fc(ch, ch)) 116 | score.append(nn.Conv2d(ch, num_classes, kernel_size=1, bias=True)) 117 | if i < num_stacks-1: 118 | fc_.append(nn.Conv2d(ch, ch, kernel_size=1, bias=True)) 119 | score_.append(nn.Conv2d(num_classes, ch, kernel_size=1, bias=True)) 120 | self.hg = nn.ModuleList(hg) 121 | self.res = nn.ModuleList(res) 122 | self.fc = nn.ModuleList(fc) 123 | self.score = nn.ModuleList(score) 124 | self.fc_ = nn.ModuleList(fc_) 125 | self.score_ = nn.ModuleList(score_) 126 | 127 | def _make_residual(self, block, planes, blocks, stride=1): 128 | downsample = None 129 | if stride != 1 or self.inplanes != planes * block.expansion: 130 | downsample = nn.Sequential( 131 | nn.Conv2d(self.inplanes, planes * block.expansion, 132 | kernel_size=1, stride=stride, bias=True), 133 | ) 134 | 135 | layers = [] 136 | layers.append(block(self.inplanes, planes, stride, downsample)) 137 | self.inplanes = planes * block.expansion 138 | for i in range(1, blocks): 139 | layers.append(block(self.inplanes, planes)) 140 | 141 | return nn.Sequential(*layers) 142 | 143 | def _make_fc(self, inplanes, outplanes): 144 | bn = nn.BatchNorm2d(inplanes) 145 | conv = nn.Conv2d(inplanes, outplanes, kernel_size=1, bias=True) 146 | return nn.Sequential( 147 | conv, 148 | bn, 149 | self.relu, 150 | ) 151 | 152 | def forward(self, x): 153 | out = [] 154 | x = self.conv1(x) 155 | x = self.bn1(x) 156 | x = self.relu(x) 157 | 158 | x = self.layer1(x) 159 | x = self.maxpool(x) 160 | x = self.layer2(x) 161 | x = self.layer3(x) 162 | 163 | for i in range(self.num_stacks): 164 | y = self.hg[i](x) 165 | y = self.res[i](y) 166 | y = self.fc[i](y) 167 | score = self.score[i](y) 168 | out.append(score) 169 | if i < self.num_stacks-1: 170 | fc_ = self.fc_[i](y) 171 | score_ = self.score_[i](score) 172 | x = x + fc_ + score_ 173 | 174 | return out 175 | 176 | 177 | def hg(num_stacks, num_blocks, num_classes): 178 | model = HourglassNet(Bottleneck, num_stacks=num_stacks, num_blocks=num_blocks, 179 | num_classes=num_classes) 180 | return model -------------------------------------------------------------------------------- /models/hourglass.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision.models as models 4 | import torch.nn.functional as F 5 | 6 | 7 | __all__ = ['HourglassNet', 'hg'] 8 | 9 | class Bottleneck(nn.Module): 10 | expansion = 2 11 | 12 | def __init__(self, inplanes, planes, stride=1, downsample=None): 13 | super(Bottleneck, self).__init__() 14 | 15 | self.bn1 = nn.BatchNorm2d(inplanes) 16 | self.conv1 = 
nn.Conv2d(inplanes, planes, kernel_size=1, bias=True) 17 | self.bn2 = nn.BatchNorm2d(planes) 18 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 19 | padding=1, bias=True) 20 | self.bn3 = nn.BatchNorm2d(planes) 21 | self.conv3 = nn.Conv2d(planes, planes * 2, kernel_size=1, bias=True) 22 | self.relu = nn.ReLU(inplace=True) 23 | self.downsample = downsample 24 | self.stride = stride 25 | 26 | def forward(self, x): 27 | residual = x 28 | 29 | out = self.bn1(x) 30 | out = self.relu(out) 31 | out = self.conv1(out) 32 | 33 | out = self.bn2(out) 34 | out = self.relu(out) 35 | out = self.conv2(out) 36 | 37 | out = self.bn3(out) 38 | out = self.relu(out) 39 | out = self.conv3(out) 40 | 41 | if self.downsample is not None: 42 | residual = self.downsample(x) 43 | 44 | out += residual 45 | 46 | return out 47 | 48 | 49 | class Hourglass(nn.Module): 50 | def __init__(self, block, num_blocks, planes, depth): 51 | super(Hourglass, self).__init__() 52 | self.depth = depth 53 | self.block = block 54 | self.upsample = nn.Upsample(scale_factor=2) 55 | self.hg = self._make_hour_glass(block, num_blocks, planes, depth) 56 | 57 | def _make_residual(self, block, num_blocks, planes): 58 | layers = [] 59 | for i in range(0, num_blocks): 60 | layers.append(block(planes*block.expansion, planes)) 61 | return nn.Sequential(*layers) 62 | 63 | def _make_hour_glass(self, block, num_blocks, planes, depth): 64 | hg = [] 65 | for i in range(depth): 66 | res = [] 67 | for j in range(3): 68 | res.append(self._make_residual(block, num_blocks, planes)) 69 | if i == 0: 70 | res.append(self._make_residual(block, num_blocks, planes)) 71 | hg.append(nn.ModuleList(res)) 72 | return nn.ModuleList(hg) 73 | 74 | def _hour_glass_forward(self, n, x): 75 | up1 = self.hg[n-1][0](x) 76 | low1 = F.max_pool2d(x, 2, stride=2) 77 | low1 = self.hg[n-1][1](low1) 78 | 79 | if n > 1: 80 | low2 = self._hour_glass_forward(n-1, low1) 81 | else: 82 | low2 = self.hg[n-1][3](low1) 83 | low3 = self.hg[n-1][2](low2) 84 | up2 = self.upsample(low3) 85 | out = up1 + up2 86 | return out 87 | 88 | def forward(self, x): 89 | return self._hour_glass_forward(self.depth, x) 90 | 91 | 92 | class HourglassNet(nn.Module): 93 | '''Hourglass model from Newell et al ECCV 2016''' 94 | def __init__(self, block, num_stacks=2, num_blocks=4, num_classes=16): 95 | super(HourglassNet, self).__init__() 96 | 97 | self.inplanes = 64 98 | self.num_feats = 128 99 | self.num_stacks = num_stacks 100 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, 101 | bias=True) 102 | self.bn1 = nn.BatchNorm2d(self.inplanes) 103 | self.relu = nn.ReLU(inplace=True) 104 | self.layer1 = self._make_residual(block, self.inplanes, 1) 105 | self.layer2 = self._make_residual(block, self.inplanes, 1) 106 | self.layer3 = self._make_residual(block, self.num_feats, 1) 107 | self.maxpool = nn.MaxPool2d(2, stride=2) 108 | 109 | # build hourglass modules 110 | ch = self.num_feats*block.expansion 111 | hg, res, fc, score, fc_, score_ = [], [], [], [], [], [] 112 | for i in range(num_stacks): 113 | hg.append(Hourglass(block, num_blocks, self.num_feats, 4)) 114 | res.append(self._make_residual(block, self.num_feats, num_blocks)) 115 | fc.append(self._make_fc(ch, ch)) 116 | score.append(nn.Conv2d(ch, num_classes, kernel_size=1, bias=True)) 117 | if i < num_stacks-1: 118 | fc_.append(nn.Conv2d(ch, ch, kernel_size=1, bias=True)) 119 | score_.append(nn.Conv2d(num_classes, ch, kernel_size=1, bias=True)) 120 | self.hg = nn.ModuleList(hg) 121 | self.res = nn.ModuleList(res) 
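# fc_ and score_ exist only for the first num_stacks-1 stages: they project the
# stage features and its heatmap prediction back to ch channels so both can be
# added into the next stack's input (the intermediate supervision scheme of
# Newell et al.).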
122 | self.fc = nn.ModuleList(fc) 123 | self.score = nn.ModuleList(score) 124 | self.fc_ = nn.ModuleList(fc_) 125 | self.score_ = nn.ModuleList(score_) 126 | 127 | def _make_residual(self, block, planes, blocks, stride=1): 128 | downsample = None 129 | if stride != 1 or self.inplanes != planes * block.expansion: 130 | downsample = nn.Sequential( 131 | nn.Conv2d(self.inplanes, planes * block.expansion, 132 | kernel_size=1, stride=stride, bias=True), 133 | ) 134 | 135 | layers = [] 136 | layers.append(block(self.inplanes, planes, stride, downsample)) 137 | self.inplanes = planes * block.expansion 138 | for i in range(1, blocks): 139 | layers.append(block(self.inplanes, planes)) 140 | 141 | return nn.Sequential(*layers) 142 | 143 | def _make_fc(self, inplanes, outplanes): 144 | bn = nn.BatchNorm2d(inplanes) 145 | conv = nn.Conv2d(inplanes, outplanes, kernel_size=1, bias=True) 146 | return nn.Sequential( 147 | conv, 148 | bn, 149 | self.relu, 150 | ) 151 | 152 | def forward(self, x): 153 | out = [] 154 | x = self.conv1(x) 155 | x = self.bn1(x) 156 | x = self.relu(x) 157 | 158 | x = self.layer1(x) 159 | x = self.maxpool(x) 160 | x = self.layer2(x) 161 | x = self.layer3(x) 162 | 163 | for i in range(self.num_stacks): 164 | y = self.hg[i](x) 165 | y = self.res[i](y) 166 | y = self.fc[i](y) 167 | score = self.score[i](y) 168 | out.append(score) 169 | if i < self.num_stacks-1: 170 | fc_ = self.fc_[i](y) 171 | score_ = self.score_[i](score) 172 | x = x + fc_ + score_ 173 | 174 | return out 175 | 176 | 177 | def hg(num_stacks, num_blocks, num_classes): 178 | model = HourglassNet(Bottleneck, num_stacks=num_stacks, num_blocks=num_blocks, 179 | num_classes=num_classes) 180 | return model -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from easydict import EasyDict as edict 3 | import yaml 4 | import math 5 | def padRightDownCorner(img, stride, padValue): 6 | h = img.shape[0] 7 | w = img.shape[1] 8 | 9 | pad = 4 * [None] 10 | pad[0] = 0 # up 11 | pad[1] = 0 # left 12 | pad[2] = 0 if (h%stride==0) else stride - (h % stride) # down 13 | pad[3] = 0 if (w%stride==0) else stride - (w % stride) # right 14 | 15 | img_padded = img 16 | pad_up = np.tile(img_padded[0:1,:,:]*0 + padValue, (pad[0], 1, 1)) 17 | img_padded = np.concatenate((pad_up, img_padded), axis=0) 18 | pad_left = np.tile(img_padded[:,0:1,:]*0 + padValue, (1, pad[1], 1)) 19 | img_padded = np.concatenate((pad_left, img_padded), axis=1) 20 | pad_down = np.tile(img_padded[-2:-1,:,:]*0 + padValue, (pad[2], 1, 1)) 21 | img_padded = np.concatenate((img_padded, pad_down), axis=0) 22 | pad_right = np.tile(img_padded[:,-2:-1,:]*0 + padValue, (1, pad[3], 1)) 23 | img_padded = np.concatenate((img_padded, pad_right), axis=1) 24 | 25 | return img_padded, pad 26 | 27 | 28 | def get_transform(center, scale, res, rot=0): 29 | # Generate transformation matrix 30 | h = 200 * scale 31 | t = np.zeros((3, 3)) 32 | t[0, 0] = float(res[1]) / h 33 | t[1, 1] = float(res[0]) / h 34 | t[0, 2] = res[1] * (-float(center[0]) / h + .5) 35 | t[1, 2] = res[0] * (-float(center[1]) / h + .5) 36 | t[2, 2] = 1 37 | if not rot == 0: 38 | rot = -rot # To match direction of rotation from cropping 39 | rot_mat = np.zeros((3,3)) 40 | rot_rad = rot * np.pi / 180 41 | sn,cs = np.sin(rot_rad), np.cos(rot_rad) 42 | rot_mat[0,:2] = [cs, -sn] 43 | rot_mat[1,:2] = [sn, cs] 44 | rot_mat[2,2] = 1 45 | # Need to rotate around center 46 | t_mat = 
np.eye(3) 47 | t_mat[0,2] = -res[1]/2 48 | t_mat[1,2] = -res[0]/2 49 | t_inv = t_mat.copy() 50 | t_inv[:2,2] *= -1 51 | t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t))) 52 | return t 53 | 54 | def kpt_affine(kpt, mat): 55 | shape = kpt.shape 56 | kpt = kpt.reshape(-1, 2) 57 | return np.dot(np.concatenate((kpt, kpt[:, 0:1]*0 + 1), axis=1), mat.T).reshape(shape) 58 | 59 | def Config(filename): 60 | 61 | with open(filename, 'r') as f: 62 | parser = edict(yaml.load(f)) 63 | for x in parser: 64 | print '{}: {}'.format(x, parser[x]) 65 | return parser 66 | 67 | def adjust_learning_rate(optimizer, iters, base_lr, policy_parameter, policy='step', multiple=None): 68 | 69 | if policy == 'fixed': 70 | lr = base_lr 71 | elif policy == 'step': 72 | lr = base_lr * (policy_parameter['gamma'] ** (iters // policy_parameter['step_size'])) 73 | elif policy == 'exp': 74 | lr = base_lr * (policy_parameter['gamma'] ** iters) 75 | elif policy == 'inv': 76 | lr = base_lr * ((1 + policy_parameter['gamma'] * iters) ** (-policy_parameter['power'])) 77 | elif policy == 'multistep': 78 | lr = base_lr 79 | for stepvalue in policy_parameter['stepvalue']: 80 | if iters >= stepvalue: 81 | lr *= policy_parameter['gamma'] 82 | else: 83 | break 84 | elif policy == 'poly': 85 | lr = base_lr * ((1 - iters * 1.0 / policy_parameter['max_iter']) ** policy_parameter['power']) 86 | elif policy == 'sigmoid': 87 | lr = base_lr * (1.0 / (1 + math.exp(-policy_parameter['gamma'] * (iters - policy_parameter['stepsize'])))) 88 | elif policy == 'multistep-poly': 89 | lr = base_lr 90 | stepstart = 0 91 | stepend = policy_parameter['max_iter'] 92 | for stepvalue in policy_parameter['stepvalue']: 93 | if iters >= stepvalue: 94 | lr *= policy_parameter['gamma'] 95 | stepstart = stepvalue 96 | else: 97 | stepend = stepvalue 98 | break 99 | lr = max(lr * policy_parameter['gamma'], lr * (1 - (iters - stepstart) * 1.0 / (stepend - stepstart)) ** policy_parameter['power']) 100 | 101 | if multiple is not None: 102 | for i, param_group in enumerate(optimizer.param_groups): 103 | param_group['lr'] = lr * multiple[i] 104 | else: 105 | for i, param_group in enumerate(optimizer.param_groups): 106 | param_group['lr'] = lr 107 | return lr 108 | 109 | 110 | class AverageMeter(object): 111 | """Computes and stores the average and current value""" 112 | def __init__(self): 113 | self.reset() 114 | 115 | def reset(self): 116 | self.val = 0. 117 | self.avg = 0. 118 | self.sum = 0.
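# count tracks how many samples update(val, n) has seen, so avg = sum / count is an exact running mean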
119 | self.count = 0 120 | 121 | def update(self, val, n=1): 122 | self.val = val 123 | self.sum += val * n 124 | self.count += n 125 | self.avg = self.sum / self.count 126 | -------------------------------------------------------------------------------- /vis_input.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import os\n" 12 | ] 13 | } 14 | ], 15 | "metadata": { 16 | "kernelspec": { 17 | "display_name": "Python 2", 18 | "language": "python", 19 | "name": "python2" 20 | }, 21 | "language_info": { 22 | "codemirror_mode": { 23 | "name": "ipython", 24 | "version": 2 25 | }, 26 | "file_extension": ".py", 27 | "mimetype": "text/x-python", 28 | "name": "python", 29 | "nbconvert_exporter": "python", 30 | "pygments_lexer": "ipython2", 31 | "version": "2.7.6" 32 | } 33 | }, 34 | "nbformat": 4, 35 | "nbformat_minor": 0 36 | } 37 | --------------------------------------------------------------------------------
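A minimal usage sketch (added for illustration; not a file from this repository): it wires hg() from models/hourglass.py to AverageMeter and adjust_learning_rate from util.py, in the same Python 2, early-PyTorch style as the code above. The batch size, optimizer settings, step schedule, 384x384 input size (giving 96x96 stride-4 heatmaps), and num_classes=24 (one channel per FashionAI keypoint) are illustrative assumptions, not values taken from the experiment configs.

import torch
import torch.nn as nn
from torch.autograd import Variable  # early-PyTorch idiom, matching the code above

from models.hourglass import hg
from util import AverageMeter, adjust_learning_rate

# 2-stack hourglass; 24 output channels assumed (one heatmap per keypoint)
model = hg(num_stacks=2, num_blocks=4, num_classes=24)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
losses = AverageMeter()

imgs = Variable(torch.randn(2, 3, 384, 384))       # dummy 384x384 crops
heatmaps = Variable(torch.randn(2, 24, 96, 96))    # stride-4 targets: 384 / 4 = 96

for iters in range(2):  # illustrative; real training would iterate a DataLoader
    lr = adjust_learning_rate(optimizer, iters, 1e-3,
                              {'gamma': 0.333, 'step_size': 10000}, policy='step')
    outputs = model(imgs)                  # list with one prediction per stack
    loss = criterion(outputs[0], heatmaps)
    for o in outputs[1:]:                  # supervise every intermediate stack
        loss = loss + criterion(o, heatmaps)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    losses.update(loss.data[0], imgs.size(0))  # loss.item() on PyTorch >= 0.4

print 'avg loss: {:.4f}, lr: {}'.format(losses.avg, lr)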