├── .idea
│   ├── Heatmap.iml
│   ├── codeStyleSettings.xml
│   ├── misc.xml
│   └── modules.xml
├── Mytransforms.py
├── README.md
├── __init__.py
├── dataset_loader.py
├── debug.py
├── evaluation
│   ├── csv_evaluation.py
│   ├── csv_evaluation_FPN.py
│   ├── csv_evaluation_ResNet.py
│   ├── generate_val.py
│   ├── modify.py
│   └── submit.py
├── experiments
│   ├── CPM
│   │   ├── config.yml
│   │   └── train_net.py
│   ├── CPM_FPN
│   │   ├── config.yml
│   │   └── train_net.py
│   ├── CPM_ResNet
│   │   ├── config.yml
│   │   └── train_net.py
│   ├── FPN
│   │   ├── config.yml
│   │   └── train_net.py
│   └── hourglass
│       ├── config.yml
│       └── train_net.py
├── models
│   ├── CPM.py
│   ├── CPM_FPN.py
│   ├── CPM_ResNet.py
│   ├── __init__.py
│   ├── bk
│   │   ├── CPM.py
│   │   ├── CPM_FPN.py
│   │   ├── CPM_FPN2.py
│   │   ├── CPM_FPN3.py
│   │   ├── CPM_FPN4.py
│   │   ├── CPM_FPN5.py
│   │   ├── CPM_ResNet.py
│   │   ├── CPM_ResNet2.py
│   │   ├── CPM_ResNet3.py
│   │   ├── CPM_ResNet4.py
│   │   ├── CPM_ResNet5.py
│   │   ├── CPM_ResNet6.py
│   │   ├── CPM_ResNet7.py
│   │   ├── FPN.py
│   │   └── hourglass.py
│   └── hourglass.py
├── util.py
└── vis_input.ipynb
/.idea/ (Heatmap.iml, codeStyleSettings.xml, misc.xml, modules.xml):
--------------------------------------------------------------------------------
(IDE configuration files; their contents were not captured in this dump)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Heatmap
2 | Heatmap approach for the FashionAI keypoint detection challenge
3 |
4 | Preprocessing
5 | 1. split train into trainminusval and val (evaluation/generate_val.py)
6 |
7 | Train
8 | 1. cd ./experiments/CPM_FPN/
9 | 2. python ./train_net.py
10 |
11 | Eval
12 | 1. cd ./evaluation
13 | 2. python ./csv_evaluation_FPN.py
14 |
15 | Experiments (normalized error, lower is better)
16 | 1. CPM -> 23% on leaderboard
17 | 2. CPM_ResNet -> 17.9% on valset
18 | 3. CPM_FPN + data_aug -> 11% on valset, 12% on leaderboard
19 |
20 |
21 |
22 |
23 | ## Related Papers
24 |
25 | 1. Attentive Fashion Grammar Network for Fashion Landmark Detection and Clothing Category Classification (BIT, UCLA) - CVPR 2018
26 |    - Worth reading
27 |
28 | 2. Fashion Landmark Detection in the Wild (SenseTime) - ECCV 2016
29 |    - Skip this one unless you want to learn the basics from scratch
30 |
31 |
32 | 3. A Coarse-Fine Network for Keypoint Localization (University of Sydney) - ICCV 2017
33 |    - Worth reading. Very interesting: the paper has two parts; the coarse part uses detection to find candidate target regions, and the fine part classifies keypoint locations with a softmax.
34 |
--------------------------------------------------------------------------------
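
For orientation, the three README stages map onto the files below: evaluation/generate_val.py performs the train/val split, experiments/*/train_net.py trains a model and saves a checkpoint every 5000 iterations as <iter>.pth.tar, and evaluation/csv_evaluation*.py / evaluation/submit.py load a checkpoint, run multi-scale inference, and write a prediction CSV (printing the normalized-error score when ground truth is available).
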
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xiangyu-CAS/FashionAI_Keypoints/dab6cbd975ba6071b070fb7da2fb163d01e2e2e4/__init__.py
--------------------------------------------------------------------------------
/dataset_loader.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.utils.data as data
3 | import numpy as np
4 | import os
5 | import math
6 | from PIL import Image
7 | import cv2
8 | import csv
9 |
10 | class dataset_loader(data.Dataset):
11 |
12 | def __init__(self, img_dir, ann_path, stride, transforms=None, sigma = 15):
13 |
14 | self.sigma = sigma
15 | self.stride = stride
16 | self.img_dir = img_dir
17 | self.transforms = transforms
18 | self.anns = []
19 | self.info = []
20 | with open(ann_path,'rb') as f:
21 | reader = csv.reader(f)
22 | for row in reader:
23 | self.anns.append(row)
24 | self.info.append(self.anns[0])
25 | self.anns=self.anns[1:]
26 |
27 |
28 | def __getitem__(self, index):
29 | # ---------------- read info -----------------------
30 | ann = self.anns[index]
31 | img_path = os.path.join(self.img_dir, ann[0])
32 | img = cv2.imread(img_path) # BGR
33 | catergory = ann[1]
34 | kpt = _get_keypoints(ann)
35 | # ----------------- center -------------------------
36 | center = [img.shape[0]/2,img.shape[1]/2]
37 |
38 | # ----------------- transform ----------------------
39 | if not self.transforms:
40 | img, kpt = _croppad(img, kpt, center, 384, 384)
41 | else:
42 | img, kpt, center = self.transforms(img, kpt, center)
43 | #---------------------------------------------------
44 | heatmaps = _generate_heatmap(img, kpt,self.stride, self.sigma)
45 |
46 | img = np.array(img, dtype=np.float32)
47 | img -= 128.0
48 | img /= 255.0
49 |
50 | img = torch.from_numpy(img.transpose((2, 0, 1)))
51 | heatmaps = torch.from_numpy(heatmaps.transpose((2, 0, 1)))
52 |
53 | # img = self.trasforms(img)
54 | # heatmaps = self.trasforms(heatmaps)
55 |
56 | return img, heatmaps
57 |
58 | def __len__(self):
59 | return len(self.anns)
60 |
61 | def _croppad(img, kpt, center, w, h):
62 | num = len(kpt)
63 | height, width, _ = img.shape
64 | new_img = np.empty((h, w, 3), dtype=np.float32)
65 | new_img.fill(128)
66 |
67 | # calculate offset
68 | offset_up = -1*(h/2 - center[0])
69 | offset_left = -1*(w/2 - center[1])
70 |
71 | for i in range(num):
72 | kpt[i][0] -= offset_left
73 | kpt[i][1] -= offset_up
74 |
75 | st_x = 0
76 | ed_x = w
77 | st_y = 0
78 | ed_y = h
79 | or_st_x = offset_left
80 | or_ed_x = offset_left + w
81 | or_st_y = offset_up
82 | or_ed_y = offset_up + h
83 |
84 | if offset_left < 0:
85 | st_x = -offset_left
86 | or_st_x = 0
87 | if offset_left + w > width:
88 | ed_x = width - offset_left
89 | or_ed_x = width
90 | if offset_up < 0:
91 | st_y = -offset_up
92 | or_st_y = 0
93 | if offset_up + h > height:
94 | ed_y = height - offset_up
95 | or_ed_y = height
96 | new_img[st_y: ed_y, st_x: ed_x, :] = img[or_st_y: or_ed_y, or_st_x: or_ed_x, :].copy()
97 |
98 | return np.ascontiguousarray(new_img), kpt
99 |
100 |
101 | def _get_keypoints(ann):
102 | kpt = np.zeros((24, 3))
103 | for i in range(2, len(ann)):
104 | cell = ann[i]
105 | [x_str, y_str, vis_str] = cell.split('_')
106 | kpt[i - 2, 0], kpt[i - 2, 1], kpt[i - 2, 2] = int(x_str), int(y_str), int(vis_str)
107 | return kpt
108 |
109 | def _generate_heatmap(img, kpt, stride, sigma):
110 | height, width, _ = img.shape
111 | heatmap = np.zeros((height / stride, width / stride, len(kpt) + 1), dtype=np.float32) # (24 points + background)
112 | height, width, num_point = heatmap.shape
113 | start = stride / 2.0 - 0.5
114 |
115 | num = len(kpt)
116 | for i in range(num):
117 | if kpt[i][2] == -1: # not labeled
118 | continue
119 | x = kpt[i][0]
120 | y = kpt[i][1]
121 | for h in range(height):
122 | for w in range(width):
123 | xx = start + w * stride
124 | yy = start + h * stride
125 | dis = ((xx - x) * (xx - x) + (yy - y) * (yy - y)) / 2.0 / sigma / sigma
126 | if dis > 4.6052: # exp(-4.6052) ~= 0.01: drop Gaussian tails below 1% of the peak
127 | continue
128 | heatmap[h][w][i] += math.exp(-dis)
129 | if heatmap[h][w][i] > 1:
130 | heatmap[h][w][i] = 1
131 |
132 | heatmap[:, :, -1] = 1.0 - np.max(heatmap[:, :, :-1], axis=2) # for background
133 | return heatmap
134 |
135 | '''
136 | 0: labeled but not visible
137 | 1: labeled and visible
138 | -1: not labeled
139 |
140 | 'image_id',
141 | 'image_category',
142 | 0'neckline_left',
143 | 1'neckline_right',
144 | 2 'center_front',
145 | 3'shoulder_left',
146 | 4 'shoulder_right',
147 | 5 'armpit_left',
148 | 6 'armpit_right',
149 | 7 'waistline_left',
150 | 8 'waistline_right',
151 | 9 'cuff_left_in',
152 | 10 'cuff_left_out',
153 | 11 'cuff_right_in',
154 | 12 'cuff_right_out',
155 | 13 'top_hem_left',
156 | 14 'top_hem_right',
157 | 15 'waistband_left',
158 | 16 'waistband_right',
159 | 17 'hemline_left',
160 | 18 'hemline_right',
161 | 19 'crotch',
162 | 20 'bottom_left_in',
163 | 21 'bottom_left_out',
164 | 22 'bottom_right_in',
165 | 23 'bottom_right_out'
166 | '''
--------------------------------------------------------------------------------
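
The nested h/w loop in _generate_heatmap above is O(H x W x K) in pure Python. A minimal vectorized NumPy sketch with the same semantics (same grid offset, the same 1%-of-peak truncation, and the same background channel), for illustration only:

```python
import numpy as np

def generate_heatmap_vec(img_h, img_w, kpt, stride, sigma):
    # Heatmap-cell centers in input-image coordinates,
    # matching start = stride / 2.0 - 0.5 in _generate_heatmap.
    h, w = img_h // stride, img_w // stride
    start = stride / 2.0 - 0.5
    xx, yy = np.meshgrid(start + np.arange(w) * stride,
                         start + np.arange(h) * stride)  # both (h, w)

    heatmap = np.zeros((h, w, len(kpt) + 1), dtype=np.float32)
    for i, (x, y, vis) in enumerate(kpt):
        if vis == -1:                        # not labeled
            continue
        d2 = ((xx - x) ** 2 + (yy - y) ** 2) / (2.0 * sigma ** 2)
        g = np.exp(-d2)
        g[d2 > 4.6052] = 0.0                 # exp(-4.6052) ~ 1% of the peak
        heatmap[:, :, i] = np.minimum(g, 1.0)
    heatmap[:, :, -1] = 1.0 - heatmap[:, :, :-1].max(axis=2)  # background
    return heatmap
```
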
/debug.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os, sys
3 | import os
4 | import dataset_loader
5 | import torch
6 | import util
7 | import matplotlib
8 | import matplotlib.pyplot as plt
9 | import numpy as np
10 | import torchvision.transforms as transforms
11 |
12 | ann_path = '/data/xiaobing.wang/xiangyu.zhu/FashionAI/data/warm_up_train/Annotations/annotations.csv'
13 | img_dir = '/data/xiaobing.wang/xiangyu.zhu/FashionAI/data/warm_up_train/'
14 |
15 | train_loader = torch.utils.data.DataLoader(
16 | dataset_loader.dataset_loader(img_dir, ann_path, 8,
17 | None),  # torchvision's ToTensor takes a single image; dataset_loader calls transforms(img, kpt, center), so use the default _croppad
18 | batch_size=4, shuffle=True,
19 | num_workers=2, pin_memory=True)
20 |
21 | for i, (input, heatmap) in enumerate(train_loader):
22 | imgs = input.numpy()
23 | heats = heatmap.numpy()
24 | break
--------------------------------------------------------------------------------
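
A hypothetical continuation of debug.py for eyeballing one batch: undo the (x - 128) / 255 normalization from dataset_loader, flip BGR to RGB, and show the max over the 24 keypoint channels (the names imgs / heats come from the loop above):

```python
img = imgs[0].transpose(1, 2, 0)                    # CHW -> HWC, still normalized BGR
img = (img * 255.0 + 128.0).clip(0, 255).astype('uint8')[:, :, ::-1]  # de-normalize, BGR -> RGB

plt.subplot(1, 2, 1); plt.imshow(img); plt.title('input')
plt.subplot(1, 2, 2); plt.imshow(heats[0][:24].max(axis=0)); plt.title('max keypoint heatmap')
plt.show()
```
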
/evaluation/csv_evaluation.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 | import sys
4 | import numpy as np
5 | import cv2
6 | from scipy.ndimage.filters import gaussian_filter
7 | import math, time
8 | import torch
9 |
10 | import util
11 | sys.path.append('../')
12 | def apply_model(oriImg, model, multiplier):
13 | stride = 8
14 | height, width, _ = oriImg.shape
15 | normed_img = np.array(oriImg, dtype=np.float32)
16 | heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 25), dtype=np.float32)
17 | for m in range(len(multiplier)):
18 | scale = multiplier[m]
19 | imageToTest = cv2.resize(normed_img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
20 | imgToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, 128)
21 |
22 | input_img = np.transpose(np.float32(imgToTest_padded[:, :, :, np.newaxis]),
23 | (3, 2, 0, 1)) / 255 - 0.5 # required shape (1, c, h, w)
24 |
25 | input_var = torch.autograd.Variable(torch.from_numpy(input_img).cuda())
26 |
27 | # get the features
28 | heat1, heat2, heat3, heat4, heat5, heat6 = model(input_var)
29 |
30 | # get the heatmap
31 | heatmap = heat6.data.cpu().numpy()
32 | heatmap = np.transpose(np.squeeze(heatmap), (1, 2, 0)) # (h, w, c)
33 | heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
34 | heatmap = heatmap[:imgToTest_padded.shape[0] - pad[2], :imgToTest_padded.shape[1] - pad[3], :]
35 | heatmap = cv2.resize(heatmap, (width, height), interpolation=cv2.INTER_CUBIC)
36 | heatmap_avg = heatmap_avg + heatmap / len(multiplier)
37 |
38 | all_peaks = [] # all of the possible points by classes.
39 | peak_counter = 0
40 | thre1 = 0.1
41 | for part in range(25 - 1):
42 | x_list = []
43 | y_list = []
44 | map_ori = heatmap_avg[:, :, part]
45 | map = gaussian_filter(map_ori, sigma=3)
46 |
47 | map_left = np.zeros(map.shape)
48 | map_left[1:, :] = map[:-1, :]
49 | map_right = np.zeros(map.shape)
50 | map_right[:-1, :] = map[1:, :]
51 | map_up = np.zeros(map.shape)
52 | map_up[:, 1:] = map[:, :-1]
53 | map_down = np.zeros(map.shape)
54 | map_down[:, :-1] = map[:, 1:]
55 |
56 | peaks_binary = np.logical_and.reduce(
57 | (map >= map_left, map >= map_right, map >= map_up, map >= map_down, map > thre1))
58 | peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]) # note reverse
59 | peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
60 | id = range(peak_counter, peak_counter + len(peaks))
61 | peaks_with_score_and_id = [peaks_with_score[i] + (id[i],) for i in range(len(id))]
62 |
63 | all_peaks.append(peaks_with_score_and_id)
64 | peak_counter += len(peaks)
65 |
66 | # sort by score
67 | for i in range(24):
68 | all_peaks[i] = sorted(all_peaks[i], key=lambda ele : ele[2],reverse = True)
69 |
70 | canvas = oriImg.copy()
71 | # draw points
72 | for i in range(24):
73 | for j in range(len(all_peaks[i])):
74 | if j == 0:
75 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, [0, 0, 255], thickness=-1)
76 | else:
77 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, [255, 0, 0], thickness=-1)
78 |
79 | keypoints = -1*np.ones((24, 3))
80 | for i in range(24):
81 | if len(all_peaks[i]) == 0:
82 | continue
83 | else:
84 | keypoints[i,0], keypoints[i,1], keypoints[i,2] = all_peaks[i][0][0], all_peaks[i][0][1], 1
85 |
86 | return keypoints, canvas
87 |
88 |
89 | def write_csv(name, results):
90 | import csv
91 | with open(name, 'w') as f:
92 | writer = csv.writer(f)
93 | writer.writerows(results)
94 |
95 | def prepare_row(ann, keypoints):
96 | # cls
97 | image_name = ann[0]
98 | category = ann[1]
99 | keypoints_str = []
100 | for i in range(24):
101 | cell_str = str(int(keypoints[i][0])) + '_' + str(int(keypoints[i][1])) + '_' + str(int(keypoints[i][2]))
102 | keypoints_str.append(cell_str)
103 | row = [image_name, category] + keypoints_str
104 | return row
105 |
106 | def read_csv(ann_file):
107 | info = []
108 | anns = []
109 | with open(ann_file, 'rb') as f:
110 | reader = csv.reader(f)
111 | for row in reader:
112 | anns.append(row)
113 | info = anns[0]
114 | anns = anns[1:]
115 | return info, anns
116 |
117 | def euclidean_distance(a, b):
118 | return math.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2)
119 |
120 | def criterion(ann_gt, ann_dt):
121 | category = ann_gt[1]
122 | gt_kpt = -1 * np.ones((24, 3))
123 | for i in range(len(gt_kpt)):
124 | x_str, y_str, vis_str = ann_gt[i + 2].split('_')
125 | gt_kpt[i][0], gt_kpt[i][1], gt_kpt[i][2] = int(x_str), int(y_str), int(vis_str)
126 |
127 | dt_kpt = -1 * np.ones((24, 3))
128 | for i in range(len(dt_kpt)):
129 | x_str, y_str, vis_str = ann_dt[i + 2].split('_')
130 | dt_kpt[i][0], dt_kpt[i][1], dt_kpt[i][2] = int(x_str), int(y_str), int(vis_str)
131 |
132 | if category in ['blouse','outwear','dress']: # normalize by armpit distance (keypoints 5, 6)
133 | thre = euclidean_distance(gt_kpt[5], gt_kpt[6])
134 | elif category in ['trousers', 'skirt']: # normalize by waistband distance (keypoints 15, 16)
135 | thre = euclidean_distance(gt_kpt[15], gt_kpt[16])
136 | if thre == 0:
137 | return []
138 | score = []
139 | for i in range(len(gt_kpt)):
140 | if gt_kpt[i][2] == 1:
141 | #if dt_kpt[i][2] == -1:
142 | # score.append(2)
143 | #else:
144 | score.append(1.0* euclidean_distance(gt_kpt[i],dt_kpt[i])/ thre)
145 | return score
146 | #print('score = {}'.format(score))
147 |
148 |
149 |
150 | def evaluate(gt_file, dt_file, num_imgs):
151 | info_gt, anns_gt = read_csv(gt_file)
152 | info_dt, anns_dt = read_csv(dt_file)
153 | anns_gt = anns_gt[:num_imgs]
154 | assert len(anns_gt) == len(anns_dt)
155 | scores = []
156 | for i in range(len(anns_gt)):
157 | ann_gt = anns_gt[i]
158 | ann_dt = anns_dt[i]
159 | score = criterion(ann_gt, ann_dt)
160 | scores += score
161 | value = sum(scores)/len(scores)
162 | print('score = {}'.format(value))
163 |
164 | def eval():
165 | gt_file = '../FashionAI/data/train/Annotations/val.csv'
166 | dt_file = 'val_result.csv'
167 | # dt_file = 'modify.csv'
168 |
169 | num_imgs = 100
170 | evaluate(gt_file, dt_file,num_imgs)
171 |
172 |
173 | def main():
174 | os.environ['CUDA_VISIBLE_DEVICES'] = '2'
175 |
176 | #--------------------------- model -------------------------------------------------------------------------------
177 | import models.CPM
178 | pytorch_model = '../FashionAI/Heatmap/experiments/CPM/20000.pth.tar'
179 | model = models.CPM.PoseModel(num_point=25, pretrained=False)
180 | #-----------------------------------------------------------------------------------------------------------------
181 |
182 | img_dir = '../FashionAI/data/train/'
183 | ann_path = '../FashionAI/data/train/Annotations/val.csv'
184 | result_name = 'val_result.csv'
185 | scale_search = [0.5, 0.7, 1.0, 1.3] #[0.5, 1.0, 1.5]
186 | boxsize = 384
187 | # -------------------------- pytorch model------------------
188 | state_dict = torch.load(pytorch_model)['state_dict']
189 | model.load_state_dict(state_dict)
190 | model = model.cuda()
191 | model.eval()
192 | # --------------------------------------------------------
193 | anns = []
194 | with open(ann_path, 'rb') as f:
195 | reader = csv.reader(f)
196 | for row in reader:
197 | anns.append(row)
198 | info=anns[0]
199 | anns = anns[1:]
200 | #---------------------------------------------------------
201 | num_imgs = 100# len(anns)
202 | results = []
203 | results.append(info)
204 |
205 | for i in range(num_imgs):
206 | print('{}/{}'.format(i, num_imgs))
207 | ann = anns[i]
208 | image_path = os.path.join(img_dir,ann[0])
209 | oriImg = cv2.imread(image_path)
210 | #multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
211 | multiplier = scale_search
212 | keypoints, canvas = apply_model(oriImg, model, multiplier)
213 | # cv2.imwrite(os.path.join('./result', ann[0].split('/')[-1]), canvas)
214 | row = prepare_row(ann, keypoints)
215 | results.append(row)
216 | write_csv(result_name, results)
217 | evaluate(ann_path, result_name,num_imgs)
218 |
219 | if __name__ == '__main__':
220 | main()
221 | # eval
--------------------------------------------------------------------------------
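
Two notes on the evaluator above. apply_model averages heatmaps over the test scales, smooths each channel with a Gaussian, keeps local maxima above thre1 = 0.1 via a 4-neighbour non-maximum check, and then retains only the highest-scoring peak per keypoint. The score printed by evaluate is the FashionAI normalized error over visible ground-truth points:

    NE = (1 / N) * sum_k dist(gt_k, dt_k) / s

where s is the armpit distance (blouse, outwear, dress) or waistband distance (trousers, skirt) of the image; lower is better. csv_evaluation_FPN.py and csv_evaluation_ResNet.py below differ from this file only in the model they load, its output handling, and padding details.
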
/evaluation/csv_evaluation_FPN.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 | import sys
4 | import numpy as np
5 | import cv2
6 | from scipy.ndimage.filters import gaussian_filter
7 | import math, time
8 | import torch
9 |
10 | import util
11 | sys.path.append('../')
12 | def apply_model(oriImg, model, multiplier):
13 | stride = 8
14 | height, width, _ = oriImg.shape
15 | normed_img = np.array(oriImg, dtype=np.float32)
16 | heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 25), dtype=np.float32)
17 | for m in range(len(multiplier)):
18 | scale = multiplier[m]
19 | imageToTest = cv2.resize(normed_img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
20 | # imgToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, 128)
21 | imgToTest_padded, pad = util.padRightDownCorner(imageToTest, 64, 128)
22 |
23 | input_img = np.transpose(np.float32(imgToTest_padded[:, :, :, np.newaxis]),
24 | (3, 2, 0, 1)) / 255 - 0.5 # required shape (1, c, h, w)
25 |
26 | input_var = torch.autograd.Variable(torch.from_numpy(input_img).cuda())
27 |
28 | # get the features
29 | heat = model(input_var)
30 |
31 |
32 | # get the heatmap
33 | heatmap = heat.data.cpu().numpy()
34 | heatmap = np.transpose(np.squeeze(heatmap), (1, 2, 0)) # (h, w, c)
35 | heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
36 | heatmap = heatmap[:imgToTest_padded.shape[0] - pad[2], :imgToTest_padded.shape[1] - pad[3], :]
37 | heatmap = cv2.resize(heatmap, (width, height), interpolation=cv2.INTER_CUBIC)
38 | heatmap_avg = heatmap_avg + heatmap / len(multiplier)
39 |
40 | all_peaks = [] # all of the possible points by classes.
41 | peak_counter = 0
42 | thre1 = 0.1
43 | for part in range(25 - 1):
44 | x_list = []
45 | y_list = []
46 | map_ori = heatmap_avg[:, :, part]
47 | map = gaussian_filter(map_ori, sigma=3)
48 |
49 | map_left = np.zeros(map.shape)
50 | map_left[1:, :] = map[:-1, :]
51 | map_right = np.zeros(map.shape)
52 | map_right[:-1, :] = map[1:, :]
53 | map_up = np.zeros(map.shape)
54 | map_up[:, 1:] = map[:, :-1]
55 | map_down = np.zeros(map.shape)
56 | map_down[:, :-1] = map[:, 1:]
57 |
58 | peaks_binary = np.logical_and.reduce(
59 | (map >= map_left, map >= map_right, map >= map_up, map >= map_down, map > thre1))
60 | peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]) # note reverse
61 | peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
62 | id = range(peak_counter, peak_counter + len(peaks))
63 | peaks_with_score_and_id = [peaks_with_score[i] + (id[i],) for i in range(len(id))]
64 |
65 | all_peaks.append(peaks_with_score_and_id)
66 | peak_counter += len(peaks)
67 |
68 | # sort by score
69 | for i in range(24):
70 | all_peaks[i] = sorted(all_peaks[i], key=lambda ele : ele[2],reverse = True)
71 |
72 | canvas = oriImg.copy()
73 | # draw points
74 | for i in range(24):
75 | for j in range(len(all_peaks[i])):
76 | if j == 0:
77 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, [0, 0, 255], thickness=-1)
78 | else:
79 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, [255, 0, 0], thickness=-1)
80 |
81 | keypoints = -1*np.ones((24, 3))
82 | for i in range(24):
83 | if len(all_peaks[i]) == 0:
84 | continue
85 | else:
86 | keypoints[i,0], keypoints[i,1], keypoints[i,2] = all_peaks[i][0][0], all_peaks[i][0][1], 1
87 |
88 | return keypoints, canvas
89 |
90 |
91 | def write_csv(name, results):
92 | import csv
93 | with open(name, 'w') as f:
94 | writer = csv.writer(f)
95 | writer.writerows(results)
96 |
97 | def prepare_row(ann, keypoints):
98 | # cls
99 | image_name = ann[0]
100 | category = ann[1]
101 | keypoints_str = []
102 | for i in range(24):
103 | cell_str = str(int(keypoints[i][0])) + '_' + str(int(keypoints[i][1])) + '_' + str(int(keypoints[i][2]))
104 | keypoints_str.append(cell_str)
105 | row = [image_name, category] + keypoints_str
106 | return row
107 |
108 | def read_csv(ann_file):
109 | info = []
110 | anns = []
111 | with open(ann_file, 'rb') as f:
112 | reader = csv.reader(f)
113 | for row in reader:
114 | anns.append(row)
115 | info = anns[0]
116 | anns = anns[1:]
117 | return info, anns
118 |
119 | def euclidean_distance(a, b):
120 | return math.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2)
121 |
122 | def criterion(ann_gt, ann_dt):
123 | category = ann_gt[1]
124 | gt_kpt = -1 * np.ones((24, 3))
125 | for i in range(len(gt_kpt)):
126 | x_str, y_str, vis_str = ann_gt[i + 2].split('_')
127 | gt_kpt[i][0], gt_kpt[i][1], gt_kpt[i][2] = int(x_str), int(y_str), int(vis_str)
128 |
129 | dt_kpt = -1 * np.ones((24, 3))
130 | for i in range(len(dt_kpt)):
131 | x_str, y_str, vis_str = ann_dt[i + 2].split('_')
132 | dt_kpt[i][0], dt_kpt[i][1], dt_kpt[i][2] = int(x_str), int(y_str), int(vis_str)
133 |
134 | if category in ['blouse','outwear','dress']: # armpit distance
135 | thre = euclidean_distance(gt_kpt[5], gt_kpt[6])
136 | elif category in ['trousers', 'skirt']: # waistband distance
137 | thre = euclidean_distance(gt_kpt[15], gt_kpt[16])
138 | if thre == 0:
139 | return []
140 | score = []
141 | for i in range(len(gt_kpt)):
142 | if gt_kpt[i][2] == 1:
143 | #if dt_kpt[i][2] == -1:
144 | # score.append(2)
145 | #else:
146 | score.append(1.0* euclidean_distance(gt_kpt[i],dt_kpt[i])/ thre)
147 | return score
148 | #print('score = {}'.format(score))
149 |
150 |
151 |
152 | def evaluate(gt_file, dt_file, num_imgs):
153 | info_gt, anns_gt = read_csv(gt_file)
154 | info_dt, anns_dt = read_csv(dt_file)
155 | anns_gt = anns_gt[:num_imgs]
156 | assert len(anns_gt) == len(anns_dt)
157 | scores = []
158 | for i in range(len(anns_gt)):
159 | ann_gt = anns_gt[i]
160 | ann_dt = anns_dt[i]
161 | score = criterion(ann_gt, ann_dt)
162 | scores += score
163 | value = sum(scores)/len(scores)
164 | print('score = {}'.format(value))
165 |
166 | def eval():
167 | gt_file = '../FashionAI/data/train/Annotations/val.csv'
168 | # dt_file = 'val_result.csv'
169 | dt_file = 'modify.csv'
170 |
171 | num_imgs = 500
172 | evaluate(gt_file, dt_file,num_imgs)
173 |
174 |
175 | def main():
176 | os.environ['CUDA_VISIBLE_DEVICES'] = '3'
177 |
178 | # --------------------------- model -------------------------------------------------------------------------------
179 | import models.CPM_FPN
180 | pytorch_model = '../FashionAI/Heatmap/experiments/CPM_FPN/160000.pth.tar'
181 | model = models.CPM_FPN.pose_estimation(class_num=25, pretrain=False)
182 | # -----------------------------------------------------------------------------------------------------------------
183 |
184 | img_dir = '../FashionAI/data/train/'
185 | ann_path = '../FashionAI/data/train/Annotations/val.csv'
186 | # ann_path = '/data/xiaobing.wang/xiangyu.zhu/FashionAI/data/train/Annotations/trainminusval.csv'
187 | result_name = 'val_result.csv'
188 | # scale_search = [0.5, 0.7, 1.0, 1.3] # [0.5, 1.0, 1.5]
189 | scale_search = [0.5, 0.7, 1.0, 1.3]
190 | boxsize = 384
191 | # -------------------------- pytorch model------------------
192 | state_dict = torch.load(pytorch_model)['state_dict']
193 | model.load_state_dict(state_dict)
194 | model = model.cuda()
195 | model.eval()
196 | # --------------------------------------------------------
197 | anns = []
198 | with open(ann_path, 'rb') as f:
199 | reader = csv.reader(f)
200 | for row in reader:
201 | anns.append(row)
202 | info=anns[0]
203 | anns = anns[1:]
204 | #---------------------------------------------------------
205 | num_imgs =100# len(anns)
206 | results = []
207 | results.append(info)
208 |
209 | for i in range(num_imgs):
210 | print('{}/{}'.format(i, num_imgs))
211 | ann = anns[i]
212 | image_path = os.path.join(img_dir, ann[0])
213 | oriImg = cv2.imread(image_path)
214 | # multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
215 | multiplier = scale_search
216 | keypoints, canvas = apply_model(oriImg, model, multiplier)
217 | # cv2.imwrite(os.path.join('./result', ann[0].split('/')[-1]), canvas)
218 | row = prepare_row(ann, keypoints)
219 | results.append(row)
220 | write_csv(result_name, results)
221 | evaluate(ann_path, result_name,num_imgs)
222 |
223 | if __name__ == '__main__':
224 | main()
225 | # eval()
226 |
--------------------------------------------------------------------------------
/evaluation/csv_evaluation_ResNet.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 | import sys
4 | import numpy as np
5 | import cv2
6 | from scipy.ndimage.filters import gaussian_filter
7 | import math, time
8 | import torch
9 |
10 | import util
11 | sys.path.append('../')
12 | def apply_model(oriImg, model, multiplier):
13 | stride = 8
14 | height, width, _ = oriImg.shape
15 | normed_img = np.array(oriImg, dtype=np.float32)
16 | heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 25), dtype=np.float32)
17 | for m in range(len(multiplier)):
18 | scale = multiplier[m]
19 | imageToTest = cv2.resize(normed_img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
20 | # imgToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, 128)
21 | imgToTest_padded, pad = util.padRightDownCorner(imageToTest, 64, 128)
22 |
23 | input_img = np.transpose(np.float32(imgToTest_padded[:, :, :, np.newaxis]),
24 | (3, 2, 0, 1)) / 255 - 0.5 # required shape (1, c, h, w)
25 |
26 | input_var = torch.autograd.Variable(torch.from_numpy(input_img).cuda())
27 |
28 | # get the features
29 | heat = model(input_var)
30 |
31 |
32 | # get the heatmap
33 | heatmap = heat.data.cpu().numpy()
34 | heatmap = np.transpose(np.squeeze(heatmap), (1, 2, 0)) # (h, w, c)
35 | heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
36 | heatmap = heatmap[:imgToTest_padded.shape[0] - pad[2], :imgToTest_padded.shape[1] - pad[3], :]
37 | heatmap = cv2.resize(heatmap, (width, height), interpolation=cv2.INTER_CUBIC)
38 | heatmap_avg = heatmap_avg + heatmap / len(multiplier)
39 |
40 | all_peaks = [] # all of the possible points by classes.
41 | peak_counter = 0
42 | thre1 = 0.1
43 | for part in range(25 - 1):
44 | x_list = []
45 | y_list = []
46 | map_ori = heatmap_avg[:, :, part]
47 | map = gaussian_filter(map_ori, sigma=3)
48 |
49 | map_left = np.zeros(map.shape)
50 | map_left[1:, :] = map[:-1, :]
51 | map_right = np.zeros(map.shape)
52 | map_right[:-1, :] = map[1:, :]
53 | map_up = np.zeros(map.shape)
54 | map_up[:, 1:] = map[:, :-1]
55 | map_down = np.zeros(map.shape)
56 | map_down[:, :-1] = map[:, 1:]
57 |
58 | peaks_binary = np.logical_and.reduce(
59 | (map >= map_left, map >= map_right, map >= map_up, map >= map_down, map > thre1))
60 | peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]) # note reverse
61 | peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
62 | id = range(peak_counter, peak_counter + len(peaks))
63 | peaks_with_score_and_id = [peaks_with_score[i] + (id[i],) for i in range(len(id))]
64 |
65 | all_peaks.append(peaks_with_score_and_id)
66 | peak_counter += len(peaks)
67 |
68 | # sort by score
69 | for i in range(24):
70 | all_peaks[i] = sorted(all_peaks[i], key=lambda ele : ele[2],reverse = True)
71 |
72 | canvas = oriImg.copy()
73 | # draw points
74 | for i in range(24):
75 | for j in range(len(all_peaks[i])):
76 | if j == 0:
77 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, [0, 0, 255], thickness=-1)
78 | else:
79 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, [255, 0, 0], thickness=-1)
80 |
81 | keypoints = -1*np.ones((24, 3))
82 | for i in range(24):
83 | if len(all_peaks[i]) == 0:
84 | continue
85 | else:
86 | keypoints[i,0], keypoints[i,1], keypoints[i,2] = all_peaks[i][0][0], all_peaks[i][0][1], 1
87 |
88 | return keypoints, canvas
89 |
90 |
91 | def write_csv(name, results):
92 | import csv
93 | with open(name, 'w') as f:
94 | writer = csv.writer(f)
95 | writer.writerows(results)
96 |
97 | def prepare_row(ann, keypoints):
98 | # cls
99 | image_name = ann[0]
100 | category = ann[1]
101 | keypoints_str = []
102 | for i in range(24):
103 | cell_str = str(int(keypoints[i][0])) + '_' + str(int(keypoints[i][1])) + '_' + str(int(keypoints[i][2]))
104 | keypoints_str.append(cell_str)
105 | row = [image_name, category] + keypoints_str
106 | return row
107 |
108 | def read_csv(ann_file):
109 | info = []
110 | anns = []
111 | with open(ann_file, 'rb') as f:
112 | reader = csv.reader(f)
113 | for row in reader:
114 | anns.append(row)
115 | info = anns[0]
116 | anns = anns[1:]
117 | return info, anns
118 |
119 | def euclidean_distance(a, b):
120 | return math.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2)
121 |
122 | def criterion(ann_gt, ann_dt):
123 | category = ann_gt[1]
124 | gt_kpt = -1 * np.ones((24, 3))
125 | for i in range(len(gt_kpt)):
126 | x_str, y_str, vis_str = ann_gt[i + 2].split('_')
127 | gt_kpt[i][0], gt_kpt[i][1], gt_kpt[i][2] = int(x_str), int(y_str), int(vis_str)
128 |
129 | dt_kpt = -1 * np.ones((24, 3))
130 | for i in range(len(dt_kpt)):
131 | x_str, y_str, vis_str = ann_dt[i + 2].split('_')
132 | dt_kpt[i][0], dt_kpt[i][1], dt_kpt[i][2] = int(x_str), int(y_str), int(vis_str)
133 |
134 | if category in ['blouse','outwear','dress']: # armpit distance
135 | thre = euclidean_distance(gt_kpt[5], gt_kpt[6])
136 | elif category in ['trousers', 'skirt']: # waistband distance
137 | thre = euclidean_distance(gt_kpt[15], gt_kpt[16]) # 15/16 are waistband_left/right (7/8 are waistline)
138 | if thre == 0:
139 | return []
140 | score = []
141 | for i in range(len(gt_kpt)):
142 | if gt_kpt[i][2] == 1:
143 | #if dt_kpt[i][2] == -1:
144 | # score.append(2)
145 | #else:
146 | score.append(1.0* euclidean_distance(gt_kpt[i],dt_kpt[i])/ thre)
147 | return score
148 | #print('score = {}'.format(score))
149 |
150 |
151 |
152 | def evaluate(gt_file, dt_file, num_imgs):
153 | info_gt, anns_gt = read_csv(gt_file)
154 | info_dt, anns_dt = read_csv(dt_file)
155 | anns_gt = anns_gt[:num_imgs]
156 | assert len(anns_gt) == len(anns_dt)
157 | scores = []
158 | for i in range(len(anns_gt)):
159 | ann_gt = anns_gt[i]
160 | ann_dt = anns_dt[i]
161 | score = criterion(ann_gt, ann_dt)
162 | scores += score
163 | value = sum(scores)/len(scores)
164 | print('score = {}'.format(value))
165 |
166 | def eval():
167 | gt_file = '../FashionAI/data/train/Annotations/val.csv'
168 | # dt_file = 'val_result.csv'
169 | dt_file = 'modify.csv'
170 |
171 | num_imgs = 500
172 | evaluate(gt_file, dt_file,num_imgs)
173 |
174 |
175 | def main():
176 | os.environ['CUDA_VISIBLE_DEVICES'] = '1'
177 |
178 | # --------------------------- model -------------------------------------------------------------------------------
179 | import models.CPM_ResNet
180 | pytorch_model = '../FashionAI/Heatmap/experiments/CPM_ResNet/120000.pth.tar'
181 | model = models.CPM_ResNet.pose_estimation(class_num=25, pretrain=False)
182 | # -----------------------------------------------------------------------------------------------------------------
183 |
184 | img_dir = '../FashionAI/data/train/'
185 | ann_path = '../FashionAI/data/train/Annotations/val.csv'
186 | result_name = 'val_result.csv'
187 | # scale_search = [0.5, 0.7, 1.0, 1.3] # [0.5, 1.0, 1.5]
188 | scale_search = [0.5, 0.7, 1.0, 1.3]
189 | boxsize = 384
190 | # -------------------------- pytorch model------------------
191 | state_dict = torch.load(pytorch_model)['state_dict']
192 | model.load_state_dict(state_dict)
193 | model = model.cuda()
194 | model.eval()
195 | # --------------------------------------------------------
196 | anns = []
197 | with open(ann_path, 'rb') as f:
198 | reader = csv.reader(f)
199 | for row in reader:
200 | anns.append(row)
201 | info=anns[0]
202 | anns = anns[1:]
203 | #---------------------------------------------------------
204 | num_imgs =100# len(anns)
205 | results = []
206 | results.append(info)
207 |
208 | for i in range(num_imgs):
209 | print('{}/{}'.format(i, num_imgs))
210 | ann = anns[i]
211 | image_path = os.path.join(img_dir, ann[0])
212 | oriImg = cv2.imread(image_path)
213 | # multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
214 | multiplier = scale_search
215 | keypoints, canvas = apply_model(oriImg, model, multiplier)
216 | # cv2.imwrite(os.path.join('./result', ann[0].split('/')[-1]), canvas)
217 | row = prepare_row(ann, keypoints)
218 | results.append(row)
219 | write_csv(result_name, results)
220 | evaluate(ann_path, result_name,num_imgs)
221 |
222 | if __name__ == '__main__':
223 | main()
224 | # eval()
--------------------------------------------------------------------------------
/evaluation/generate_val.py:
--------------------------------------------------------------------------------
1 | # split train to trainminusval and val (500)
2 | import csv
3 | import os, random
4 |
5 | train_ann_path = '/data/xiaobing.wang/xiangyu.zhu/FashionAI/data/train/Annotations/train.csv'
6 | output_dir = '/data/xiaobing.wang/xiangyu.zhu/FashionAI/data/train/Annotations/'
7 | val_num = 500
8 |
9 | info = []
10 | anns = []
11 | with open(train_ann_path,'rb') as f:
12 | reader = csv.reader(f)
13 | for row in reader:
14 | anns.append(row)
15 | info = anns[0]
16 | anns = anns[1:]
17 |
18 | random.shuffle(anns)
19 | trainminusval_anns = [info]
20 | val_anns = [info]
21 | trainminusval_anns = trainminusval_anns + anns[:-val_num]
22 | val_anns = val_anns + anns[-val_num:]
23 |
24 | with open(os.path.join(output_dir,'trainminusval.csv'), 'w') as f:
25 | writer = csv.writer(f)
26 | writer.writerows(trainminusval_anns)
27 |
28 | with open(os.path.join(output_dir, 'val.csv'), 'w') as f:
29 | writer = csv.writer(f)
30 | writer.writerows(val_anns)
31 |
32 |
33 |
--------------------------------------------------------------------------------
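
random.shuffle above is unseeded, so every run of generate_val.py produces a different split. If a reproducible split matters, a one-line variant (the seed value is arbitrary):

```python
random.seed(42)       # fix the RNG so trainminusval.csv / val.csv are reproducible
random.shuffle(anns)
```
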
/evaluation/modify.py:
--------------------------------------------------------------------------------
1 | import csv
2 |
3 |
4 | def cell_str2int(cell):
5 | [x_str, y_str, vis_str] = cell.split('_')
6 | x, y, vis = int(x_str), int(y_str), int(vis_str)
7 | return [x,y,vis]
8 |
9 | csv_file = 'val_result.csv'
10 | # csv_file = 'result_0309_23.16%.csv'
11 | anns = []
12 | with open(csv_file, 'rb') as f:
13 | reader = csv.reader(f)
14 | for row in reader:
15 | anns.append(row)
16 | info = anns[0]
17 | anns = anns[1:]
18 |
19 | center_pair = [[0,3,5,7,13,15,17],
20 | [1,4,6,8,14,16,18]]
21 |
22 | near_pair = [[ 9,11,20,22],
23 | [10,12,21,23]]
24 |
25 | for i in range(len(anns)):
26 | ann = anns[i]
27 | center_x = 0
28 | count = 0
29 | for j in range(2, len(ann)):
30 | cell = ann[j]
31 | [x, y, vis] = cell_str2int(cell)
32 | center_x += x
33 | count += 1
34 | center_x = int(1.0*center_x/count)
35 | for j in range(len(near_pair[0])):
36 | indexA = near_pair[0][j] + 2
37 | indexB = near_pair[1][j] + 2
38 | [x_str_A, y_str_A, vis_str_A] = ann[indexA].split('_')
39 | x_A, y_A, vis_A = int(x_str_A), int(y_str_A), int(vis_str_A)
40 |
41 | [x_str_B, y_str_B, vis_str_B] = ann[indexB].split('_')
42 | x_B, y_B, vis_B = int(x_str_B), int(y_str_B), int(vis_str_B)
43 |
44 | if (vis_A == -1 and vis_B == -1) or (vis_A == 1 and vis_B == 1):
45 | continue
46 | if (vis_A == 1 and vis_B == -1):
47 | vis_B = 1
48 | x_B = x_A
49 | y_B = y_A
50 | elif (vis_B == 1 and vis_A == -1):
51 | vis_A = 1
52 | x_A = x_B
53 | y_A = y_B
54 | anns[i][indexA] = str(x_A) + '_' + str(y_A) + '_' + str(vis_A)
55 | anns[i][indexB] = str(x_B) + '_' + str(y_B) + '_' + str(vis_B)
56 |
57 | for j in range(len(center_pair[0])):
58 | indexA = center_pair[0][j] + 2
59 | indexB = center_pair[1][j] + 2
60 | [x_str_A, y_str_A, vis_str_A] = ann[indexA].split('_')
61 | x_A, y_A, vis_A = int(x_str_A), int(y_str_A), int(vis_str_A)
62 |
63 | [x_str_B, y_str_B, vis_str_B] = ann[indexB].split('_')
64 | x_B, y_B, vis_B = int(x_str_B), int(y_str_B), int(vis_str_B)
65 |
66 | if (vis_A == -1 and vis_B== -1) or (vis_A == 1 and vis_B == 1):
67 | continue
68 | if (vis_A == 1 and vis_B == -1):
69 | vis_B = 1
70 | x_B = abs(2*center_x - x_A)
71 | y_B = y_A
72 | elif (vis_B == 1 and vis_A == -1):
73 | vis_A = 1
74 | x_A = abs(2*center_x - x_B)
75 | y_A = y_B
76 | anns[i][indexA] = str(x_A) + '_' + str(y_A) + '_' + str(vis_A)
77 | anns[i][indexB] = str(x_B) + '_' + str(y_B) + '_' + str(vis_B)
78 |
79 | results = [info]
80 | results = results + anns
81 |
82 | with open('modify.csv', 'w') as f:
83 | writer = csv.writer(f)
84 | writer.writerows(results)
85 |
86 |
87 | '''
88 | 0'neckline_left',
89 | 1'neckline_right',
90 | 2 'center_front',
91 | 3'shoulder_left',
92 | 4 'shoulder_right',
93 | 5 'armpit_left',
94 | 6 'armpit_right',
95 | 7 'waistline_left',
96 | 8 'waistline_right',
97 | 9 'cuff_left_in',
98 | 10 'cuff_left_out',
99 | 11 'cuff_right_in',
100 | 12 'cuff_right_out',
101 | 13 'top_hem_left',
102 | 14 'top_hem_right',
103 | 15 'waistband_left',
104 | 16 'waistband_right',
105 | 17 'hemline_left',
106 | 18 'hemline_right',
107 | 19 'crotch',
108 | 20 'bottom_left_in',
109 | 21 'bottom_left_out',
110 | 22 'bottom_right_in',
111 | 23 'bottom_right_out'
112 | '''
--------------------------------------------------------------------------------
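
The script above fills in a keypoint whose left/right counterpart was detected: for near pairs (e.g., cuff_left_in / cuff_right_in) the visible point's coordinates are copied verbatim, while for center pairs (e.g., shoulder_left / shoulder_right) the visible point is mirrored about the mean x of all predicted points, i.e. the missing side gets x' = |2 * center_x - x| with the same y.
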
/evaluation/submit.py:
--------------------------------------------------------------------------------
1 | import csv
2 | import os
3 | import sys
4 | import numpy as np
5 | import cv2
6 | from scipy.ndimage.filters import gaussian_filter
7 | import math, time
8 | import torch
9 |
10 | import util
11 | sys.path.append('../')
12 | def apply_model(oriImg, model, multiplier):
13 | stride = 8
14 | height, width, _ = oriImg.shape
15 | normed_img = np.array(oriImg, dtype=np.float32)
16 | heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 25), dtype=np.float32)
17 | for m in range(len(multiplier)):
18 | scale = multiplier[m]
19 | imageToTest = cv2.resize(normed_img, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
20 | # imgToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, 128)
21 | imgToTest_padded, pad = util.padRightDownCorner(imageToTest, 32, 128)
22 |
23 | input_img = np.transpose(np.float32(imgToTest_padded[:, :, :, np.newaxis]),
24 | (3, 2, 0, 1)) / 255 - 0.5 # required shape (1, c, h, w)
25 |
26 | input_var = torch.autograd.Variable(torch.from_numpy(input_img).cuda())
27 |
28 | # get the features
29 | # heat1, heat2, heat3, heat4, heat5, heat6 = model(input_var)
30 | heat = model(input_var)
31 |
32 | # get the heatmap
33 | heatmap = heat.data.cpu().numpy()
34 | heatmap = np.transpose(np.squeeze(heatmap), (1, 2, 0)) # (h, w, c)
35 | heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
36 | heatmap = heatmap[:imgToTest_padded.shape[0] - pad[2], :imgToTest_padded.shape[1] - pad[3], :]
37 | heatmap = cv2.resize(heatmap, (width, height), interpolation=cv2.INTER_CUBIC)
38 | heatmap_avg = heatmap_avg + heatmap / len(multiplier)
39 |
40 | all_peaks = [] # all of the possible points by classes.
41 | peak_counter = 0
42 | thre1 = 0.1
43 | for part in range(25 - 1):
44 | x_list = []
45 | y_list = []
46 | map_ori = heatmap_avg[:, :, part]
47 | map = gaussian_filter(map_ori, sigma=3)
48 |
49 | map_left = np.zeros(map.shape)
50 | map_left[1:, :] = map[:-1, :]
51 | map_right = np.zeros(map.shape)
52 | map_right[:-1, :] = map[1:, :]
53 | map_up = np.zeros(map.shape)
54 | map_up[:, 1:] = map[:, :-1]
55 | map_down = np.zeros(map.shape)
56 | map_down[:, :-1] = map[:, 1:]
57 |
58 | peaks_binary = np.logical_and.reduce(
59 | (map >= map_left, map >= map_right, map >= map_up, map >= map_down, map > thre1))
60 | peaks = zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]) # note reverse
61 | peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
62 | id = range(peak_counter, peak_counter + len(peaks))
63 | peaks_with_score_and_id = [peaks_with_score[i] + (id[i],) for i in range(len(id))]
64 |
65 | all_peaks.append(peaks_with_score_and_id)
66 | peak_counter += len(peaks)
67 |
68 | # sort by score
69 | for i in range(24):
70 | all_peaks[i] = sorted(all_peaks[i], key=lambda ele : ele[2],reverse = True)
71 |
72 | canvas = oriImg.copy()
73 | # draw points
74 | for i in range(24):
75 | for j in range(len(all_peaks[i])):
76 | if j == 0:
77 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, [0, 0, 255], thickness=-1)
78 | else:
79 | cv2.circle(canvas, all_peaks[i][j][0:2], 4, [255, 0, 0], thickness=-1)
80 |
81 | keypoints = -1*np.ones((24, 3))
82 | for i in range(24):
83 | if len(all_peaks[i]) == 0:
84 | continue
85 | else:
86 | keypoints[i,0], keypoints[i,1], keypoints[i,2] = all_peaks[i][0][0], all_peaks[i][0][1], 1
87 |
88 | return keypoints, canvas
89 |
90 |
91 | def write_csv(name, results):
92 | import csv
93 | with open(name, 'w') as f:
94 | writer = csv.writer(f)
95 | writer.writerows(results)
96 |
97 | def prepare_row(ann, keypoints):
98 | # cls
99 | image_name = ann[0]
100 | category = ann[1]
101 | keypoints_str = []
102 | for i in range(24):
103 | cell_str = str(int(keypoints[i][0])) + '_' + str(int(keypoints[i][1])) + '_' + str(int(keypoints[i][2]))
104 | keypoints_str.append(cell_str)
105 | row = [image_name, category] + keypoints_str
106 | return row
107 |
108 | def read_csv(ann_file):
109 | info = []
110 | anns = []
111 | with open(ann_file, 'rb') as f:
112 | reader = csv.reader(f)
113 | for row in reader:
114 | anns.append(row)
115 | info = anns[0]
116 | anns = anns[1:]
117 | return info, anns
118 |
119 | def euclidean_distance(a, b):
120 | return math.sqrt((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2)
121 |
122 | def criterion(ann_gt, ann_dt):
123 | category = ann_gt[1]
124 | gt_kpt = -1 * np.ones((24, 3))
125 | for i in range(len(gt_kpt)):
126 | x_str, y_str, vis_str = ann_gt[i + 2].split('_')
127 | gt_kpt[i][0], gt_kpt[i][1], gt_kpt[i][2] = int(x_str), int(y_str), int(vis_str)
128 |
129 | dt_kpt = -1 * np.ones((24, 3))
130 | for i in range(len(dt_kpt)):
131 | x_str, y_str, vis_str = ann_dt[i + 2].split('_')
132 | dt_kpt[i][0], dt_kpt[i][1], dt_kpt[i][2] = int(x_str), int(y_str), int(vis_str)
133 |
134 | if category in ['blouse','outwear','dress']: # armpit distance
135 | thre = euclidean_distance(gt_kpt[5], gt_kpt[6])
136 | elif category in ['trousers', 'skirt']: # waistband distance
137 | thre = euclidean_distance(gt_kpt[15], gt_kpt[16]) # 15/16 are waistband_left/right (7/8 are waistline)
138 | if thre == 0:
139 | return []
140 | score = []
141 | for i in range(len(gt_kpt)):
142 | if gt_kpt[i][2] == 1:
143 | #if dt_kpt[i][2] == -1:
144 | # score.append(2)
145 | #else:
146 | score.append(1.0* euclidean_distance(gt_kpt[i],dt_kpt[i])/ thre)
147 | return score
148 | #print('score = {}'.format(score))
149 |
150 |
151 |
152 | def evaluate(gt_file, dt_file, num_imgs):
153 | info_gt, anns_gt = read_csv(gt_file)
154 | info_dt, anns_dt = read_csv(dt_file)
155 | anns_gt = anns_gt[:num_imgs]
156 | assert len(anns_gt) == len(anns_dt)
157 | scores = []
158 | for i in range(len(anns_gt)):
159 | ann_gt = anns_gt[i]
160 | ann_dt = anns_dt[i]
161 | score = criterion(ann_gt, ann_dt)
162 | scores += score
163 | value = sum(scores)/len(scores)
164 | print('score = {}'.format(value))
165 |
166 | def eval():
167 | gt_file = '../FashionAI/data/train/Annotations/val.csv'
168 | dt_file = 'val_result.csv'
169 | # dt_file = 'modify.csv'
170 |
171 | num_imgs = 100
172 | evaluate(gt_file, dt_file,num_imgs)
173 |
174 |
175 | def main():
176 | os.environ['CUDA_VISIBLE_DEVICES'] = '2'
177 |
178 | # --------------------------- model -------------------------------------------------------------------------------
179 | import models.CPM_FPN
180 | pytorch_model = '../FashionAI/Heatmap/experiments/CPM_FPN3/120000_8%.pth.tar'
181 | model = models.CPM_FPN.pose_estimation(class_num=25, pretrain=False)
182 | # -----------------------------------------------------------------------------------------------------------------
183 |
184 | img_dir = '../FashionAI/data/test/'
185 | ann_path = '../FashionAI/data/test/test.csv'
186 | result_name = 'result.csv'
187 | # scale_search = [0.5, 0.7, 1.0, 1.3] # [0.5, 1.0, 1.5]
188 | scale_search = [0.5, 0.7, 1.0]
189 | boxsize = 384
190 | # -------------------------- pytorch model------------------
191 | state_dict = torch.load(pytorch_model)['state_dict']
192 | model.load_state_dict(state_dict)
193 | model = model.cuda()
194 | model.eval()
195 | # --------------------------------------------------------
196 | anns = []
197 | with open(ann_path, 'rb') as f:
198 | reader = csv.reader(f)
199 | for row in reader:
200 | anns.append(row)
201 | info=anns[0]
202 | anns = anns[1:]
203 | #---------------------------------------------------------
204 | num_imgs = len(anns)
205 | results = []
206 | results.append(info)
207 |
208 | for i in range(num_imgs):
209 | print('{}/{}'.format(i, num_imgs))
210 | ann = anns[i]
211 | image_path = os.path.join(img_dir, ann[0])
212 | oriImg = cv2.imread(image_path)
213 | # multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
214 | multiplier = scale_search
215 | keypoints, canvas = apply_model(oriImg, model, multiplier)
216 | # cv2.imwrite(os.path.join('./result', ann[0].split('/')[-1]), canvas)
217 | row = prepare_row(ann, keypoints)
218 | results.append(row)
219 | write_csv(result_name, results)
220 |
221 | if __name__ == '__main__':
222 | main()
--------------------------------------------------------------------------------
/experiments/CPM/config.yml:
--------------------------------------------------------------------------------
1 | workers: 6
2 | weight_decay: 0.0005
3 | momentum: 0.9
4 | display: 50
5 | max_iter: 160000
6 | batch_size: 10
7 | test_interval: 50
8 | topk: 3
9 | base_lr: 0.00004
10 | start_iters: 0
11 | best_model: 12345678.9
12 | #-------------lr_policy--------------------#
13 | lr_policy: 'multistep'
14 | policy_parameter:
15 | stepvalue: [50000, 100000, 120000]
16 | gamma: 0.33
--------------------------------------------------------------------------------
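
util.py is not reproduced in this dump, but given the config keys, the 'multistep' policy consumed by util.adjust_learning_rate presumably follows the standard Caffe rule: the learning rate is multiplied by gamma each time the iteration count passes a stepvalue entry. A hypothetical sketch of that rule:

```python
def multistep_lr(base_lr, iters, stepvalue, gamma):
    # lr = base_lr * gamma^(number of step boundaries already passed)
    passed = sum(1 for s in stepvalue if iters >= s)
    return base_lr * (gamma ** passed)

# With this config: 4e-5 until iter 50000, then x0.33 at 50000, x0.33^2 at 100000, x0.33^3 at 120000.
```
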
/experiments/CPM/train_net.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os, sys
3 | sys.path.append('../../')
4 | import dataset_loader
5 | import torch
6 | import torch.nn as nn
7 | import torch.backends.cudnn as cudnn
8 | import util
9 | import cv2
10 | import argparse
11 | import models.CPM
12 | import torchvision.transforms as transforms
13 | import time
14 |
15 | def parse():
16 | parser = argparse.ArgumentParser()
17 | return parser.parse_args()
18 |
19 | def construct_model(args):
20 | model = models.CPM.PoseModel(num_point=25, pretrained=True)
21 | model.cuda()
22 | return model
23 |
24 | def get_parameters(model, config, isdefault=True):
25 | if isdefault:
26 | return model.parameters(), [1.]
27 | lr_1 = []
28 | lr_2 = []
29 | lr_4 = []
30 | lr_8 = []
31 | params_dict = dict(model.named_parameters())
32 | for key, value in params_dict.items():
33 | if 'stage' in key:
34 | if key[-4:] == 'bias':
35 | lr_8.append(value)
36 | else:
37 | lr_4.append(value)
38 | elif key[-4:] == 'bias':
39 | lr_2.append(value)
40 | else:
41 | lr_1.append(value)
42 |
43 |
44 | params = [{'params': lr_1, 'lr': config.base_lr},
45 | {'params': lr_2, 'lr': config.base_lr * 2.},
46 | {'params': lr_4, 'lr': config.base_lr * 4.},
47 | {'params': lr_8, 'lr': config.base_lr * 8.}]
48 |
49 | return params, [1., 2., 4., 8.]
50 |
51 | def train_net(model, args):
52 | ann_path = '../FashionAI/data/train/Annotations/trainminusval.csv'
53 | img_dir = '../FashionAI/data/train/'
54 |
55 | stride = 8
56 | cudnn.benchmark = True
57 | config = util.Config('./config.yml')
58 |
59 | train_loader = torch.utils.data.DataLoader(
60 | dataset_loader.dataset_loader(img_dir, ann_path, stride,
61 | None),  # torchvision's ToTensor takes a single image; dataset_loader calls transforms(img, kpt, center), so use the default _croppad
62 | batch_size=config.batch_size, shuffle=True,
63 | num_workers=config.workers, pin_memory=True)
64 |
65 | criterion = nn.MSELoss().cuda()
66 | params, multiple = get_parameters(model, config, False)
67 |
68 | optimizer = torch.optim.SGD(params, config.base_lr, momentum=config.momentum,
69 | weight_decay=config.weight_decay)
70 | model.train()
71 | iters = 0
72 | batch_time = util.AverageMeter()
73 | data_time = util.AverageMeter()
74 | losses = util.AverageMeter()
75 | losses_list = [util.AverageMeter() for i in range(12)]
76 | end = time.time()
77 |
78 | heat_weight = 48 * 48 * 25 / 2.0 # 48x48x25 heatmap elements; converts mean MSE to the sum/2 convention of the original code
79 | # heat_weight = 1
80 |
81 | while iters < config.max_iter:
82 | for i, (input, heatmap) in enumerate(train_loader):
83 | learning_rate = util.adjust_learning_rate(optimizer, iters, config.base_lr, policy=config.lr_policy,\
84 | policy_parameter=config.policy_parameter, multiple=multiple)
85 | data_time.update(time.time() - end)
86 |
87 | input = input.cuda(async=True)
88 | heatmap = heatmap.cuda(async=True)
89 | input_var = torch.autograd.Variable(input)
90 | heatmap_var = torch.autograd.Variable(heatmap)
91 |
92 | heat1, heat2, heat3, heat4, heat5, heat6 = model(input_var)
93 | loss1 = criterion(heat1,heatmap_var) * heat_weight
94 | loss2 = criterion(heat2, heatmap_var) * heat_weight
95 | loss3 = criterion(heat3, heatmap_var) * heat_weight
96 | loss4 = criterion(heat4, heatmap_var) * heat_weight
97 | loss5 = criterion(heat5, heatmap_var) * heat_weight
98 | loss6 = criterion(heat6, heatmap_var) * heat_weight
99 | loss = loss1 + loss2 + loss3 + loss4 + loss5 + loss6
100 | losses.update(loss.data[0], input.size(0))
101 | loss_list = [loss1 , loss2 , loss3 , loss4 , loss5 , loss6]
102 | for cnt, l in enumerate(loss_list):
103 | losses_list[cnt].update(l.data[0], input.size(0))
104 |
105 | optimizer.zero_grad()
106 | loss.backward()
107 | optimizer.step()
108 | batch_time.update(time.time() - end)
109 | end = time.time()
110 |
111 |
112 | iters += 1
113 | if iters % config.display == 0:
114 | print('Train Iteration: {0}\t'
115 | 'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
116 | 'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:3f})\n'
117 | 'Learning rate = {2}\n'
118 | 'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
119 | iters, config.display, learning_rate, batch_time=batch_time,
120 | data_time=data_time, loss=losses))
121 | for cnt in range(0, 6):
122 | print('Loss{0}_1 = {loss1.val:.8f} (ave = {loss1.avg:.8f})'.format(cnt + 1,loss1=losses_list[cnt]))
123 | print(time.strftime(
124 | '%Y-%m-%d %H:%M:%S -----------------------------------------------------------------------------------------------------------------\n',
125 | time.localtime()))
126 |
127 | batch_time.reset()
128 | data_time.reset()
129 | losses.reset()
130 | for cnt in range(12):
131 | losses_list[cnt].reset()
132 |
133 | if iters % 5000 == 0:
134 | torch.save({
135 | 'iter': iters,
136 | 'state_dict': model.state_dict(),
137 | }, str(iters) + '.pth.tar')
138 |
139 | if iters == config.max_iter:
140 | break
141 | return
142 |
143 | if __name__ == '__main__':
144 | os.environ['CUDA_VISIBLE_DEVICES'] = '3'
145 | args = parse()
146 | model = construct_model(args)
147 | train_net(model, args)
--------------------------------------------------------------------------------
/experiments/CPM_FPN/config.yml:
--------------------------------------------------------------------------------
1 | workers: 6
2 | weight_decay: 0.0005
3 | momentum: 0.9
4 | display: 50
5 | max_iter: 160000
6 | batch_size: 10
7 | test_interval: 10
8 | topk: 3
9 | base_lr: 0.00004
10 | start_iters: 0
11 | best_model: 12345678.9
12 | #-------------lr_policy--------------------#
13 | lr_policy: 'multistep'
14 | policy_parameter:
15 | stepvalue: [50000, 80000]
16 | gamma: 0.1
--------------------------------------------------------------------------------
/experiments/CPM_FPN/train_net.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os, sys
3 | sys.path.append('../../')
4 | import Mytransforms
5 | import dataset_loader
6 | import torch
7 | import torch.nn as nn
8 | import torch.backends.cudnn as cudnn
9 | import util
10 | import cv2
11 | import argparse
12 | import models.CPM_FPN
13 | import torchvision.transforms as transforms
14 | import time
15 |
16 | def parse():
17 | parser = argparse.ArgumentParser()
18 | return parser.parse_args()
19 |
20 | def construct_model(args):
21 | model = models.CPM_FPN.pose_estimation(class_num=25, pretrain=True)
22 | model.cuda()
23 | print (model)
24 | return model
25 |
26 |
27 |
28 | def train_net(model, args):
29 |
30 | ann_path = '../FashionAI/data/train/Annotations/trainminusval.csv'
31 | img_dir = '../FashionAI/data/train/'
32 |
33 | stride = 8
34 | cudnn.benchmark = True
35 | config = util.Config('./config.yml')
36 | train_loader = torch.utils.data.DataLoader(
37 | dataset_loader.dataset_loader(img_dir, ann_path, stride,
38 | Mytransforms.Compose([Mytransforms.RandomResized(),
39 | Mytransforms.RandomRotate(40),
40 | Mytransforms.RandomCrop(384),
41 | ]), sigma=15),
42 | batch_size=config.batch_size, shuffle=True,
43 | num_workers=config.workers, pin_memory=True)
44 |
45 | criterion = nn.MSELoss().cuda()
46 | params = []
47 | for key, value in model.named_parameters():
48 | if value.requires_grad != False:
49 | params.append({'params': value, 'lr': config.base_lr})
50 |
51 | optimizer = torch.optim.SGD(params, config.base_lr, momentum=config.momentum,
52 | weight_decay=config.weight_decay)
53 | # model.train() # train() only changes BatchNorm/Dropout behaviour
54 | model.eval() # train with BatchNorm statistics frozen
55 |
56 | from matplotlib import pyplot as plt
57 |
58 | iters = 0
59 | batch_time = util.AverageMeter()
60 | data_time = util.AverageMeter()
61 | losses = util.AverageMeter()
62 | losses_list = [util.AverageMeter() for i in range(12)]
63 | end = time.time()
64 |
65 | heat_weight = 48 * 48 * 25 / 2.0 # 48x48x25 heatmap elements; converts mean MSE to the sum/2 convention of the original code
66 | # heat_weight = 1
67 |
68 | while iters < config.max_iter:
69 | for i, (input, heatmap) in enumerate(train_loader):
70 | learning_rate = util.adjust_learning_rate(optimizer, iters, config.base_lr, policy=config.lr_policy,\
71 | policy_parameter=config.policy_parameter)
72 | data_time.update(time.time() - end)
73 |
74 | input = input.cuda(async=True)
75 | heatmap = heatmap.cuda(async=True)
76 | input_var = torch.autograd.Variable(input)
77 | heatmap_var = torch.autograd.Variable(heatmap)
78 |
79 | heat = model(input_var)
80 |
81 | # feat = C4.cpu().data.numpy()
82 | # for n in range(100):
83 | # plt.subplot(10, 10, n + 1);
84 | # plt.imshow(feat[0, n, :, :], cmap='gray')
85 | # plt.xticks([]);
86 | # plt.yticks([])
87 | # plt.show()
88 |
89 | loss1 = criterion(heat, heatmap_var) * heat_weight
90 | # loss2 = criterion(heat4, heatmap_var) * heat_weight
91 | # loss3 = criterion(heat5, heatmap_var) * heat_weight
92 | # loss4 = criterion(heat6, heatmap_var) * heat_weight
93 | # loss5 = criterion(heat, heatmap_var)
94 | # loss6 = criterion(heat, heatmap_var)
95 |
96 | loss = loss1 # + loss2 + loss3# + loss4# + loss5 + loss6
97 | losses.update(loss.data[0], input.size(0))
98 | loss_list = [loss1]#, loss2, loss3]# , loss4 ]# , loss5 , loss6]
99 | for cnt, l in enumerate(loss_list):
100 | losses_list[cnt].update(l.data[0], input.size(0))
101 |
102 | optimizer.zero_grad()
103 | loss.backward()
104 | optimizer.step()
105 | batch_time.update(time.time() - end)
106 | end = time.time()
107 |
108 |
109 | iters += 1
110 | if iters % config.display == 0:
111 | print('Train Iteration: {0}\t'
112 | 'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
113 | 'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:3f})\n'
114 | 'Learning rate = {2}\n'
115 | 'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
116 | iters, config.display, learning_rate, batch_time=batch_time,
117 | data_time=data_time, loss=losses))
118 | for cnt in range(0, 1):
119 | print('Loss{0}_1 = {loss1.val:.8f} (ave = {loss1.avg:.8f})'.format(cnt + 1,loss1=losses_list[cnt]))
120 | print(time.strftime(
121 | '%Y-%m-%d %H:%M:%S -----------------------------------------------------------------------------------------------------------------\n',
122 | time.localtime()))
123 |
124 | batch_time.reset()
125 | data_time.reset()
126 | losses.reset()
127 | for cnt in range(12):
128 | losses_list[cnt].reset()
129 |
130 | if iters % 5000 == 0:
131 | torch.save({
132 | 'iter': iters,
133 | 'state_dict': model.state_dict(),
134 | }, str(iters) + '.pth.tar')
135 |
136 | if iters == config.max_iter:
137 | break
138 | return
139 |
140 | if __name__ == '__main__':
141 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
142 | args = parse()
143 | model = construct_model(args)
144 | train_net(model, args)
145 |
--------------------------------------------------------------------------------
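Note: Mytransforms itself (Mytransforms.py) is not reproduced in this dump. A minimal sketch of the Compose pattern the call above assumes -- each transform receives and returns the image together with its keypoints, so the geometric augmentations (resize, rotate, crop) stay consistent with the annotations:

# Sketch only: the real implementation lives in Mytransforms.py.
class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, kpts):
        # apply each joint image/keypoint transform in order
        for t in self.transforms:
            img, kpts = t(img, kpts)
        return img, kpts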
/experiments/CPM_ResNet/config.yml:
--------------------------------------------------------------------------------
1 | workers: 6
2 | weight_decay: 0.0005
3 | momentum: 0.9
4 | display: 50
5 | max_iter: 70000
6 | batch_size: 10
7 | test_interval: 10
8 | topk: 3
9 | base_lr: 0.00004
10 | start_iters: 0
11 | best_model: 12345678.9
12 | #-------------lr_policy--------------------#
13 | lr_policy: 'multistep'
14 | policy_parameter:
15 | stepvalue: [30000, 50000]
16 | gamma: 0.1
--------------------------------------------------------------------------------
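util.Config is defined in util.py (not shown in this section); the train scripts assume it loads a YAML file like the one above and exposes every key as an attribute. A minimal sketch of that assumed behaviour:

import yaml

class Config(object):
    # assumed behaviour of util.Config: YAML keys become attributes
    def __init__(self, path):
        with open(path) as f:
            for key, value in yaml.safe_load(f).items():
                setattr(self, key, value)

# config = Config('./config.yml')
# config.batch_size                    -> 10
# config.policy_parameter['stepvalue'] -> [30000, 50000]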
/experiments/CPM_ResNet/train_net.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os, sys
3 | sys.path.append('../../')
4 | import dataset_loader
5 | import torch
6 | import torch.nn as nn
7 | import torch.backends.cudnn as cudnn
8 | import util
9 | import cv2
10 | import argparse
11 | import models.CPM_ResNet
12 | import torchvision.transforms as transforms
13 | import time
14 |
15 | def parse():
16 | parser = argparse.ArgumentParser()
17 | return parser.parse_args()
18 |
19 | def construct_model(args):
20 | model = models.CPM_ResNet.pose_estimation(class_num=25, pretrain=True)
21 | model.cuda()
22 | print(model)
23 | return model
24 |
25 |
26 |
27 | def train_net(model, args):
28 |
29 | ann_path = '../FashionAI/data/train/Annotations/trainminusval.csv'
30 | img_dir = '../FashionAI/data/train/'
31 |
32 | stride = 8
33 | cudnn.benchmark = True
34 | config = util.Config('./config.yml')
35 |
36 | train_loader = torch.utils.data.DataLoader(
37 | dataset_loader.dataset_loader(img_dir, ann_path, stride,
38 | transforms.ToTensor()),
39 | batch_size=config.batch_size, shuffle=True,
40 | num_workers=config.workers, pin_memory=True)
41 |
42 | criterion = nn.MSELoss().cuda()
43 | params = []
44 | for key, value in model.named_parameters():
45 | if value.requires_grad:
46 | params.append({'params': value, 'lr': config.base_lr})
47 |
48 | optimizer = torch.optim.SGD(params, config.base_lr, momentum=config.momentum,
49 | weight_decay=config.weight_decay)
50 | # model.train() would put BN/dropout layers into training mode
51 | model.eval()  # keep the pretrained BN statistics fixed
52 |
53 | from matplotlib import pyplot as plt  # only used by the commented-out debug block below
54 |
55 | iters = 0
56 | batch_time = util.AverageMeter()
57 | data_time = util.AverageMeter()
58 | losses = util.AverageMeter()
59 | losses_list = [util.AverageMeter() for i in range(12)]
60 | end = time.time()
61 |
62 | heat_weight = 48 * 48 * 25 / 2.0  # scale the loss so values stay comparable with the original implementation
63 | # heat_weight = 1
64 |
65 | while iters < config.max_iter:
66 | for i, (input, heatmap) in enumerate(train_loader):
67 | learning_rate = util.adjust_learning_rate(optimizer, iters, config.base_lr, policy=config.lr_policy,\
68 | policy_parameter=config.policy_parameter)
69 | data_time.update(time.time() - end)
70 |
71 | input = input.cuda(async=True)
72 | heatmap = heatmap.cuda(async=True)
73 | input_var = torch.autograd.Variable(input)
74 | heatmap_var = torch.autograd.Variable(heatmap)
75 |
76 | heat = model(input_var)
77 |
78 | # feat = C4.cpu().data.numpy()
79 | # for n in range(100):
80 | # plt.subplot(10, 10, n + 1);
81 | # plt.imshow(feat[0, n, :, :], cmap='gray')
82 | # plt.xticks([]);
83 | # plt.yticks([])
84 | # plt.show()
85 |
86 | loss1 = criterion(heat, heatmap_var) * heat_weight
87 | # loss2 = criterion(heat, heatmap_var)
88 | # loss3 = criterion(heat, heatmap_var)
89 | # loss4 = criterion(heat, heatmap_var)
90 | # loss5 = criterion(heat, heatmap_var)
91 | # loss6 = criterion(heat, heatmap_var)
92 |
93 | loss = loss1# + loss2 + loss3 + loss4 + loss5 + loss6
94 | losses.update(loss.data[0], input.size(0))
95 | loss_list = [loss1]# , loss2 , loss3 , loss4 , loss5 , loss6]
96 | for cnt, l in enumerate(loss_list):
97 | losses_list[cnt].update(l.data[0], input.size(0))
98 |
99 | optimizer.zero_grad()
100 | loss.backward()
101 | optimizer.step()
102 | batch_time.update(time.time() - end)
103 | end = time.time()
104 |
105 |
106 | iters += 1
107 | if iters % config.display == 0:
108 | print('Train Iteration: {0}\t'
109 | 'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
110 | 'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n'
111 | 'Learning rate = {2}\n'
112 | 'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
113 | iters, config.display, learning_rate, batch_time=batch_time,
114 | data_time=data_time, loss=losses))
115 | for cnt in range(0, 1):
116 | print('Loss{0}_1 = {loss1.val:.8f} (ave = {loss1.avg:.8f})'.format(cnt + 1,loss1=losses_list[cnt]))
117 | print(time.strftime(
118 | '%Y-%m-%d %H:%M:%S -----------------------------------------------------------------------------------------------------------------\n',
119 | time.localtime()))
120 |
121 | batch_time.reset()
122 | data_time.reset()
123 | losses.reset()
124 | for cnt in range(12):
125 | losses_list[cnt].reset()
126 |
127 | if iters % 5000 == 0:
128 | torch.save({
129 | 'iter': iters,
130 | 'state_dict': model.state_dict(),
131 | }, str(iters) + '.pth.tar')
132 |
133 | if iters == config.max_iter:
134 | break
135 | return
136 |
137 | if __name__ == '__main__':
138 | os.environ['CUDA_VISIBLE_DEVICES'] = '1'
139 | args = parse()
140 | model = construct_model(args)
141 | train_net(model, args)
--------------------------------------------------------------------------------
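The train scripts above save a checkpoint every 5000 iterations as a dict with 'iter' and 'state_dict' keys; resuming is not implemented in this repo, but a hypothetical resume snippet matching that format would be (path illustrative):

import torch

checkpoint = torch.load('50000.pth.tar')         # illustrative checkpoint path
model.load_state_dict(checkpoint['state_dict'])
start_iters = checkpoint['iter']                 # could seed config.start_iters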
/experiments/FPN/config.yml:
--------------------------------------------------------------------------------
1 | workers: 6
2 | weight_decay: 0.0005
3 | momentum: 0.9
4 | display: 50
5 | max_iter: 160000
6 | batch_size: 10
7 | test_interval: 10
8 | topk: 3
9 | base_lr: 0.0001
10 | start_iters: 0
11 | best_model: 12345678.9
12 | #-------------lr_policy--------------------#
13 | lr_policy: 'multistep'
14 | policy_parameter:
15 | stepvalue: [50000, 100000, 120000]
16 | gamma: 0.33
--------------------------------------------------------------------------------
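All configs in this repo use the 'multistep' policy. Assuming util.adjust_learning_rate follows the usual Caffe-style rule, the effective learning rate is base_lr decayed by gamma once per stepvalue milestone already passed -- a sketch:

def multistep_lr(base_lr, gamma, stepvalue, iters):
    # Caffe-style multistep: multiply by gamma at each milestone passed
    passed = sum(1 for s in stepvalue if iters >= s)
    return base_lr * (gamma ** passed)

# with the config above:
# multistep_lr(0.0001, 0.33, [50000, 100000, 120000], 60000) -> 3.3e-05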
/experiments/FPN/train_net.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os, sys
3 | sys.path.append('../../')
4 | import dataset_loader
5 | import torch
6 | import torch.nn as nn
7 | import torch.backends.cudnn as cudnn
8 | import util
9 | import cv2
10 | import argparse
11 | import models.FPN
12 | import torchvision.transforms as transforms
13 | import time
14 |
15 | def parse():
16 | parser = argparse.ArgumentParser()
17 | return parser.parse_args()
18 |
19 | def construct_model(args):
20 | model = models.FPN.pose_estimation(class_num=25, pretrain=True)
21 | model.cuda()
22 | return model
23 |
24 |
25 |
26 | def train_net(model, args):
27 |
28 | ann_path = '/disk/data/fashionAI/train/Annotations/train.csv'
29 | img_dir = '/disk/data/fashionAI/train/'
30 |
31 | stride = 8
32 | cudnn.benchmark = True
33 | config = util.Config('./config.yml')
34 |
35 | train_loader = torch.utils.data.DataLoader(
36 | dataset_loader.dataset_loader(img_dir, ann_path, stride,
37 | transforms.ToTensor()),
38 | batch_size=config.batch_size, shuffle=True,
39 | num_workers=config.workers, pin_memory=True)
40 |
41 | criterion = nn.MSELoss().cuda()
42 | params = []
43 | for key, value in model.named_parameters():
44 | if value.requires_grad:
45 | params.append({'params': value, 'lr': config.base_lr})
46 |
47 | optimizer = torch.optim.SGD(params, config.base_lr, momentum=config.momentum,
48 | weight_decay=config.weight_decay)
49 | model.train()  # training mode (matters only for BN and dropout layers)
50 | # model.eval()
51 |
52 |
53 | iters = 0
54 | batch_time = util.AverageMeter()
55 | data_time = util.AverageMeter()
56 | losses = util.AverageMeter()
57 | losses_list = [util.AverageMeter() for i in range(12)]
58 | end = time.time()
59 |
60 | heat_weight = 48 * 48 * 25 / 2.0  # scale factor kept for comparability with the original implementation (not applied below)
61 | # heat_weight = 1
62 |
63 | while iters < config.max_iter:
64 | for i, (input, heatmap) in enumerate(train_loader):
65 | learning_rate = util.adjust_learning_rate(optimizer, iters, config.base_lr, policy=config.lr_policy,\
66 | policy_parameter=config.policy_parameter)
67 | data_time.update(time.time() - end)
68 |
69 | input = input.cuda(async=True)
70 | heatmap = heatmap.cuda(async=True)
71 | input_var = torch.autograd.Variable(input)
72 | heatmap_var = torch.autograd.Variable(heatmap)
73 |
74 | heat2, heat3, heat4, heat5, heat6 = model(input_var)
75 | loss1 = criterion(heat3, heatmap_var)  # heat3 recurs in loss3 and loss6, tripling its weight; heat6 is never supervised
76 | loss2 = criterion(heat2, heatmap_var)
77 | loss3 = criterion(heat3, heatmap_var)
78 | loss4 = criterion(heat4, heatmap_var)
79 | loss5 = criterion(heat5, heatmap_var)
80 | loss6 = criterion(heat3, heatmap_var)
81 |
82 | loss = loss1 + loss2 + loss3 + loss4 + loss5 + loss6
83 | losses.update(loss.data[0], input.size(0))
84 | loss_list = [loss1 , loss2 , loss3 , loss4 , loss5 , loss6]
85 | for cnt, l in enumerate(loss_list):
86 | losses_list[cnt].update(l.data[0], input.size(0))
87 |
88 | optimizer.zero_grad()
89 | loss.backward()
90 | optimizer.step()
91 | batch_time.update(time.time() - end)
92 | end = time.time()
93 |
94 |
95 | iters += 1
96 | if iters % config.display == 0:
97 | print('Train Iteration: {0}\t'
98 | 'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
99 | 'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:.3f})\n'
100 | 'Learning rate = {2}\n'
101 | 'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
102 | iters, config.display, learning_rate, batch_time=batch_time,
103 | data_time=data_time, loss=losses))
104 | for cnt in range(0, 6):
105 | print('Loss{0}_1 = {loss1.val:.8f} (ave = {loss1.avg:.8f})'.format(cnt + 1,loss1=losses_list[cnt]))
106 | print(time.strftime(
107 | '%Y-%m-%d %H:%M:%S -----------------------------------------------------------------------------------------------------------------\n',
108 | time.localtime()))
109 |
110 | batch_time.reset()
111 | data_time.reset()
112 | losses.reset()
113 | for cnt in range(12):
114 | losses_list[cnt].reset()
115 |
116 | if iters % 5000 == 0:
117 | torch.save({
118 | 'iter': iters,
119 | 'state_dict': model.state_dict(),
120 | }, str(iters) + '.pth.tar')
121 |
122 | if iters == config.max_iter:
123 | break
124 | return
125 |
126 | if __name__ == '__main__':
127 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
128 | args = parse()
129 | model = construct_model(args)
130 | train_net(model, args)
--------------------------------------------------------------------------------
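This script supervises several pyramid levels against the same target heatmap (intermediate supervision). A generic sketch of the pattern, assuming all heads are emitted at the same resolution as the target:

# Generic intermediate-supervision pattern (assumes equal-size heads):
heads = model(input_var)                          # tuple of heatmap predictions
loss_terms = [criterion(h, heatmap_var) for h in heads]
loss = sum(loss_terms)                            # gradients flow to every head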
/experiments/hourglass/config.yml:
--------------------------------------------------------------------------------
1 | workers: 6
2 | weight_decay: 0.0005
3 | momentum: 0.9
4 | display: 50
5 | max_iter: 160000
6 | batch_size: 10
7 | test_interval: 50
8 | topk: 3
9 | base_lr: 0.00025
10 | start_iters: 0
11 | best_model: 12345678.9
12 | #-------------lr_policy--------------------#
13 | lr_policy: 'multistep'
14 | policy_parameter:
15 | stepvalue: [100000, 150000, 200000]
16 | gamma: 0.1
--------------------------------------------------------------------------------
/experiments/hourglass/train_net.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os, sys
3 | sys.path.append('../../')
4 | import dataset_loader
5 | import torch
6 | import torch.nn as nn
7 | import torch.backends.cudnn as cudnn
8 | import util
9 | import cv2
10 | import argparse
11 | import models.hourglass
12 | import torchvision.transforms as transforms
13 | import time
14 |
15 | def parse():
16 | parser = argparse.ArgumentParser()
17 | return parser.parse_args()
18 |
19 | def construct_model(args):
20 | model = models.hourglass.hg(num_stacks=2, num_blocks=1, num_classes=25)  # 25 output channels, matching the 25-channel target heatmaps
21 | model.cuda()
22 | return model
23 |
24 | def train_net(model, args):
25 |
26 | ann_path = '../FashionAI/data/train/Annotations/train.csv'
27 | img_dir = '../FashionAI/data/train/'
28 |
29 | stride = 8
30 | cudnn.benchmark = True
31 | config = util.Config('./config.yml')
32 |
33 | train_loader = torch.utils.data.DataLoader(
34 | dataset_loader.dataset_loader(img_dir, ann_path, stride,
35 | transforms.ToTensor()),
36 | batch_size=config.batch_size, shuffle=True,
37 | num_workers=config.workers, pin_memory=True)
38 |
39 | criterion = nn.MSELoss().cuda()
40 |
41 | optimizer = torch.optim.SGD(model.parameters(), config.base_lr, momentum=config.momentum,
42 | weight_decay=config.weight_decay)
43 | model.train()
44 | iters = 0
45 | batch_time = util.AverageMeter()
46 | data_time = util.AverageMeter()
47 | losses = util.AverageMeter()
48 | losses_list = [util.AverageMeter() for i in range(12)]
49 | end = time.time()
50 |
51 | heat_weight = 48 * 48 * 25 / 2.0  # scale the loss so values stay comparable with the original implementation
52 | # heat_weight = 1
53 |
54 | while iters < config.max_iter:
55 | for i, (input, heatmap) in enumerate(train_loader):
56 | learning_rate = util.adjust_learning_rate(optimizer, iters, config.base_lr, policy=config.lr_policy,\
57 | policy_parameter=config.policy_parameter)
58 | data_time.update(time.time() - end)
59 |
60 | input = input.cuda(async=True)
61 | heatmap = heatmap.cuda(async=True)
62 | input_var = torch.autograd.Variable(input)
63 | heatmap_var = torch.autograd.Variable(heatmap)
64 |
65 | output = model(input_var)
66 | # one weighted MSE term per hourglass stack (intermediate supervision)
67 | loss_list = [criterion(out, heatmap_var) * heat_weight for out in output]
68 | loss = sum(loss_list)
69 |
70 | losses.update(loss.data[0], input.size(0))
71 |
72 | for cnt, l in enumerate(loss_list):
73 | losses_list[cnt].update(l.data[0], input.size(0))
74 |
75 | optimizer.zero_grad()
76 | loss.backward()
77 | optimizer.step()
78 | batch_time.update(time.time() - end)
79 | end = time.time()
80 |
81 |
82 | iters += 1
83 | if iters % config.display == 0:
84 | print('Train Iteration: {0}\t'
85 | 'Time {batch_time.sum:.3f}s / {1}iters, ({batch_time.avg:.3f})\t'
86 | 'Data load {data_time.sum:.3f}s / {1}iters, ({data_time.avg:3f})\n'
87 | 'Learning rate = {2}\n'
88 | 'Loss = {loss.val:.8f} (ave = {loss.avg:.8f})\n'.format(
89 | iters, config.display, learning_rate, batch_time=batch_time,
90 | data_time=data_time, loss=losses))
91 | for cnt in range(len(loss_list)):
92 | print('Loss{0}_1 = {loss1.val:.8f} (ave = {loss1.avg:.8f})'.format(cnt + 1,loss1=losses_list[cnt]))
93 | print(time.strftime(
94 | '%Y-%m-%d %H:%M:%S -----------------------------------------------------------------------------------------------------------------\n',
95 | time.localtime()))
96 |
97 | batch_time.reset()
98 | data_time.reset()
99 | losses.reset()
100 | for cnt in range(12):
101 | losses_list[cnt].reset()
102 |
103 | if iters % 5000 == 0:
104 | torch.save({
105 | 'iter': iters,
106 | 'state_dict': model.state_dict(),
107 | }, str(iters) + '.pth.tar')
108 |
109 | if iters == config.max_iter:
110 | break
111 | return
112 |
113 | if __name__ == '__main__':
114 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
115 | args = parse()
116 | model = construct_model(args)
117 | train_net(model, args)
--------------------------------------------------------------------------------
/models/CPM.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | import torch.nn as nn
7 | import os
8 | import sys
9 | import math
10 | import torchvision.models as models
11 |
12 |
13 |
14 | def make_net_dict():
15 |
16 | feature = [{'conv1_1': [3, 64, 3, 1, 1]}, {'conv1_2': [64, 64, 3, 1, 1]}, {'pool1': [2, 2, 0]},
17 | {'conv2_1': [64, 128, 3, 1, 1]}, {'conv2_2': [128, 128, 3, 1, 1]}, {'pool2': [2, 2, 0]},
18 | {'conv3_1': [128, 256, 3, 1, 1]}, {'conv3_2': [256, 256, 3, 1, 1]}, {'conv3_3': [256, 256, 3, 1, 1]}, {'conv3_4': [256, 256, 3, 1, 1]}, {'pool3': [2, 2, 0]},
19 | {'conv4_1': [256, 512, 3, 1, 1]}, {'conv4_2': [512, 512, 3, 1, 1]}, {'conv4_3_cpm': [512, 256, 3, 1, 1]}, {'conv4_4_cpm': [256, 128, 3, 1, 1]}]
20 |
21 |
22 | block1 = [{'conv5_1_CPM': [128, 128, 3, 1, 1]},{'conv5_2_CPM': [128, 128, 3, 1, 1]},{'conv5_3_CPM': [128, 128, 3, 1, 1]},
23 | {'conv5_4_CPM': [128, 512, 1, 1, 0]}]
24 |
25 |
26 | block2 = [{'Mconv1': [128+25, 128, 7, 1, 3]}, {'Mconv2': [128, 128, 7, 1, 3]},
27 | {'Mconv3': [128, 128, 7, 1, 3]},{'Mconv4': [128, 128, 7, 1, 3]},
28 | {'Mconv5': [128, 128, 7, 1, 3]},
29 | {'Mconv6': [128, 128, 1, 1, 0]}
30 | ]
31 | predict_layers_stage1 = [{'predict_L1': [512, 25, 1, 1, 0]}]
32 |
33 | predict_layers_stageN = [{'predict_L1': [128, 25, 1, 1, 0]}]
34 |
35 | net_dict = [feature,block1,predict_layers_stage1,block2,predict_layers_stageN]
36 |
37 | return net_dict
38 |
39 |
40 | class CPM(nn.Module):
41 |
42 | def __init__(self, net_dict, batch_norm=False):
43 |
44 | super(CPM, self).__init__()
45 |
46 | self.feature = self._make_layer(net_dict[0])
47 |
48 | self.block = self._make_layer(net_dict[1])
49 |
50 | self.predict = self._make_layer(net_dict[2])
51 |
52 | # repeat: stages 2-6 are built from the same layer spec, each with its own weights
53 | self.block_stage2 = self._make_layer(net_dict[3])
54 |
55 | self.predict_stage2 = self._make_layer(net_dict[4])
56 |
57 | self.block_stage3 = self._make_layer(net_dict[3])
58 |
59 | self.predict_stage3 = self._make_layer(net_dict[4])
60 |
61 | self.block_stage4 = self._make_layer(net_dict[3])
62 |
63 | self.predict_stage4 = self._make_layer(net_dict[4])
64 |
65 | self.block_stage5 = self._make_layer(net_dict[3])
66 |
67 | self.predict_stage5 = self._make_layer(net_dict[4])
68 |
69 | self.block_stage6 = self._make_layer(net_dict[3])
70 |
71 | self.predict_stage6 = self._make_layer(net_dict[4])
72 |
73 | self._init_weights()
74 |
75 | def _init_weights(self):
76 | for m in self.modules():
77 | if isinstance(m, nn.Conv2d):
78 | m.weight.data.normal_(0, 0.01)
79 | if m.bias is not None:
80 | m.bias.data.zero_()
81 |
82 | def _make_layer(self, net_dict, batch_norm=False):
83 | layers = []
84 | length = len(net_dict)
85 | for i in range(length):
86 | one_layer = net_dict[i]
87 | key = list(one_layer.keys())[0]  # list() keeps this working under both Python 2 and 3
88 | v = one_layer[key]
89 |
90 | if 'pool' in key:
91 | layers += [nn.MaxPool2d(kernel_size=v[0], stride=v[1], padding=v[2])]
92 | elif 'predict' in key:
93 | conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3], padding=v[4])
94 | layers += [conv2d]
95 | else:
96 | conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3], padding=v[4])
97 | if batch_norm:
98 | layers += [conv2d, nn.BatchNorm2d(v[1]), nn.ReLU(inplace=True)]
99 | else:
100 | layers += [conv2d, nn.ReLU(inplace=True)]
101 |
102 | return nn.Sequential(*layers)
103 |
104 | def forward(self, x):
105 | # define forward flow
106 | feature = self.feature(x)
107 |
108 | out_stage1 = self.block(feature)
109 | L1_stage1 = self.predict(out_stage1)
110 |
111 |
112 | concat_stage2 = torch.cat([L1_stage1, feature], 1)
113 | out_stage2 = self.block_stage2(concat_stage2)
114 | L1_stage2 = self.predict_stage2(out_stage2)
115 |
116 | concat_stage3 = torch.cat([L1_stage2, feature], 1)
117 | out_stage3 = self.block_stage3(concat_stage3)
118 | L1_stage3 = self.predict_stage3(out_stage3)
119 |
120 |
121 | concat_stage4 = torch.cat([L1_stage3, feature], 1)
122 | out_stage4 = self.block_stage4(concat_stage4)
123 | L1_stage4 = self.predict_stage4(out_stage4)
124 |
125 | concat_stage5 = torch.cat([L1_stage4, feature], 1)
126 | out_stage5 = self.block_stage5(concat_stage5)
127 | L1_stage5 = self.predict_stage5(out_stage5)
128 |
129 | concat_stage6 = torch.cat([L1_stage5, feature], 1)
130 | out_stage6 = self.block_stage6(concat_stage6)
131 | L1_stage6 = self.predict_stage6(out_stage6)
132 |
133 | return L1_stage1, L1_stage2, L1_stage3, L1_stage4, L1_stage5, L1_stage6
134 |
135 | def PoseModel(num_point, num_stages=6, batch_norm=False, pretrained=False):
136 | net_dict = make_net_dict()
137 | model = CPM(net_dict, batch_norm)
138 |
139 | if pretrained:
140 | parameter_num = 10
141 | if batch_norm:
142 | vgg19 = models.vgg19_bn(pretrained=True)
143 | parameter_num *= 6
144 | else:
145 | vgg19 = models.vgg19(pretrained=True)
146 | parameter_num *= 2
147 |
148 | vgg19_state_dict = vgg19.state_dict()
149 | vgg19_keys = list(vgg19_state_dict.keys())
150 |
151 | model_dict = model.state_dict()
152 | from collections import OrderedDict
153 | weights_load = OrderedDict()
154 |
155 | for i in range(parameter_num):
156 | weights_load[list(model.state_dict().keys())[i]] = vgg19_state_dict[vgg19_keys[i]]
157 | model_dict.update(weights_load)
158 | model.load_state_dict(model_dict)
159 |
160 | return model
161 |
162 |
163 | if __name__ == '__main__':
164 | print(PoseModel(25, 6, batch_norm=False))
165 |
--------------------------------------------------------------------------------
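A quick shape check for the model above (illustrative, not part of the repo): the three stride-2 pools in `feature` give an overall stride of 8, so the 384x384 crops used in training map to 48x48 heatmaps -- exactly the 48 * 48 * 25 factor in heat_weight.

import torch
from torch.autograd import Variable   # PyTorch 0.3-era API, matching the repo

model = PoseModel(25, 6, batch_norm=False, pretrained=False)
x = Variable(torch.randn(1, 3, 384, 384))
outs = model(x)
print([tuple(o.size()) for o in outs])   # six tensors of (1, 25, 48, 48)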
/models/CPM_FPN.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | import torch.nn as nn
7 | import math
8 | import torch.nn.functional as F
9 |
10 | ############################################################
11 | # ResNet
12 | ############################################################
13 | def conv3x3(in_planes, out_planes, stride=1):
14 | "3x3 convolution with padding"
15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
16 | padding=1, bias=False)
17 |
18 | class BasicBlock(nn.Module):
19 | expansion = 1
20 |
21 | def __init__(self, inplanes, planes, stride=1, downsample=None):
22 | super(BasicBlock, self).__init__()
23 | self.conv1 = conv3x3(inplanes, planes, stride)
24 | self.bn1 = nn.BatchNorm2d(planes)
25 | self.relu = nn.ReLU(inplace=True)
26 | self.conv2 = conv3x3(planes, planes)
27 | self.bn2 = nn.BatchNorm2d(planes)
28 | self.downsample = downsample
29 | self.stride = stride
30 |
31 | def forward(self, x):
32 | residual = x
33 |
34 | out = self.conv1(x)
35 | out = self.bn1(out)
36 | out = self.relu(out)
37 |
38 | out = self.conv2(out)
39 | out = self.bn2(out)
40 |
41 | if self.downsample is not None:
42 | residual = self.downsample(x)
43 |
44 | out += residual
45 | out = self.relu(out)
46 |
47 | return out
48 |
49 |
50 | class Bottleneck(nn.Module):
51 | expansion = 4
52 |
53 | def __init__(self, inplanes, planes, stride=1, downsample=None):
54 | super(Bottleneck, self).__init__()
55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
56 | self.bn1 = nn.BatchNorm2d(planes)
57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
58 | padding=1, bias=False)
59 | self.bn2 = nn.BatchNorm2d(planes)
60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
61 | self.bn3 = nn.BatchNorm2d(planes * 4)
62 | self.relu = nn.ReLU(inplace=True)
63 | self.downsample = downsample
64 | self.stride = stride
65 |
66 | def forward(self, x):
67 | residual = x
68 |
69 | out = self.conv1(x)
70 | out = self.bn1(out)
71 | out = self.relu(out)
72 |
73 | out = self.conv2(out)
74 | out = self.bn2(out)
75 | out = self.relu(out)
76 |
77 | out = self.conv3(out)
78 | out = self.bn3(out)
79 |
80 | if self.downsample is not None:
81 | residual = self.downsample(x)
82 |
83 | out += residual
84 | out = self.relu(out)
85 |
86 | return out
87 |
88 |
89 | class ResNet(nn.Module):
90 |
91 | def __init__(self, block, layers, num_classes=1000):
92 | self.inplanes = 64
93 | super(ResNet, self).__init__()
94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
95 | bias=False)
96 | self.bn1 = nn.BatchNorm2d(64)
97 | self.relu = nn.ReLU(inplace=True)
98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
99 | self.layer1 = self._make_layer(block, 64, layers[0])
100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
103 |
104 | for m in self.modules():
105 | if isinstance(m, nn.Conv2d):
106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
107 | m.weight.data.normal_(0, math.sqrt(2. / n))
108 | elif isinstance(m, nn.BatchNorm2d):
109 | m.weight.data.fill_(1)
110 | m.bias.data.zero_()
111 |
112 | def _make_layer(self, block, planes, blocks, stride=1):
113 | downsample = None
114 | if stride != 1 or self.inplanes != planes * block.expansion:
115 | downsample = nn.Sequential(
116 | nn.Conv2d(self.inplanes, planes * block.expansion,
117 | kernel_size=1, stride=stride, bias=False),
118 | nn.BatchNorm2d(planes * block.expansion),
119 | )
120 |
121 | layers = []
122 | layers.append(block(self.inplanes, planes, stride, downsample))
123 | self.inplanes = planes * block.expansion
124 | for i in range(1, blocks):
125 | layers.append(block(self.inplanes, planes))
126 |
127 | return nn.Sequential(*layers)
128 |
129 | def load_weights(self, path):
130 | model_dict = self.state_dict()
131 | print('loading model from {}'.format(path))
132 | try:
133 | #state_dict = torch.load(self.path)
134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()})
135 | pretrained_dict = torch.load(path)
136 | from collections import OrderedDict
137 | tmp = OrderedDict()
138 | for k,v in pretrained_dict.items():
139 | if k in model_dict:
140 | tmp[k] = v
141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
142 | # model_dict.update(pretrained_dict)
143 | model_dict.update(tmp)
144 | self.load_state_dict(model_dict)
145 | except Exception:
146 | print ('loading model failed, {} may not exist'.format(path))
147 |
148 | def forward(self, x):
149 | x = self.conv1(x)
150 | x = self.bn1(x)
151 | x = self.relu(x)
152 | C1 = self.maxpool(x)
153 |
154 | C2 = self.layer1(C1)
155 | C3 = self.layer2(C2)
156 | C4 = self.layer3(C3)
157 | C5 = self.layer4(C4)
158 |
159 | return C1, C2, C3, C4, C5
160 |
161 | ############################################################
162 | # FPN Graph
163 | ############################################################
164 |
165 | class FPN(nn.Module): # xavier_fill as default
166 | def __init__(self, out_channels):
167 | super(FPN, self).__init__()
168 | self.out_channels = out_channels
169 | self.P6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0, ceil_mode=False)
170 | self.P5_conv1 = nn.Conv2d(2048, self.out_channels, kernel_size=1, stride=1)
171 |
172 | self.P4_conv1 = nn.Conv2d(1024, self.out_channels, kernel_size=1, stride=1)
173 | self.P4_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
174 |
175 | self.P3_conv1 = nn.Conv2d(512, self.out_channels, kernel_size=1, stride=1)
176 | self.P3_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
177 | self.P2_conv1 = nn.Conv2d(256, self.out_channels, kernel_size=1, stride=1)
178 | self.P2_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
179 |
180 | def forward(self, C1, C2, C3, C4, C5):
181 |
182 | p5_out = self.P5_conv1(C5)
183 |
184 | p4_out = torch.add(self.P4_conv1(C4), F.upsample_nearest(p5_out, scale_factor=2))
185 | p3_out = torch.add(self.P3_conv1(C3), F.upsample_nearest(p4_out, scale_factor=2))
186 | p2_out = torch.add(self.P2_conv1(C2), F.upsample_nearest(p3_out, scale_factor=2))
187 |
188 | p4_out = self.P4_conv2(p4_out)
189 | p3_out = self.P3_conv2(p3_out)
190 | p2_out = self.P2_conv2(p2_out)
191 |
192 | # P6 is used for the 5th anchor scale in RPN. Generated by
193 | # subsampling from P5 with stride of 2.
194 | p6_out = self.P6(p5_out)
195 |
196 | return p2_out, p3_out, p4_out, p5_out, p6_out
197 |
198 |
199 | ############################################################
200 | # Pose Estimation Graph
201 | ############################################################
202 |
203 | class pose_estimation(nn.Module):
204 | def __init__(self, class_num, pretrain=True):
205 | super(pose_estimation, self).__init__()
206 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50
207 | if pretrain:
208 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth'
209 | self.resnet.load_weights(self.model_path)
210 | self.apply_fix()
211 | self.out_channels = 256
212 | self.fpn = FPN(self.out_channels)
213 |
214 | # self.block = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0), nn.ReLU(inplace=True),
215 | # nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
216 | # nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
217 | # nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
218 | # nn.Conv2d(128, 256, 3, 1, 1), nn.ReLU(inplace=True)
219 | # )
220 | # self._init_weights(self.block)
221 |
222 | self.predict = nn.Sequential(nn.Conv2d(768, 128, 3, 1, 1), nn.ReLU(inplace=True),
223 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
224 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
225 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
226 | nn.Conv2d(128, 25, 3, 1, 1))
227 | self._init_weights(self.predict)
228 |
229 |
230 |
231 | def _gaussian_init_conv(self, conv):
232 | if isinstance(conv, nn.Conv2d):
233 | conv.weight.data.normal_(0, 0.01)
234 | if conv.bias is not None:
235 | conv.bias.data.zero_()
236 |
237 | def _init_weights(self, model):
238 | for m in model:
239 | if isinstance(m, nn.Conv2d):
240 | m.weight.data.normal_(0, 0.01)
241 | if m.bias is not None:
242 | m.bias.data.zero_()
243 |
244 |
245 | def apply_fix(self):
246 | # 1. fix bn
247 | # 2. fix conv1 conv2
248 | for param in self.resnet.conv1.parameters():
249 | param.requires_grad = False
250 | for param in self.resnet.layer1.parameters():
251 | param.requires_grad = False
252 |
253 | def forward(self, x):
254 | C1, C2, C3, C4, C5 = self.resnet(x)
255 | P2, P3, P4, P5, P6 = self.fpn(C1, C2, C3, C4, C5)
256 |
257 | P4_x2 = F.upsample(P4, scale_factor=2)
258 | P5_x4 = F.upsample(P5, scale_factor=4)
259 | feature_cat = torch.cat([P3, P4_x2, P5_x4], 1)
260 | out = self.predict(feature_cat)
261 | return out
262 |
--------------------------------------------------------------------------------
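Stride bookkeeping for the forward pass above: P3 sits at stride 8 relative to the input, P4 at 16, and P5 at 32, so the x2 and x4 upsamplings align all three to stride 8 before the 256 * 3 = 768-channel concat that feeds self.predict. A hypothetical smoke test (pretrain=False avoids the hard-coded checkpoint path):

import torch
from torch.autograd import Variable

net = pose_estimation(class_num=25, pretrain=False)
y = net(Variable(torch.randn(1, 3, 384, 384)))
print(y.size())   # expected: (1, 25, 48, 48) -- stride 8 on a 384x384 input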
/models/CPM_ResNet.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | import torch.nn as nn
7 | import math
8 | import torch.nn.functional as F
9 |
10 | ############################################################
11 | # ResNet
12 | ############################################################
13 | def conv3x3(in_planes, out_planes, stride=1):
14 | "3x3 convolution with padding"
15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
16 | padding=1, bias=False)
17 |
18 | class BasicBlock(nn.Module):
19 | expansion = 1
20 |
21 | def __init__(self, inplanes, planes, stride=1, downsample=None):
22 | super(BasicBlock, self).__init__()
23 | self.conv1 = conv3x3(inplanes, planes, stride)
24 | self.bn1 = nn.BatchNorm2d(planes)
25 | self.relu = nn.ReLU(inplace=True)
26 | self.conv2 = conv3x3(planes, planes)
27 | self.bn2 = nn.BatchNorm2d(planes)
28 | self.downsample = downsample
29 | self.stride = stride
30 |
31 | def forward(self, x):
32 | residual = x
33 |
34 | out = self.conv1(x)
35 | out = self.bn1(out)
36 | out = self.relu(out)
37 |
38 | out = self.conv2(out)
39 | out = self.bn2(out)
40 |
41 | if self.downsample is not None:
42 | residual = self.downsample(x)
43 |
44 | out += residual
45 | out = self.relu(out)
46 |
47 | return out
48 |
49 |
50 | class Bottleneck(nn.Module):
51 | expansion = 4
52 |
53 | def __init__(self, inplanes, planes, stride=1, downsample=None):
54 | super(Bottleneck, self).__init__()
55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
56 | self.bn1 = nn.BatchNorm2d(planes)
57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
58 | padding=1, bias=False)
59 | self.bn2 = nn.BatchNorm2d(planes)
60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
61 | self.bn3 = nn.BatchNorm2d(planes * 4)
62 | self.relu = nn.ReLU(inplace=True)
63 | self.downsample = downsample
64 | self.stride = stride
65 |
66 | def forward(self, x):
67 | residual = x
68 |
69 | out = self.conv1(x)
70 | out = self.bn1(out)
71 | out = self.relu(out)
72 |
73 | out = self.conv2(out)
74 | out = self.bn2(out)
75 | out = self.relu(out)
76 |
77 | out = self.conv3(out)
78 | out = self.bn3(out)
79 |
80 | if self.downsample is not None:
81 | residual = self.downsample(x)
82 |
83 | out += residual
84 | out = self.relu(out)
85 |
86 | return out
87 |
88 |
89 | class ResNet(nn.Module):
90 |
91 | def __init__(self, block, layers, num_classes=1000):
92 | self.inplanes = 64
93 | super(ResNet, self).__init__()
94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
95 | bias=False)
96 | self.bn1 = nn.BatchNorm2d(64)
97 | self.relu = nn.ReLU(inplace=True)
98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
99 | self.layer1 = self._make_layer(block, 64, layers[0])
100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
103 |
104 | for m in self.modules():
105 | if isinstance(m, nn.Conv2d):
106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
107 | m.weight.data.normal_(0, math.sqrt(2. / n))
108 | elif isinstance(m, nn.BatchNorm2d):
109 | m.weight.data.fill_(1)
110 | m.bias.data.zero_()
111 |
112 | def _make_layer(self, block, planes, blocks, stride=1):
113 | downsample = None
114 | if stride != 1 or self.inplanes != planes * block.expansion:
115 | downsample = nn.Sequential(
116 | nn.Conv2d(self.inplanes, planes * block.expansion,
117 | kernel_size=1, stride=stride, bias=False),
118 | nn.BatchNorm2d(planes * block.expansion),
119 | )
120 |
121 | layers = []
122 | layers.append(block(self.inplanes, planes, stride, downsample))
123 | self.inplanes = planes * block.expansion
124 | for i in range(1, blocks):
125 | layers.append(block(self.inplanes, planes))
126 |
127 | return nn.Sequential(*layers)
128 |
129 | def load_weights(self, path):
130 | model_dict = self.state_dict()
131 | print('loading model from {}'.format(path))
132 | try:
133 | #state_dict = torch.load(self.path)
134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()})
135 | pretrained_dict = torch.load(path)
136 | from collections import OrderedDict
137 | tmp = OrderedDict()
138 | for k,v in pretrained_dict.items():
139 | if k in model_dict:
140 | tmp[k] = v
141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
142 | # model_dict.update(pretrained_dict)
143 | model_dict.update(tmp)
144 | self.load_state_dict(model_dict)
145 | except Exception:
146 | print ('loading model failed, {} may not exist'.format(path))
147 |
148 | def forward(self, x):
149 | x = self.conv1(x)
150 | x = self.bn1(x)
151 | x = self.relu(x)
152 | C1 = self.maxpool(x)
153 |
154 | C2 = self.layer1(C1)
155 | C3 = self.layer2(C2)
156 | C4 = self.layer3(C3)
157 | C5 = self.layer4(C4)
158 |
159 | return C1, C2, C3, C4, C5
160 |
161 | ############################################################
162 | # Pose Estimation Graph
163 | ############################################################
164 |
165 | class pose_estimation(nn.Module):
166 | def __init__(self, class_num, pretrain=True):
167 | super(pose_estimation, self).__init__()
168 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50
169 | if pretrain:
170 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth'
171 | self.resnet.load_weights(self.model_path)
172 | self.apply_fix()
173 | self.reduce_C4 = nn.Sequential(nn.Conv2d(1024, 256, 1, 1, 0))
174 | self.reduce_C5 = nn.Sequential(nn.Conv2d(2048, 256, 1, 1, 0))
175 |
176 | self.block = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0),nn.ReLU(inplace=True),
177 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
178 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
179 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
180 | nn.Conv2d(128, 512, 3, 1, 1), nn.ReLU(inplace=True)
181 | )
182 | self._init_weights(self.block)
183 | self.predict = nn.Conv2d(512, 25, 1, 1, 0)
184 |
185 | def _init_weights(self, model):
186 | for m in model:
187 | if isinstance(m, nn.Conv2d):
188 | m.weight.data.normal_(0, 0.01)
189 | if m.bias is not None:
190 | m.bias.data.zero_()
191 | def apply_fix(self):
192 | # 1. fix bn
193 | # 2. fix conv1 conv2
194 | for param in self.resnet.conv1.parameters():
195 | param.requires_grad = False
196 | for param in self.resnet.layer1.parameters():
197 | param.requires_grad = False
198 |
199 |
200 | def forward(self, x):
201 | C1, C2, C3, C4, C5 = self.resnet(x)
202 | C4 = self.reduce_C4(C4)
203 | C4 = F.upsample(C4, scale_factor=2)
204 |
205 | C5 = self.reduce_C5(C5)
206 | C5 = F.upsample(C5, scale_factor=4)
207 |
208 | P4 = C5 + C4
209 |
210 | out = self.block(P4)
211 | predict = self.predict(out)
212 | return predict
--------------------------------------------------------------------------------
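For the fusion in forward() above, both branches must agree in shape before the elementwise add; with a 384x384 input the bookkeeping works out as follows (a sketch, not part of the repo):

# C4: (N, 1024, 24, 24) --reduce_C4--> (N, 256, 24, 24) --x2 upsample--> (N, 256, 48, 48)
# C5: (N, 2048, 12, 12) --reduce_C5--> (N, 256, 12, 12) --x4 upsample--> (N, 256, 48, 48)
# P4 = C5 + C4   # elementwise, so both terms must be (N, 256, 48, 48)
# block/predict keep the 48x48 resolution and emit 25 heatmap channels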
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xiangyu-CAS/FashionAI_Keypoints/dab6cbd975ba6071b070fb7da2fb163d01e2e2e4/models/__init__.py
--------------------------------------------------------------------------------
/models/bk/CPM.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import os
4 | import sys
5 | import math
6 | import torchvision.models as models
7 |
8 | def make_net_dict():
9 |
10 | feature = [{'conv1_1': [3, 64, 3, 1, 1]}, {'conv1_2': [64, 64, 3, 1, 1]}, {'pool1': [2, 2, 0]},
11 | {'conv2_1': [64, 128, 3, 1, 1]}, {'conv2_2': [128, 128, 3, 1, 1]}, {'pool2': [2, 2, 0]},
12 | {'conv3_1': [128, 256, 3, 1, 1]}, {'conv3_2': [256, 256, 3, 1, 1]}, {'conv3_3': [256, 256, 3, 1, 1]}, {'conv3_4': [256, 256, 3, 1, 1]}, {'pool3': [2, 2, 0]},
13 | {'conv4_1': [256, 512, 3, 1, 1]}, {'conv4_2': [512, 512, 3, 1, 1]}, {'conv4_3_cpm': [512, 256, 3, 1, 1]}, {'conv4_4_cpm': [256, 128, 3, 1, 1]}]
14 |
15 |
16 | block1 = [{'conv5_1_CPM': [128, 128, 3, 1, 1]},{'conv5_2_CPM': [128, 128, 3, 1, 1]},{'conv5_3_CPM': [128, 128, 3, 1, 1]},
17 | {'conv5_4_CPM': [128, 512, 1, 1, 0]}]
18 |
19 |
20 | block2 = [{'Mconv1': [128+25, 128, 7, 1, 3]}, {'Mconv2': [128, 128, 7, 1, 3]},
21 | {'Mconv3': [128, 128, 7, 1, 3]},{'Mconv4': [128, 128, 7, 1, 3]},
22 | {'Mconv5': [128, 128, 7, 1, 3]},
23 | {'Mconv6': [128, 128, 1, 1, 0]}
24 | ]
25 | predict_layers_stage1 = [{'predict_L1': [512, 25, 1, 1, 0]}]
26 |
27 | predict_layers_stageN = [{'predict_L1': [128, 25, 1, 1, 0]}]
28 |
29 | net_dict = [feature,block1,predict_layers_stage1,block2,predict_layers_stageN]
30 |
31 | return net_dict
32 |
33 |
34 | class CPM(nn.Module):
35 |
36 | def __init__(self, net_dict, batch_norm=False):
37 |
38 | super(CPM, self).__init__()
39 |
40 | self.feature = self._make_layer(net_dict[0])
41 |
42 | self.block = self._make_layer(net_dict[1])
43 |
44 | self.predict = self._make_layer(net_dict[2])
45 |
46 | # repeat: stages 2-6 are built from the same layer spec, each with its own weights
47 | self.block_stage2 = self._make_layer(net_dict[3])
48 |
49 | self.predict_stage2 = self._make_layer(net_dict[4])
50 |
51 | self.block_stage3 = self._make_layer(net_dict[3])
52 |
53 | self.predict_stage3 = self._make_layer(net_dict[4])
54 |
55 | self.block_stage4 = self._make_layer(net_dict[3])
56 |
57 | self.predict_stage4 = self._make_layer(net_dict[4])
58 |
59 | self.block_stage5 = self._make_layer(net_dict[3])
60 |
61 | self.predict_stage5 = self._make_layer(net_dict[4])
62 |
63 | self.block_stage6 = self._make_layer(net_dict[3])
64 |
65 | self.predict_stage6 = self._make_layer(net_dict[4])
66 |
67 | self._init_weights()
68 |
69 | def _init_weights(self):
70 | for m in self.modules():
71 | if isinstance(m, nn.Conv2d):
72 | m.weight.data.normal_(0, 0.01)
73 | if m.bias is not None:
74 | m.bias.data.zero_()
75 |
76 | def _make_layer(self, net_dict, batch_norm=False):
77 | layers = []
78 | length = len(net_dict)
79 | for i in range(length):
80 | one_layer = net_dict[i]
81 | key = one_layer.keys()[0]
82 | v = one_layer[key]
83 |
84 | if 'pool' in key:
85 | layers += [nn.MaxPool2d(kernel_size=v[0], stride=v[1], padding=v[2])]
86 | elif 'predict' in key:
87 | conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3], padding=v[4])
88 | layers += [conv2d]
89 | else:
90 | conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1], kernel_size=v[2], stride=v[3], padding=v[4])
91 | if batch_norm:
92 | layers += [conv2d, nn.BatchNorm2d(v[1]), nn.ReLU(inplace=True)]
93 | else:
94 | layers += [conv2d, nn.ReLU(inplace=True)]
95 |
96 | return nn.Sequential(*layers)
97 |
98 | def forward(self, x):
99 | # define forward flow
100 | feature = self.feature(x)
101 |
102 | out_stage1 = self.block(feature)
103 | L1_stage1 = self.predict(out_stage1)
104 |
105 |
106 | concat_stage2 = torch.cat([L1_stage1, feature], 1)
107 | out_stage2 = self.block_stage2(concat_stage2)
108 | L1_stage2 = self.predict_stage2(out_stage2)
109 |
110 | concat_stage3 = torch.cat([L1_stage2, feature], 1)
111 | out_stage3 = self.block_stage3(concat_stage3)
112 | L1_stage3 = self.predict_stage3(out_stage3)
113 |
114 |
115 | concat_stage4 = torch.cat([L1_stage3, feature], 1)
116 | out_stage4 = self.block_stage4(concat_stage4)
117 | L1_stage4 = self.predict_stage4(out_stage4)
118 |
119 | concat_stage5 = torch.cat([L1_stage4, feature], 1)
120 | out_stage5 = self.block_stage5(concat_stage5)
121 | L1_stage5 = self.predict_stage5(out_stage5)
122 |
123 | concat_stage6 = torch.cat([L1_stage5, feature], 1)
124 | out_stage6 = self.block_stage6(concat_stage6)
125 | L1_stage6 = self.predict_stage6(out_stage6)
126 |
127 | return L1_stage1, L1_stage2, L1_stage3, L1_stage4, L1_stage5, L1_stage6
128 |
129 | def PoseModel(num_point, num_stages=6, batch_norm=False, pretrained=False):
130 | net_dict = make_net_dict()
131 | model = CPM(net_dict, batch_norm)
132 |
133 | if pretrained:
134 | parameter_num = 10
135 | if batch_norm:
136 | vgg19 = models.vgg19_bn(pretrained=True)
137 | parameter_num *= 6
138 | else:
139 | vgg19 = models.vgg19(pretrained=True)
140 | parameter_num *= 2
141 |
142 | vgg19_state_dict = vgg19.state_dict()
143 | vgg19_keys = vgg19_state_dict.keys()
144 |
145 | model_dict = model.state_dict()
146 | from collections import OrderedDict
147 | weights_load = OrderedDict()
148 |
149 | for i in range(parameter_num):
150 | weights_load[model.state_dict().keys()[i]] = vgg19_state_dict[vgg19_keys[i]]
151 | model_dict.update(weights_load)
152 | model.load_state_dict(model_dict)
153 |
154 | return model
155 |
156 |
157 | if __name__ == '__main__':
158 | print(PoseModel(25, 6, batch_norm=False))
159 |
--------------------------------------------------------------------------------
/models/bk/CPM_FPN.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | import torch.nn as nn
7 | import math
8 | import torch.nn.functional as F
9 |
10 | ############################################################
11 | # ResNet
12 | ############################################################
13 | def conv3x3(in_planes, out_planes, stride=1):
14 | "3x3 convolution with padding"
15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
16 | padding=1, bias=False)
17 |
18 | class BasicBlock(nn.Module):
19 | expansion = 1
20 |
21 | def __init__(self, inplanes, planes, stride=1, downsample=None):
22 | super(BasicBlock, self).__init__()
23 | self.conv1 = conv3x3(inplanes, planes, stride)
24 | self.bn1 = nn.BatchNorm2d(planes)
25 | self.relu = nn.ReLU(inplace=True)
26 | self.conv2 = conv3x3(planes, planes)
27 | self.bn2 = nn.BatchNorm2d(planes)
28 | self.downsample = downsample
29 | self.stride = stride
30 |
31 | def forward(self, x):
32 | residual = x
33 |
34 | out = self.conv1(x)
35 | out = self.bn1(out)
36 | out = self.relu(out)
37 |
38 | out = self.conv2(out)
39 | out = self.bn2(out)
40 |
41 | if self.downsample is not None:
42 | residual = self.downsample(x)
43 |
44 | out += residual
45 | out = self.relu(out)
46 |
47 | return out
48 |
49 |
50 | class Bottleneck(nn.Module):
51 | expansion = 4
52 |
53 | def __init__(self, inplanes, planes, stride=1, downsample=None):
54 | super(Bottleneck, self).__init__()
55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
56 | self.bn1 = nn.BatchNorm2d(planes)
57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
58 | padding=1, bias=False)
59 | self.bn2 = nn.BatchNorm2d(planes)
60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
61 | self.bn3 = nn.BatchNorm2d(planes * 4)
62 | self.relu = nn.ReLU(inplace=True)
63 | self.downsample = downsample
64 | self.stride = stride
65 |
66 | def forward(self, x):
67 | residual = x
68 |
69 | out = self.conv1(x)
70 | out = self.bn1(out)
71 | out = self.relu(out)
72 |
73 | out = self.conv2(out)
74 | out = self.bn2(out)
75 | out = self.relu(out)
76 |
77 | out = self.conv3(out)
78 | out = self.bn3(out)
79 |
80 | if self.downsample is not None:
81 | residual = self.downsample(x)
82 |
83 | out += residual
84 | out = self.relu(out)
85 |
86 | return out
87 |
88 |
89 | class ResNet(nn.Module):
90 |
91 | def __init__(self, block, layers, num_classes=1000):
92 | self.inplanes = 64
93 | super(ResNet, self).__init__()
94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
95 | bias=False)
96 | self.bn1 = nn.BatchNorm2d(64)
97 | self.relu = nn.ReLU(inplace=True)
98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
99 | self.layer1 = self._make_layer(block, 64, layers[0])
100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
103 |
104 | for m in self.modules():
105 | if isinstance(m, nn.Conv2d):
106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
107 | m.weight.data.normal_(0, math.sqrt(2. / n))
108 | elif isinstance(m, nn.BatchNorm2d):
109 | m.weight.data.fill_(1)
110 | m.bias.data.zero_()
111 |
112 | def _make_layer(self, block, planes, blocks, stride=1):
113 | downsample = None
114 | if stride != 1 or self.inplanes != planes * block.expansion:
115 | downsample = nn.Sequential(
116 | nn.Conv2d(self.inplanes, planes * block.expansion,
117 | kernel_size=1, stride=stride, bias=False),
118 | nn.BatchNorm2d(planes * block.expansion),
119 | )
120 |
121 | layers = []
122 | layers.append(block(self.inplanes, planes, stride, downsample))
123 | self.inplanes = planes * block.expansion
124 | for i in range(1, blocks):
125 | layers.append(block(self.inplanes, planes))
126 |
127 | return nn.Sequential(*layers)
128 |
129 | def load_weights(self, path):
130 | model_dict = self.state_dict()
131 | print('loading model from {}'.format(path))
132 | try:
133 | #state_dict = torch.load(self.path)
134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()})
135 | pretrained_dict = torch.load(path)
136 | from collections import OrderedDict
137 | tmp = OrderedDict()
138 | for k,v in pretrained_dict.items():
139 | if k in model_dict:
140 | tmp[k] = v
141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
142 | # model_dict.update(pretrained_dict)
143 | model_dict.update(tmp)
144 | self.load_state_dict(model_dict)
145 | except:
146 | print ('loading model failed, {} may not exist'.format(path))
147 |
148 | def forward(self, x):
149 | x = self.conv1(x)
150 | x = self.bn1(x)
151 | x = self.relu(x)
152 | C1 = self.maxpool(x)
153 |
154 | C2 = self.layer1(C1)
155 | C3 = self.layer2(C2)
156 | C4 = self.layer3(C3)
157 | C5 = self.layer4(C4)
158 |
159 | return C1, C2, C3, C4, C5
160 |
161 | ############################################################
162 | # FPN Graph
163 | ############################################################
164 |
165 | class FPN(nn.Module):
166 | def __init__(self, out_channels):
167 | super(FPN, self).__init__()
168 | self.out_channels = out_channels
169 | self.P6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0, ceil_mode=False)
170 | self.P5_conv1 = nn.Conv2d(2048, self.out_channels, kernel_size=1, stride=1)
171 | self.P5_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
172 | self.P4_conv1 = nn.Conv2d(1024, self.out_channels, kernel_size=1, stride=1)
173 | self.P4_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
174 |
175 | self.P3_conv1 = nn.Conv2d(512, self.out_channels, kernel_size=1, stride=1)
176 | self.P3_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
177 | self.P2_conv1 = nn.Conv2d(256, self.out_channels, kernel_size=1, stride=1)
178 | self.P2_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
179 |
180 | self.init_weights()
181 |
182 | def init_weights(self):
183 | for m in self.modules():
184 | if isinstance(m, nn.Conv2d):
185 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
186 | m.weight.data.normal_(0, math.sqrt(2. / n))
187 | if m.bias is not None:
188 | m.bias.data.zero_()
189 | elif isinstance(m, nn.BatchNorm2d):
190 | m.weight.data.fill_(1)
191 | m.bias.data.zero_()
192 | elif isinstance(m, nn.Linear):
193 | m.weight.data.normal_(0, 0.01)
194 | m.bias.data.zero_()
195 |
196 | def forward(self, C1, C2, C3, C4, C5):
197 |
198 | p5_out = self.P5_conv1(C5)
199 | p4_out = torch.add(self.P4_conv1(C4), F.upsample(p5_out, scale_factor=2))
200 | p3_out = torch.add(self.P3_conv1(C3), F.upsample(p4_out, scale_factor=2))
201 | p2_out = torch.add(self.P2_conv1(C2), F.upsample(p3_out, scale_factor=2))
202 |
203 | p5_out = self.P5_conv2(p5_out)
204 | p4_out = self.P4_conv2(p4_out)
205 | p3_out = self.P3_conv2(p3_out)
206 | p2_out = self.P2_conv2(p2_out)
207 |
208 | # P6 is used for the 5th anchor scale in RPN. Generated by
209 | # subsampling from P5 with stride of 2.
210 | p6_out = self.P6(p5_out)
211 |
212 | return p2_out, p3_out, p4_out, p5_out, p6_out
213 | ############################################################
214 | # Pose Estimation Graph
215 | ############################################################
216 |
217 | class pose_estimation(nn.Module):
218 | def __init__(self, class_num, pretrain=True):
219 | super(pose_estimation, self).__init__()
220 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50
221 | if pretrain == True:
222 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth'
223 | self.resnet.load_weights(self.model_path)
224 | self.apply_fix()
225 | self.out_channels = 256
226 | self.P5_conv1 = nn.Conv2d(2048, self.out_channels, kernel_size=1, stride=1)
227 | self.P5_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
228 | self.P4_conv1 = nn.Conv2d(1024, self.out_channels, kernel_size=1, stride=1)
229 | self.P4_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
230 |
231 | self.P3_conv1 = nn.Conv2d(512, self.out_channels, kernel_size=1, stride=1)
232 | self.P3_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
233 | self.P2_conv1 = nn.Conv2d(256, self.out_channels, kernel_size=1, stride=1)
234 | self.P2_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
235 |
236 |
237 | self.block = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0),nn.ReLU(inplace=True),
238 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
239 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
240 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
241 | nn.Conv2d(128, 512, 3, 1, 1), nn.ReLU(inplace=True)
242 | )
243 | self._init_weights(self.block)
244 | self.predict = nn.Conv2d(512, 25, 1, 1, 0)
245 |
246 | def _init_weights(self, model):
247 | for m in model:
248 | if isinstance(m, nn.Conv2d):
249 | m.weight.data.normal_(0, 0.01)
250 | if m.bias is not None:
251 | m.bias.data.zero_()
252 | def apply_fix(self):
253 | # 1. fix bn
254 | # 2. fix conv1 conv2
255 | for param in self.resnet.conv1.parameters():
256 | param.requires_grad = False
257 | for param in self.resnet.layer1.parameters():
258 | param.requires_grad = False
259 |
260 |
261 | def forward(self, x):
262 | C1, C2, C3, C4, C5 = self.resnet(x)
263 |
264 | p5_out = self.P5_conv1(C5)
265 | p4_out = torch.add(self.P4_conv1(C4), F.upsample(p5_out, scale_factor=2))
266 | p3_out = torch.add(self.P3_conv1(C3), F.upsample(p4_out, scale_factor=2))
267 | p2_out = torch.add(self.P2_conv1(C2), F.upsample(p3_out, scale_factor=2))
268 |
269 | p5_out = self.P5_conv2(p5_out)
270 | p4_out = self.P4_conv2(p4_out)
271 | p3_out = self.P3_conv2(p3_out)
272 | p2_out = self.P2_conv2(p2_out)
273 |
274 |
275 | out = self.block(p3_out)
276 | predict = self.predict(out)
277 | return predict
--------------------------------------------------------------------------------
/models/bk/CPM_FPN3.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | import torch.nn as nn
7 | import math
8 | import torch.nn.functional as F
9 |
10 | ############################################################
11 | # ResNet
12 | ############################################################
13 | def conv3x3(in_planes, out_planes, stride=1):
14 | "3x3 convolution with padding"
15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
16 | padding=1, bias=False)
17 |
18 | class BasicBlock(nn.Module):
19 | expansion = 1
20 |
21 | def __init__(self, inplanes, planes, stride=1, downsample=None):
22 | super(BasicBlock, self).__init__()
23 | self.conv1 = conv3x3(inplanes, planes, stride)
24 | self.bn1 = nn.BatchNorm2d(planes)
25 | self.relu = nn.ReLU(inplace=True)
26 | self.conv2 = conv3x3(planes, planes)
27 | self.bn2 = nn.BatchNorm2d(planes)
28 | self.downsample = downsample
29 | self.stride = stride
30 |
31 | def forward(self, x):
32 | residual = x
33 |
34 | out = self.conv1(x)
35 | out = self.bn1(out)
36 | out = self.relu(out)
37 |
38 | out = self.conv2(out)
39 | out = self.bn2(out)
40 |
41 | if self.downsample is not None:
42 | residual = self.downsample(x)
43 |
44 | out += residual
45 | out = self.relu(out)
46 |
47 | return out
48 |
49 |
50 | class Bottleneck(nn.Module):
51 | expansion = 4
52 |
53 | def __init__(self, inplanes, planes, stride=1, downsample=None):
54 | super(Bottleneck, self).__init__()
55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
56 | self.bn1 = nn.BatchNorm2d(planes)
57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
58 | padding=1, bias=False)
59 | self.bn2 = nn.BatchNorm2d(planes)
60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
61 | self.bn3 = nn.BatchNorm2d(planes * 4)
62 | self.relu = nn.ReLU(inplace=True)
63 | self.downsample = downsample
64 | self.stride = stride
65 |
66 | def forward(self, x):
67 | residual = x
68 |
69 | out = self.conv1(x)
70 | out = self.bn1(out)
71 | out = self.relu(out)
72 |
73 | out = self.conv2(out)
74 | out = self.bn2(out)
75 | out = self.relu(out)
76 |
77 | out = self.conv3(out)
78 | out = self.bn3(out)
79 |
80 | if self.downsample is not None:
81 | residual = self.downsample(x)
82 |
83 | out += residual
84 | out = self.relu(out)
85 |
86 | return out
87 |
88 |
89 | class ResNet(nn.Module):
90 |
91 | def __init__(self, block, layers, num_classes=1000):
92 | self.inplanes = 64
93 | super(ResNet, self).__init__()
94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
95 | bias=False)
96 | self.bn1 = nn.BatchNorm2d(64)
97 | self.relu = nn.ReLU(inplace=True)
98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
99 | self.layer1 = self._make_layer(block, 64, layers[0])
100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
103 |
104 | for m in self.modules():
105 | if isinstance(m, nn.Conv2d):
106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
107 | m.weight.data.normal_(0, math.sqrt(2. / n))
108 | elif isinstance(m, nn.BatchNorm2d):
109 | m.weight.data.fill_(1)
110 | m.bias.data.zero_()
111 |
112 | def _make_layer(self, block, planes, blocks, stride=1):
113 | downsample = None
114 | if stride != 1 or self.inplanes != planes * block.expansion:
115 | downsample = nn.Sequential(
116 | nn.Conv2d(self.inplanes, planes * block.expansion,
117 | kernel_size=1, stride=stride, bias=False),
118 | nn.BatchNorm2d(planes * block.expansion),
119 | )
120 |
121 | layers = []
122 | layers.append(block(self.inplanes, planes, stride, downsample))
123 | self.inplanes = planes * block.expansion
124 | for i in range(1, blocks):
125 | layers.append(block(self.inplanes, planes))
126 |
127 | return nn.Sequential(*layers)
128 |
129 | def load_weights(self, path):
130 | model_dict = self.state_dict()
131 | print('loading model from {}'.format(path))
132 | try:
133 | #state_dict = torch.load(self.path)
134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()})
135 | pretrained_dict = torch.load(path)
136 | from collections import OrderedDict
137 | tmp = OrderedDict()
138 | for k,v in pretrained_dict.items():
139 | if k in model_dict:
140 | tmp[k] = v
141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
142 | # model_dict.update(pretrained_dict)
143 | model_dict.update(tmp)
144 | self.load_state_dict(model_dict)
145 |         except Exception:
146 |             print('loading model failed, {} may not exist'.format(path))
147 |
148 | def forward(self, x):
149 | x = self.conv1(x)
150 | x = self.bn1(x)
151 | x = self.relu(x)
152 | C1 = self.maxpool(x)
153 |
154 | C2 = self.layer1(C1)
155 | C3 = self.layer2(C2)
156 | C4 = self.layer3(C3)
157 | C5 = self.layer4(C4)
158 |
159 | return C1, C2, C3, C4, C5
160 |
161 | ############################################################
162 | # FPN Graph
163 | ############################################################
164 |
165 | class FPN(nn.Module): # xavier_fill as default
166 | def __init__(self, out_channels):
167 | super(FPN, self).__init__()
168 | self.out_channels = out_channels
169 | self.P6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0, ceil_mode=False)
170 | self.P5_conv1 = nn.Conv2d(2048, self.out_channels, kernel_size=1, stride=1)
171 |
172 | self.P4_conv1 = nn.Conv2d(1024, self.out_channels, kernel_size=1, stride=1)
173 | self.P4_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
174 |
175 | self.P3_conv1 = nn.Conv2d(512, self.out_channels, kernel_size=1, stride=1)
176 | self.P3_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
177 | self.P2_conv1 = nn.Conv2d(256, self.out_channels, kernel_size=1, stride=1)
178 | self.P2_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
179 |
180 |     def forward(self, C1, C2, C3, C4, C5):
181 |
182 | p5_out = self.P5_conv1(C5)
183 |
184 | p4_out = torch.add(self.P4_conv1(C4), F.upsample_nearest(p5_out, scale_factor=2))
185 | p3_out = torch.add(self.P3_conv1(C3), F.upsample_nearest(p4_out, scale_factor=2))
186 | p2_out = torch.add(self.P2_conv1(C2), F.upsample_nearest(p3_out, scale_factor=2))
187 |
188 | p4_out = self.P4_conv2(p4_out)
189 | p3_out = self.P3_conv2(p3_out)
190 | p2_out = self.P2_conv2(p2_out)
191 |
192 | # P6 is used for the 5th anchor scale in RPN. Generated by
193 | # subsampling from P5 with stride of 2.
194 | p6_out = self.P6(p5_out)
195 |
196 | return p2_out, p3_out, p4_out, p5_out, p6_out
197 |
198 |
199 | ############################################################
200 | # Pose Estimation Graph
201 | ############################################################
202 |
203 | class pose_estimation(nn.Module):
204 | def __init__(self, class_num, pretrain=True):
205 | super(pose_estimation, self).__init__()
206 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50
207 |         if pretrain:
208 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth'
209 | self.resnet.load_weights(self.model_path)
210 | self.apply_fix()
211 | self.out_channels = 256
212 | self.fpn = FPN(self.out_channels)
213 |
214 | # self.block = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0), nn.ReLU(inplace=True),
215 | # nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
216 | # nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
217 | # nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
218 | # nn.Conv2d(128, 256, 3, 1, 1), nn.ReLU(inplace=True)
219 | # )
220 | # self._init_weights(self.block)
221 |
222 | self.predict = nn.Sequential(nn.Conv2d(768, 128, 3, 1, 1), nn.ReLU(inplace=True),
223 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
224 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
225 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
226 | nn.Conv2d(128, 25, 3, 1, 1))
227 | self._init_weights(self.predict)
228 |
229 |
230 |
231 | def _gaussian_init_conv(self, conv):
232 | if isinstance(conv, nn.Conv2d):
233 | conv.weight.data.normal_(0, 0.01)
234 | if conv.bias is not None:
235 | conv.bias.data.zero_()
236 |
237 | def _init_weights(self, model):
238 | for m in model:
239 | if isinstance(m, nn.Conv2d):
240 | m.weight.data.normal_(0, 0.01)
241 | if m.bias is not None:
242 | m.bias.data.zero_()
243 |
244 |
245 | def apply_fix(self):
246 |         # intended: (1) fix BN, (2) freeze conv1/conv2;
247 |         # as written, only conv1 and layer1 are frozen
248 | for param in self.resnet.conv1.parameters():
249 | param.requires_grad = False
250 | for param in self.resnet.layer1.parameters():
251 | param.requires_grad = False
252 |
253 | def forward(self, x):
254 | C1, C2, C3, C4, C5 = self.resnet(x)
255 | P2, P3, P4, P5, P6 = self.fpn(C1, C2, C3, C4, C5)
256 |
257 | P4_x2 = F.upsample(P4, scale_factor=2)
258 | P5_x4 = F.upsample(P5, scale_factor=4)
259 |         feature_cat = torch.cat([P3, P4_x2, P5_x4], 1)
260 |         out = self.predict(feature_cat)
261 | return out
262 |
--------------------------------------------------------------------------------
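
CPM_FPN3 replaces the single-level head with a multi-scale one: P4 and P5 are upsampled to P3's stride-8 resolution and concatenated, giving 3 x 256 = 768 channels, which matches the 768-channel input of self.predict. A quick shape check with random stand-in tensors, assuming a hypothetical 256x256 input:

import torch
import torch.nn.functional as F

P3 = torch.randn(1, 256, 32, 32)                   # stride 8
P4 = torch.randn(1, 256, 16, 16)                   # stride 16
P5 = torch.randn(1, 256, 8, 8)                     # stride 32
feature_cat = torch.cat([P3,
                         F.interpolate(P4, scale_factor=2),
                         F.interpolate(P5, scale_factor=4)], 1)
print(feature_cat.shape)                           # torch.Size([1, 768, 32, 32]) -> predict -> 25 maps
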
/models/bk/CPM_FPN4.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | import torch.nn as nn
7 | import math
8 | import torch.nn.functional as F
9 |
10 | ############################################################
11 | # ResNet
12 | ############################################################
13 | def conv3x3(in_planes, out_planes, stride=1):
14 | "3x3 convolution with padding"
15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
16 | padding=1, bias=False)
17 |
18 | class BasicBlock(nn.Module):
19 | expansion = 1
20 |
21 | def __init__(self, inplanes, planes, stride=1, downsample=None):
22 | super(BasicBlock, self).__init__()
23 | self.conv1 = conv3x3(inplanes, planes, stride)
24 | self.bn1 = nn.BatchNorm2d(planes)
25 | self.relu = nn.ReLU(inplace=True)
26 | self.conv2 = conv3x3(planes, planes)
27 | self.bn2 = nn.BatchNorm2d(planes)
28 | self.downsample = downsample
29 | self.stride = stride
30 |
31 | def forward(self, x):
32 | residual = x
33 |
34 | out = self.conv1(x)
35 | out = self.bn1(out)
36 | out = self.relu(out)
37 |
38 | out = self.conv2(out)
39 | out = self.bn2(out)
40 |
41 | if self.downsample is not None:
42 | residual = self.downsample(x)
43 |
44 | out += residual
45 | out = self.relu(out)
46 |
47 | return out
48 |
49 |
50 | class Bottleneck(nn.Module):
51 | expansion = 4
52 |
53 | def __init__(self, inplanes, planes, stride=1, downsample=None):
54 | super(Bottleneck, self).__init__()
55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
56 | self.bn1 = nn.BatchNorm2d(planes)
57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
58 | padding=1, bias=False)
59 | self.bn2 = nn.BatchNorm2d(planes)
60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
61 | self.bn3 = nn.BatchNorm2d(planes * 4)
62 | self.relu = nn.ReLU(inplace=True)
63 | self.downsample = downsample
64 | self.stride = stride
65 |
66 | def forward(self, x):
67 | residual = x
68 |
69 | out = self.conv1(x)
70 | out = self.bn1(out)
71 | out = self.relu(out)
72 |
73 | out = self.conv2(out)
74 | out = self.bn2(out)
75 | out = self.relu(out)
76 |
77 | out = self.conv3(out)
78 | out = self.bn3(out)
79 |
80 | if self.downsample is not None:
81 | residual = self.downsample(x)
82 |
83 | out += residual
84 | out = self.relu(out)
85 |
86 | return out
87 |
88 |
89 | class ResNet(nn.Module):
90 |
91 | def __init__(self, block, layers, num_classes=1000):
92 | self.inplanes = 64
93 | super(ResNet, self).__init__()
94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
95 | bias=False)
96 | self.bn1 = nn.BatchNorm2d(64)
97 | self.relu = nn.ReLU(inplace=True)
98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
99 | self.layer1 = self._make_layer(block, 64, layers[0])
100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
103 |
104 | for m in self.modules():
105 | if isinstance(m, nn.Conv2d):
106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
107 | m.weight.data.normal_(0, math.sqrt(2. / n))
108 | elif isinstance(m, nn.BatchNorm2d):
109 | m.weight.data.fill_(1)
110 | m.bias.data.zero_()
111 |
112 | def _make_layer(self, block, planes, blocks, stride=1):
113 | downsample = None
114 | if stride != 1 or self.inplanes != planes * block.expansion:
115 | downsample = nn.Sequential(
116 | nn.Conv2d(self.inplanes, planes * block.expansion,
117 | kernel_size=1, stride=stride, bias=False),
118 | nn.BatchNorm2d(planes * block.expansion),
119 | )
120 |
121 | layers = []
122 | layers.append(block(self.inplanes, planes, stride, downsample))
123 | self.inplanes = planes * block.expansion
124 | for i in range(1, blocks):
125 | layers.append(block(self.inplanes, planes))
126 |
127 | return nn.Sequential(*layers)
128 |
129 | def load_weights(self, path):
130 | model_dict = self.state_dict()
131 | print('loading model from {}'.format(path))
132 | try:
133 | #state_dict = torch.load(self.path)
134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()})
135 | pretrained_dict = torch.load(path)
136 | from collections import OrderedDict
137 | tmp = OrderedDict()
138 | for k,v in pretrained_dict.items():
139 | if k in model_dict:
140 | tmp[k] = v
141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
142 | # model_dict.update(pretrained_dict)
143 | model_dict.update(tmp)
144 | self.load_state_dict(model_dict)
145 |         except Exception:
146 |             print('loading model failed, {} may not exist'.format(path))
147 |
148 | def forward(self, x):
149 | x = self.conv1(x)
150 | x = self.bn1(x)
151 | x = self.relu(x)
152 | C1 = self.maxpool(x)
153 |
154 | C2 = self.layer1(C1)
155 | C3 = self.layer2(C2)
156 | C4 = self.layer3(C3)
157 | C5 = self.layer4(C4)
158 |
159 | return C1, C2, C3, C4, C5
160 |
161 | ############################################################
162 | # FPN Graph
163 | ############################################################
164 |
165 | class FPN(nn.Module): # xavier_fill as default
166 | def __init__(self, out_channels):
167 | super(FPN, self).__init__()
168 | self.out_channels = out_channels
169 | self.P6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0, ceil_mode=False)
170 | self.P5_conv1 = nn.Conv2d(2048, self.out_channels, kernel_size=1, stride=1)
171 |
172 | self.P4_conv1 = nn.Conv2d(1024, self.out_channels, kernel_size=1, stride=1)
173 | self.P4_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
174 |
175 | self.P3_conv1 = nn.Conv2d(512, self.out_channels, kernel_size=1, stride=1)
176 | self.P3_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
177 | self.P2_conv1 = nn.Conv2d(256, self.out_channels, kernel_size=1, stride=1)
178 | self.P2_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
179 |
180 |     def forward(self, C1, C2, C3, C4, C5):
181 |
182 | p5_out = self.P5_conv1(C5)
183 |
184 | p4_out = torch.add(self.P4_conv1(C4), F.upsample_nearest(p5_out, scale_factor=2))
185 | p3_out = torch.add(self.P3_conv1(C3), F.upsample_nearest(p4_out, scale_factor=2))
186 | p2_out = torch.add(self.P2_conv1(C2), F.upsample_nearest(p3_out, scale_factor=2))
187 |
188 | p4_out = self.P4_conv2(p4_out)
189 | p3_out = self.P3_conv2(p3_out)
190 | p2_out = self.P2_conv2(p2_out)
191 |
192 | # P6 is used for the 5th anchor scale in RPN. Generated by
193 | # subsampling from P5 with stride of 2.
194 | p6_out = self.P6(p5_out)
195 |
196 | return p2_out, p3_out, p4_out, p5_out, p6_out
197 |
198 |
199 | ############################################################
200 | # Pose Estimation Graph
201 | ############################################################
202 |
203 | class pose_estimation(nn.Module):
204 | def __init__(self, class_num, pretrain=True):
205 | super(pose_estimation, self).__init__()
206 | # self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50
207 | self.resnet = ResNet(Bottleneck, [3, 4, 23, 3]) #resnet101
208 |         if pretrain:
209 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet101-5d3b4d8f.pth'
210 | self.resnet.load_weights(self.model_path)
211 | self.apply_fix()
212 | self.out_channels = 256
213 | self.fpn = FPN(self.out_channels)
214 |
215 | # self.block = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0), nn.ReLU(inplace=True),
216 | # nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
217 | # nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
218 | # nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
219 | # nn.Conv2d(128, 256, 3, 1, 1), nn.ReLU(inplace=True)
220 | # )
221 | # self._init_weights(self.block)
222 |
223 | self.predict = nn.Sequential(nn.Conv2d(768, 128, 3, 1, 1), nn.ReLU(inplace=True),
224 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
225 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
226 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
227 | nn.Conv2d(128, 25, 3, 1, 1))
228 | self._init_weights(self.predict)
229 |
230 |
231 |
232 | def _gaussian_init_conv(self, conv):
233 | if isinstance(conv, nn.Conv2d):
234 | conv.weight.data.normal_(0, 0.01)
235 | if conv.bias is not None:
236 | conv.bias.data.zero_()
237 |
238 | def _init_weights(self, model):
239 | for m in model:
240 | if isinstance(m, nn.Conv2d):
241 | m.weight.data.normal_(0, 0.01)
242 | if m.bias is not None:
243 | m.bias.data.zero_()
244 |
245 |
246 | def apply_fix(self):
247 |         # intended: (1) fix BN, (2) freeze conv1/conv2;
248 |         # as written, only conv1 and layer1 are frozen
249 | for param in self.resnet.conv1.parameters():
250 | param.requires_grad = False
251 | for param in self.resnet.layer1.parameters():
252 | param.requires_grad = False
253 |
254 | def forward(self, x):
255 | C1, C2, C3, C4, C5 = self.resnet(x)
256 | P2, P3, P4, P5, P6 = self.fpn(C1, C2, C3, C4, C5)
257 |
258 | P4_x2 = F.upsample(P4, scale_factor=2)
259 | P5_x4 = F.upsample(P5, scale_factor=4)
260 |         feature_cat = torch.cat([P3, P4_x2, P5_x4], 1)
261 |         out = self.predict(feature_cat)
262 | return out
263 |
--------------------------------------------------------------------------------
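
CPM_FPN4 is CPM_FPN3 with the backbone swapped from ResNet-50 ([3, 4, 6, 3]) to ResNet-101 ([3, 4, 23, 3]) and the matching checkpoint path; the FPN and head are unchanged, so only layer3 deepens. A rough parameter comparison, using torchvision models as stand-ins for the local ResNet class (assumes torchvision is installed):

import torchvision.models as tvm

def count_params(m):
    return sum(p.numel() for p in m.parameters()) / 1e6

print('resnet50:  %.1fM params' % count_params(tvm.resnet50()))   # ~25.6M
print('resnet101: %.1fM params' % count_params(tvm.resnet101()))  # ~44.5M
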
/models/bk/CPM_ResNet.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | import torch.nn as nn
7 | import math
8 | import torch.nn.functional as F
9 |
10 | ############################################################
11 | # ResNet
12 | ############################################################
13 | def conv3x3(in_planes, out_planes, stride=1):
14 | "3x3 convolution with padding"
15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
16 | padding=1, bias=False)
17 |
18 | class BasicBlock(nn.Module):
19 | expansion = 1
20 |
21 | def __init__(self, inplanes, planes, stride=1, downsample=None):
22 | super(BasicBlock, self).__init__()
23 | self.conv1 = conv3x3(inplanes, planes, stride)
24 | self.bn1 = nn.BatchNorm2d(planes)
25 | self.relu = nn.ReLU(inplace=True)
26 | self.conv2 = conv3x3(planes, planes)
27 | self.bn2 = nn.BatchNorm2d(planes)
28 | self.downsample = downsample
29 | self.stride = stride
30 |
31 | def forward(self, x):
32 | residual = x
33 |
34 | out = self.conv1(x)
35 | out = self.bn1(out)
36 | out = self.relu(out)
37 |
38 | out = self.conv2(out)
39 | out = self.bn2(out)
40 |
41 | if self.downsample is not None:
42 | residual = self.downsample(x)
43 |
44 | out += residual
45 | out = self.relu(out)
46 |
47 | return out
48 |
49 |
50 | class Bottleneck(nn.Module):
51 | expansion = 4
52 |
53 | def __init__(self, inplanes, planes, stride=1, downsample=None):
54 | super(Bottleneck, self).__init__()
55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
56 | self.bn1 = nn.BatchNorm2d(planes)
57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
58 | padding=1, bias=False)
59 | self.bn2 = nn.BatchNorm2d(planes)
60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
61 | self.bn3 = nn.BatchNorm2d(planes * 4)
62 | self.relu = nn.ReLU(inplace=True)
63 | self.downsample = downsample
64 | self.stride = stride
65 |
66 | def forward(self, x):
67 | residual = x
68 |
69 | out = self.conv1(x)
70 | out = self.bn1(out)
71 | out = self.relu(out)
72 |
73 | out = self.conv2(out)
74 | out = self.bn2(out)
75 | out = self.relu(out)
76 |
77 | out = self.conv3(out)
78 | out = self.bn3(out)
79 |
80 | if self.downsample is not None:
81 | residual = self.downsample(x)
82 |
83 | out += residual
84 | out = self.relu(out)
85 |
86 | return out
87 |
88 |
89 | class ResNet(nn.Module):
90 |
91 | def __init__(self, block, layers, num_classes=1000):
92 | self.inplanes = 64
93 | super(ResNet, self).__init__()
94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
95 | bias=False)
96 | self.bn1 = nn.BatchNorm2d(64)
97 | self.relu = nn.ReLU(inplace=True)
98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
99 | self.layer1 = self._make_layer(block, 64, layers[0])
100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
103 |
104 | for m in self.modules():
105 | if isinstance(m, nn.Conv2d):
106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
107 | m.weight.data.normal_(0, math.sqrt(2. / n))
108 | elif isinstance(m, nn.BatchNorm2d):
109 | m.weight.data.fill_(1)
110 | m.bias.data.zero_()
111 |
112 | def _make_layer(self, block, planes, blocks, stride=1):
113 | downsample = None
114 | if stride != 1 or self.inplanes != planes * block.expansion:
115 | downsample = nn.Sequential(
116 | nn.Conv2d(self.inplanes, planes * block.expansion,
117 | kernel_size=1, stride=stride, bias=False),
118 | nn.BatchNorm2d(planes * block.expansion),
119 | )
120 |
121 | layers = []
122 | layers.append(block(self.inplanes, planes, stride, downsample))
123 | self.inplanes = planes * block.expansion
124 | for i in range(1, blocks):
125 | layers.append(block(self.inplanes, planes))
126 |
127 | return nn.Sequential(*layers)
128 |
129 | def load_weights(self, path):
130 | model_dict = self.state_dict()
131 | print('loading model from {}'.format(path))
132 | try:
133 | #state_dict = torch.load(self.path)
134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()})
135 | pretrained_dict = torch.load(path)
136 | from collections import OrderedDict
137 | tmp = OrderedDict()
138 | for k,v in pretrained_dict.items():
139 | if k in model_dict:
140 | tmp[k] = v
141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
142 | # model_dict.update(pretrained_dict)
143 | model_dict.update(tmp)
144 | self.load_state_dict(model_dict)
145 |         except Exception:
146 |             print('loading model failed, {} may not exist'.format(path))
147 |
148 | def forward(self, x):
149 | x = self.conv1(x)
150 | x = self.bn1(x)
151 | x = self.relu(x)
152 | C1 = self.maxpool(x)
153 |
154 | C2 = self.layer1(C1)
155 | C3 = self.layer2(C2)
156 | C4 = self.layer3(C3)
157 | C5 = self.layer4(C4)
158 |
159 | return C1, C2, C3, C4, C5
160 |
161 | ############################################################
162 | # Pose Estimation Graph
163 | ############################################################
164 |
165 | class pose_estimation(nn.Module):
166 | def __init__(self, class_num, pretrain=True):
167 | super(pose_estimation, self).__init__()
168 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50
169 |         if pretrain:
170 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth'
171 | self.resnet.load_weights(self.model_path)
172 | self.apply_fix()
173 | self.block = nn.Sequential(nn.Conv2d(512, 128, 1, 1, 0),nn.ReLU(inplace=True),
174 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
175 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
176 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
177 | nn.Conv2d(128, 512, 3, 1, 1), nn.ReLU(inplace=True)
178 | )
179 | self._init_weights(self.block)
180 | self.predict = nn.Conv2d(512, 25, 1, 1, 0)
181 |
182 | def _init_weights(self, model):
183 | for m in model:
184 | if isinstance(m, nn.Conv2d):
185 | m.weight.data.normal_(0, 0.01)
186 | if m.bias is not None:
187 | m.bias.data.zero_()
188 | def apply_fix(self):
189 |         # intended: (1) fix BN, (2) freeze conv1/conv2;
190 |         # as written, only conv1 and layer1 are frozen
191 | for param in self.resnet.conv1.parameters():
192 | param.requires_grad = False
193 | for param in self.resnet.layer1.parameters():
194 | param.requires_grad = False
195 |
196 |
197 | def forward(self, x):
198 | C1, C2, C3, C4, C5 = self.resnet(x)
199 |
200 | out = self.block(C3)
201 | predict = self.predict(out)
202 | return predict
--------------------------------------------------------------------------------
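
The CPM_ResNet baseline skips the FPN entirely: the five-conv block runs directly on C3 (ResNet-50's layer2 output: 512 channels at stride 8) and a 1x1 conv emits the 25 heatmaps. The expected tensor flow, sketched with an abbreviated two-conv stand-in for self.block and a hypothetical 256x256 input:

import torch
import torch.nn as nn

C3 = torch.randn(1, 512, 32, 32)                   # layer2 output for a 256x256 input
block = nn.Sequential(                             # abbreviated stand-in for self.block
    nn.Conv2d(512, 128, 1), nn.ReLU(inplace=True),
    nn.Conv2d(128, 512, 3, padding=1), nn.ReLU(inplace=True))
predict = nn.Conv2d(512, 25, 1)                    # one channel per keypoint heatmap
print(predict(block(C3)).shape)                    # torch.Size([1, 25, 32, 32])
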
/models/bk/CPM_ResNet2.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | import torch.nn as nn
7 | import math
8 | import torch.nn.functional as F
9 |
10 | ############################################################
11 | # ResNet
12 | ############################################################
13 | def conv3x3(in_planes, out_planes, stride=1):
14 | "3x3 convolution with padding"
15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
16 | padding=1, bias=False)
17 |
18 | class BasicBlock(nn.Module):
19 | expansion = 1
20 |
21 | def __init__(self, inplanes, planes, stride=1, downsample=None):
22 | super(BasicBlock, self).__init__()
23 | self.conv1 = conv3x3(inplanes, planes, stride)
24 | self.bn1 = nn.BatchNorm2d(planes)
25 | self.relu = nn.ReLU(inplace=True)
26 | self.conv2 = conv3x3(planes, planes)
27 | self.bn2 = nn.BatchNorm2d(planes)
28 | self.downsample = downsample
29 | self.stride = stride
30 |
31 | def forward(self, x):
32 | residual = x
33 |
34 | out = self.conv1(x)
35 | out = self.bn1(out)
36 | out = self.relu(out)
37 |
38 | out = self.conv2(out)
39 | out = self.bn2(out)
40 |
41 | if self.downsample is not None:
42 | residual = self.downsample(x)
43 |
44 | out += residual
45 | out = self.relu(out)
46 |
47 | return out
48 |
49 |
50 | class Bottleneck(nn.Module):
51 | expansion = 4
52 |
53 | def __init__(self, inplanes, planes, stride=1, downsample=None):
54 | super(Bottleneck, self).__init__()
55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
56 | self.bn1 = nn.BatchNorm2d(planes)
57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
58 | padding=1, bias=False)
59 | self.bn2 = nn.BatchNorm2d(planes)
60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
61 | self.bn3 = nn.BatchNorm2d(planes * 4)
62 | self.relu = nn.ReLU(inplace=True)
63 | self.downsample = downsample
64 | self.stride = stride
65 |
66 | def forward(self, x):
67 | residual = x
68 |
69 | out = self.conv1(x)
70 | out = self.bn1(out)
71 | out = self.relu(out)
72 |
73 | out = self.conv2(out)
74 | out = self.bn2(out)
75 | out = self.relu(out)
76 |
77 | out = self.conv3(out)
78 | out = self.bn3(out)
79 |
80 | if self.downsample is not None:
81 | residual = self.downsample(x)
82 |
83 | out += residual
84 | out = self.relu(out)
85 |
86 | return out
87 |
88 |
89 | class ResNet(nn.Module):
90 |
91 | def __init__(self, block, layers, num_classes=1000):
92 | self.inplanes = 64
93 | super(ResNet, self).__init__()
94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
95 | bias=False)
96 | self.bn1 = nn.BatchNorm2d(64)
97 | self.relu = nn.ReLU(inplace=True)
98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
99 | self.layer1 = self._make_layer(block, 64, layers[0])
100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
103 |
104 | for m in self.modules():
105 | if isinstance(m, nn.Conv2d):
106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
107 | m.weight.data.normal_(0, math.sqrt(2. / n))
108 | elif isinstance(m, nn.BatchNorm2d):
109 | m.weight.data.fill_(1)
110 | m.bias.data.zero_()
111 |
112 | def _make_layer(self, block, planes, blocks, stride=1):
113 | downsample = None
114 | if stride != 1 or self.inplanes != planes * block.expansion:
115 | downsample = nn.Sequential(
116 | nn.Conv2d(self.inplanes, planes * block.expansion,
117 | kernel_size=1, stride=stride, bias=False),
118 | nn.BatchNorm2d(planes * block.expansion),
119 | )
120 |
121 | layers = []
122 | layers.append(block(self.inplanes, planes, stride, downsample))
123 | self.inplanes = planes * block.expansion
124 | for i in range(1, blocks):
125 | layers.append(block(self.inplanes, planes))
126 |
127 | return nn.Sequential(*layers)
128 |
129 | def load_weights(self, path):
130 | model_dict = self.state_dict()
131 | print('loading model from {}'.format(path))
132 | try:
133 | #state_dict = torch.load(self.path)
134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()})
135 | pretrained_dict = torch.load(path)
136 | from collections import OrderedDict
137 | tmp = OrderedDict()
138 | for k,v in pretrained_dict.items():
139 | if k in model_dict:
140 | tmp[k] = v
141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
142 | # model_dict.update(pretrained_dict)
143 | model_dict.update(tmp)
144 | self.load_state_dict(model_dict)
145 |         except Exception:
146 |             print('loading model failed, {} may not exist'.format(path))
147 |
148 | def forward(self, x):
149 | x = self.conv1(x)
150 | x = self.bn1(x)
151 | x = self.relu(x)
152 | C1 = self.maxpool(x)
153 |
154 | C2 = self.layer1(C1)
155 | C3 = self.layer2(C2)
156 | C4 = self.layer3(C3)
157 | C5 = self.layer4(C4)
158 |
159 | return C1, C2, C3, C4, C5
160 |
161 | ############################################################
162 | # Pose Estimation Graph
163 | ############################################################
164 |
165 | class pose_estimation(nn.Module):
166 | def __init__(self, class_num, pretrain=True):
167 | super(pose_estimation, self).__init__()
168 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50
169 |         if pretrain:
170 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth'
171 | self.resnet.load_weights(self.model_path)
172 | self.apply_fix()
173 | self.block = nn.Sequential(nn.Conv2d(512, 128, 1, 1, 0),nn.ReLU(inplace=True),
174 | )
175 | self._init_weights(self.block)
176 | self.predict = nn.Conv2d(128, 25, 1, 1, 0)
177 |
178 | def _init_weights(self, model):
179 | for m in model:
180 | if isinstance(m, nn.Conv2d):
181 | m.weight.data.normal_(0, 0.01)
182 | if m.bias is not None:
183 | m.bias.data.zero_()
184 | def apply_fix(self):
185 |         # intended: (1) fix BN, (2) freeze conv1/conv2;
186 |         # as written, only conv1 and layer1 are frozen
187 | for param in self.resnet.conv1.parameters():
188 | param.requires_grad = False
189 | for param in self.resnet.layer1.parameters():
190 | param.requires_grad = False
191 |
192 |
193 | def forward(self, x):
194 | C1, C2, C3, C4, C5 = self.resnet(x)
195 |
196 | out = self.block(C3)
197 | predict = self.predict(out)
198 | return predict
--------------------------------------------------------------------------------
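
CPM_ResNet2 strips the head down to almost a linear probe on C3: one 1x1 reduce plus ReLU, then the 1x1 predict. The whole head, reproduced standalone with illustrative shapes:

import torch
import torch.nn as nn

head = nn.Sequential(nn.Conv2d(512, 128, 1), nn.ReLU(inplace=True),
                     nn.Conv2d(128, 25, 1))        # reduce + predict, nothing else
print(head(torch.randn(1, 512, 32, 32)).shape)     # torch.Size([1, 25, 32, 32])
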
/models/bk/CPM_ResNet3.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | import torch.nn as nn
7 | import math
8 | import torch.nn.functional as F
9 |
10 | ############################################################
11 | # ResNet
12 | ############################################################
13 | def conv3x3(in_planes, out_planes, stride=1):
14 | "3x3 convolution with padding"
15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
16 | padding=1, bias=False)
17 |
18 | class BasicBlock(nn.Module):
19 | expansion = 1
20 |
21 | def __init__(self, inplanes, planes, stride=1, downsample=None):
22 | super(BasicBlock, self).__init__()
23 | self.conv1 = conv3x3(inplanes, planes, stride)
24 | self.bn1 = nn.BatchNorm2d(planes)
25 | self.relu = nn.ReLU(inplace=True)
26 | self.conv2 = conv3x3(planes, planes)
27 | self.bn2 = nn.BatchNorm2d(planes)
28 | self.downsample = downsample
29 | self.stride = stride
30 |
31 | def forward(self, x):
32 | residual = x
33 |
34 | out = self.conv1(x)
35 | out = self.bn1(out)
36 | out = self.relu(out)
37 |
38 | out = self.conv2(out)
39 | out = self.bn2(out)
40 |
41 | if self.downsample is not None:
42 | residual = self.downsample(x)
43 |
44 | out += residual
45 | out = self.relu(out)
46 |
47 | return out
48 |
49 |
50 | class Bottleneck(nn.Module):
51 | expansion = 4
52 |
53 | def __init__(self, inplanes, planes, stride=1, downsample=None):
54 | super(Bottleneck, self).__init__()
55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
56 | self.bn1 = nn.BatchNorm2d(planes)
57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
58 | padding=1, bias=False)
59 | self.bn2 = nn.BatchNorm2d(planes)
60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
61 | self.bn3 = nn.BatchNorm2d(planes * 4)
62 | self.relu = nn.ReLU(inplace=True)
63 | self.downsample = downsample
64 | self.stride = stride
65 |
66 | def forward(self, x):
67 | residual = x
68 |
69 | out = self.conv1(x)
70 | out = self.bn1(out)
71 | out = self.relu(out)
72 |
73 | out = self.conv2(out)
74 | out = self.bn2(out)
75 | out = self.relu(out)
76 |
77 | out = self.conv3(out)
78 | out = self.bn3(out)
79 |
80 | if self.downsample is not None:
81 | residual = self.downsample(x)
82 |
83 | out += residual
84 | out = self.relu(out)
85 |
86 | return out
87 |
88 |
89 | class ResNet(nn.Module):
90 |
91 | def __init__(self, block, layers, num_classes=1000):
92 | self.inplanes = 64
93 | super(ResNet, self).__init__()
94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
95 | bias=False)
96 | self.bn1 = nn.BatchNorm2d(64)
97 | self.relu = nn.ReLU(inplace=True)
98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
99 | self.layer1 = self._make_layer(block, 64, layers[0])
100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
103 |
104 | for m in self.modules():
105 | if isinstance(m, nn.Conv2d):
106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
107 | m.weight.data.normal_(0, math.sqrt(2. / n))
108 | elif isinstance(m, nn.BatchNorm2d):
109 | m.weight.data.fill_(1)
110 | m.bias.data.zero_()
111 |
112 | def _make_layer(self, block, planes, blocks, stride=1):
113 | downsample = None
114 | if stride != 1 or self.inplanes != planes * block.expansion:
115 | downsample = nn.Sequential(
116 | nn.Conv2d(self.inplanes, planes * block.expansion,
117 | kernel_size=1, stride=stride, bias=False),
118 | nn.BatchNorm2d(planes * block.expansion),
119 | )
120 |
121 | layers = []
122 | layers.append(block(self.inplanes, planes, stride, downsample))
123 | self.inplanes = planes * block.expansion
124 | for i in range(1, blocks):
125 | layers.append(block(self.inplanes, planes))
126 |
127 | return nn.Sequential(*layers)
128 |
129 | def load_weights(self, path):
130 | model_dict = self.state_dict()
131 | print('loading model from {}'.format(path))
132 | try:
133 | #state_dict = torch.load(self.path)
134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()})
135 | pretrained_dict = torch.load(path)
136 | from collections import OrderedDict
137 | tmp = OrderedDict()
138 | for k,v in pretrained_dict.items():
139 | if k in model_dict:
140 | tmp[k] = v
141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
142 | # model_dict.update(pretrained_dict)
143 | model_dict.update(tmp)
144 | self.load_state_dict(model_dict)
145 |         except Exception:
146 |             print('loading model failed, {} may not exist'.format(path))
147 |
148 | def forward(self, x):
149 | x = self.conv1(x)
150 | x = self.bn1(x)
151 | x = self.relu(x)
152 | C1 = self.maxpool(x)
153 |
154 | C2 = self.layer1(C1)
155 | C3 = self.layer2(C2)
156 | C4 = self.layer3(C3)
157 | C5 = self.layer4(C4)
158 |
159 | return C1, C2, C3, C4, C5
160 |
161 | ############################################################
162 | # Pose Estimation Graph
163 | ############################################################
164 |
165 | class pose_estimation(nn.Module):
166 | def __init__(self, class_num, pretrain=True):
167 | super(pose_estimation, self).__init__()
168 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50
169 |         if pretrain:
170 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth'
171 | self.resnet.load_weights(self.model_path)
172 | self.apply_fix()
173 | self.reduce = nn.Sequential(nn.Conv2d(1024, 256, 1, 1, 0))
174 |
175 | self.block = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0),nn.ReLU(inplace=True),
176 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
177 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
178 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
179 | nn.Conv2d(128, 512, 3, 1, 1), nn.ReLU(inplace=True)
180 | )
181 | self._init_weights(self.block)
182 | self.predict = nn.Conv2d(512, 25, 1, 1, 0)
183 |
184 | def _init_weights(self, model):
185 | for m in model:
186 | if isinstance(m, nn.Conv2d):
187 | m.weight.data.normal_(0, 0.01)
188 | if m.bias is not None:
189 | m.bias.data.zero_()
190 | def apply_fix(self):
191 |         # intended: (1) fix BN, (2) freeze conv1/conv2;
192 |         # as written, only conv1 and layer1 are frozen
193 | for param in self.resnet.conv1.parameters():
194 | param.requires_grad = False
195 | for param in self.resnet.layer1.parameters():
196 | param.requires_grad = False
197 |
198 |
199 | def forward(self, x):
200 | C1, C2, C3, C4, C5 = self.resnet(x)
201 | C4 = self.reduce(C4)
202 | C4 = F.upsample(C4, scale_factor=2)
203 |
204 | out = self.block(C4)
205 | predict = self.predict(out)
206 | return predict
--------------------------------------------------------------------------------
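
CPM_ResNet3 moves the head one level deeper: C4 (1024 channels at stride 16) is reduced to 256 channels with a 1x1 conv and upsampled x2 back to stride 8 before the block/predict pair. A standalone shape check (random tensors; reduce_conv mirrors self.reduce, and F.interpolate stands in for F.upsample):

import torch
import torch.nn as nn
import torch.nn.functional as F

reduce_conv = nn.Conv2d(1024, 256, 1)              # mirrors self.reduce
C4 = torch.randn(1, 1024, 16, 16)                  # stride 16 for a 256x256 input
x = F.interpolate(reduce_conv(C4), scale_factor=2)
print(x.shape)                                     # torch.Size([1, 256, 32, 32]), i.e. stride 8
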
/models/bk/CPM_ResNet4.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | import torch.nn as nn
7 | import math
8 | import torch.nn.functional as F
9 |
10 | ############################################################
11 | # ResNet
12 | ############################################################
13 | def conv3x3(in_planes, out_planes, stride=1):
14 | "3x3 convolution with padding"
15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
16 | padding=1, bias=False)
17 |
18 | class BasicBlock(nn.Module):
19 | expansion = 1
20 |
21 | def __init__(self, inplanes, planes, stride=1, downsample=None):
22 | super(BasicBlock, self).__init__()
23 | self.conv1 = conv3x3(inplanes, planes, stride)
24 | self.bn1 = nn.BatchNorm2d(planes)
25 | self.relu = nn.ReLU(inplace=True)
26 | self.conv2 = conv3x3(planes, planes)
27 | self.bn2 = nn.BatchNorm2d(planes)
28 | self.downsample = downsample
29 | self.stride = stride
30 |
31 | def forward(self, x):
32 | residual = x
33 |
34 | out = self.conv1(x)
35 | out = self.bn1(out)
36 | out = self.relu(out)
37 |
38 | out = self.conv2(out)
39 | out = self.bn2(out)
40 |
41 | if self.downsample is not None:
42 | residual = self.downsample(x)
43 |
44 | out += residual
45 | out = self.relu(out)
46 |
47 | return out
48 |
49 |
50 | class Bottleneck(nn.Module):
51 | expansion = 4
52 |
53 | def __init__(self, inplanes, planes, stride=1, downsample=None):
54 | super(Bottleneck, self).__init__()
55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
56 | self.bn1 = nn.BatchNorm2d(planes)
57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
58 | padding=1, bias=False)
59 | self.bn2 = nn.BatchNorm2d(planes)
60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
61 | self.bn3 = nn.BatchNorm2d(planes * 4)
62 | self.relu = nn.ReLU(inplace=True)
63 | self.downsample = downsample
64 | self.stride = stride
65 |
66 | def forward(self, x):
67 | residual = x
68 |
69 | out = self.conv1(x)
70 | out = self.bn1(out)
71 | out = self.relu(out)
72 |
73 | out = self.conv2(out)
74 | out = self.bn2(out)
75 | out = self.relu(out)
76 |
77 | out = self.conv3(out)
78 | out = self.bn3(out)
79 |
80 | if self.downsample is not None:
81 | residual = self.downsample(x)
82 |
83 | out += residual
84 | out = self.relu(out)
85 |
86 | return out
87 |
88 |
89 | class ResNet(nn.Module):
90 |
91 | def __init__(self, block, layers, num_classes=1000):
92 | self.inplanes = 64
93 | super(ResNet, self).__init__()
94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
95 | bias=False)
96 | self.bn1 = nn.BatchNorm2d(64)
97 | self.relu = nn.ReLU(inplace=True)
98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
99 | self.layer1 = self._make_layer(block, 64, layers[0])
100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
103 |
104 | for m in self.modules():
105 | if isinstance(m, nn.Conv2d):
106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
107 | m.weight.data.normal_(0, math.sqrt(2. / n))
108 | elif isinstance(m, nn.BatchNorm2d):
109 | m.weight.data.fill_(1)
110 | m.bias.data.zero_()
111 |
112 | def _make_layer(self, block, planes, blocks, stride=1):
113 | downsample = None
114 | if stride != 1 or self.inplanes != planes * block.expansion:
115 | downsample = nn.Sequential(
116 | nn.Conv2d(self.inplanes, planes * block.expansion,
117 | kernel_size=1, stride=stride, bias=False),
118 | nn.BatchNorm2d(planes * block.expansion),
119 | )
120 |
121 | layers = []
122 | layers.append(block(self.inplanes, planes, stride, downsample))
123 | self.inplanes = planes * block.expansion
124 | for i in range(1, blocks):
125 | layers.append(block(self.inplanes, planes))
126 |
127 | return nn.Sequential(*layers)
128 |
129 | def load_weights(self, path):
130 | model_dict = self.state_dict()
131 | print('loading model from {}'.format(path))
132 | try:
133 | #state_dict = torch.load(self.path)
134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()})
135 | pretrained_dict = torch.load(path)
136 | from collections import OrderedDict
137 | tmp = OrderedDict()
138 | for k,v in pretrained_dict.items():
139 | if k in model_dict:
140 | tmp[k] = v
141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
142 | # model_dict.update(pretrained_dict)
143 | model_dict.update(tmp)
144 | self.load_state_dict(model_dict)
145 |         except Exception:
146 |             print('loading model failed, {} may not exist'.format(path))
147 |
148 | def forward(self, x):
149 | x = self.conv1(x)
150 | x = self.bn1(x)
151 | x = self.relu(x)
152 | C1 = self.maxpool(x)
153 |
154 | C2 = self.layer1(C1)
155 | C3 = self.layer2(C2)
156 | C4 = self.layer3(C3)
157 | C5 = self.layer4(C4)
158 |
159 | return C1, C2, C3, C4, C5
160 |
161 | ############################################################
162 | # Pose Estimation Graph
163 | ############################################################
164 |
165 | class pose_estimation(nn.Module):
166 | def __init__(self, class_num, pretrain=True):
167 | super(pose_estimation, self).__init__()
168 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50
169 |         if pretrain:
170 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth'
171 | self.resnet.load_weights(self.model_path)
172 | self.apply_fix()
173 | self.reduce = nn.Sequential(nn.Conv2d(1024, 256, 1, 1, 0))
174 |
175 | self.block = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0),nn.ReLU(inplace=True),
176 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
177 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
178 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
179 | nn.Conv2d(128, 512, 3, 1, 1), nn.ReLU(inplace=True)
180 | )
181 | self._init_weights(self.block)
182 | self.predict = nn.Conv2d(512, 25, 1, 1, 0)
183 |
184 | def _init_weights(self, model):
185 | for m in model:
186 | if isinstance(m, nn.Conv2d):
187 | m.weight.data.normal_(0, 0.01)
188 | if m.bias is not None:
189 | m.bias.data.zero_()
190 | def apply_fix(self):
191 |         # intended: (1) fix BN, (2) freeze conv1/conv2;
192 |         # as written, only conv1 and layer1 are frozen
193 | for param in self.resnet.conv1.parameters():
194 | param.requires_grad = False
195 | for param in self.resnet.layer1.parameters():
196 | param.requires_grad = False
197 |
198 |
199 | def forward(self, x):
200 | C1, C2, C3, C4, C5 = self.resnet(x)
201 | C4 = self.reduce(C4)
202 | C4 = F.upsample(C4, scale_factor=2)
203 |
204 | out = self.block(C4)
205 | predict = self.predict(out)
206 | return predict
--------------------------------------------------------------------------------
/models/bk/CPM_ResNet5.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | import torch.nn as nn
7 | import math
8 | import torch.nn.functional as F
9 |
10 | ############################################################
11 | # ResNet
12 | ############################################################
13 | def conv3x3(in_planes, out_planes, stride=1):
14 | "3x3 convolution with padding"
15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
16 | padding=1, bias=False)
17 |
18 | class BasicBlock(nn.Module):
19 | expansion = 1
20 |
21 | def __init__(self, inplanes, planes, stride=1, downsample=None):
22 | super(BasicBlock, self).__init__()
23 | self.conv1 = conv3x3(inplanes, planes, stride)
24 | self.bn1 = nn.BatchNorm2d(planes)
25 | self.relu = nn.ReLU(inplace=True)
26 | self.conv2 = conv3x3(planes, planes)
27 | self.bn2 = nn.BatchNorm2d(planes)
28 | self.downsample = downsample
29 | self.stride = stride
30 |
31 | def forward(self, x):
32 | residual = x
33 |
34 | out = self.conv1(x)
35 | out = self.bn1(out)
36 | out = self.relu(out)
37 |
38 | out = self.conv2(out)
39 | out = self.bn2(out)
40 |
41 | if self.downsample is not None:
42 | residual = self.downsample(x)
43 |
44 | out += residual
45 | out = self.relu(out)
46 |
47 | return out
48 |
49 |
50 | class Bottleneck(nn.Module):
51 | expansion = 4
52 |
53 | def __init__(self, inplanes, planes, stride=1, downsample=None):
54 | super(Bottleneck, self).__init__()
55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
56 | self.bn1 = nn.BatchNorm2d(planes)
57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
58 | padding=1, bias=False)
59 | self.bn2 = nn.BatchNorm2d(planes)
60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
61 | self.bn3 = nn.BatchNorm2d(planes * 4)
62 | self.relu = nn.ReLU(inplace=True)
63 | self.downsample = downsample
64 | self.stride = stride
65 |
66 | def forward(self, x):
67 | residual = x
68 |
69 | out = self.conv1(x)
70 | out = self.bn1(out)
71 | out = self.relu(out)
72 |
73 | out = self.conv2(out)
74 | out = self.bn2(out)
75 | out = self.relu(out)
76 |
77 | out = self.conv3(out)
78 | out = self.bn3(out)
79 |
80 | if self.downsample is not None:
81 | residual = self.downsample(x)
82 |
83 | out += residual
84 | out = self.relu(out)
85 |
86 | return out
87 |
88 |
89 | class ResNet(nn.Module):
90 |
91 | def __init__(self, block, layers, num_classes=1000):
92 | self.inplanes = 64
93 | super(ResNet, self).__init__()
94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
95 | bias=False)
96 | self.bn1 = nn.BatchNorm2d(64)
97 | self.relu = nn.ReLU(inplace=True)
98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
99 | self.layer1 = self._make_layer(block, 64, layers[0])
100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
103 |
104 | for m in self.modules():
105 | if isinstance(m, nn.Conv2d):
106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
107 | m.weight.data.normal_(0, math.sqrt(2. / n))
108 | elif isinstance(m, nn.BatchNorm2d):
109 | m.weight.data.fill_(1)
110 | m.bias.data.zero_()
111 |
112 | def _make_layer(self, block, planes, blocks, stride=1):
113 | downsample = None
114 | if stride != 1 or self.inplanes != planes * block.expansion:
115 | downsample = nn.Sequential(
116 | nn.Conv2d(self.inplanes, planes * block.expansion,
117 | kernel_size=1, stride=stride, bias=False),
118 | nn.BatchNorm2d(planes * block.expansion),
119 | )
120 |
121 | layers = []
122 | layers.append(block(self.inplanes, planes, stride, downsample))
123 | self.inplanes = planes * block.expansion
124 | for i in range(1, blocks):
125 | layers.append(block(self.inplanes, planes))
126 |
127 | return nn.Sequential(*layers)
128 |
129 | def load_weights(self, path):
130 | model_dict = self.state_dict()
131 | print('loading model from {}'.format(path))
132 | try:
133 | #state_dict = torch.load(self.path)
134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()})
135 | pretrained_dict = torch.load(path)
136 | from collections import OrderedDict
137 | tmp = OrderedDict()
138 | for k,v in pretrained_dict.items():
139 | if k in model_dict:
140 | tmp[k] = v
141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
142 | # model_dict.update(pretrained_dict)
143 | model_dict.update(tmp)
144 | self.load_state_dict(model_dict)
145 |         except Exception:
146 |             print('loading model failed, {} may not exist'.format(path))
147 |
148 | def forward(self, x):
149 | x = self.conv1(x)
150 | x = self.bn1(x)
151 | x = self.relu(x)
152 | C1 = self.maxpool(x)
153 |
154 | C2 = self.layer1(C1)
155 | C3 = self.layer2(C2)
156 | C4 = self.layer3(C3)
157 | C5 = self.layer4(C4)
158 |
159 | return C1, C2, C3, C4, C5
160 |
161 | ############################################################
162 | # Pose Estimation Graph
163 | ############################################################
164 |
165 | class pose_estimation(nn.Module):
166 | def __init__(self, class_num, pretrain=True):
167 | super(pose_estimation, self).__init__()
168 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50
169 |         if pretrain:
170 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth'
171 | self.resnet.load_weights(self.model_path)
172 | self.apply_fix()
173 | self.reduce_C4 = nn.Sequential(nn.Conv2d(1024, 256, 1, 1, 0))
174 | self.reduce_C5 = nn.Sequential(nn.Conv2d(2048, 256, 1, 1, 0))
175 |
176 | self.block = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0),nn.ReLU(inplace=True),
177 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
178 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
179 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
180 | nn.Conv2d(128, 512, 3, 1, 1), nn.ReLU(inplace=True)
181 | )
182 | self._init_weights(self.block)
183 | self.predict = nn.Conv2d(512, 25, 1, 1, 0)
184 |
185 | def _init_weights(self, model):
186 | for m in model:
187 | if isinstance(m, nn.Conv2d):
188 | m.weight.data.normal_(0, 0.01)
189 | if m.bias is not None:
190 | m.bias.data.zero_()
191 | def apply_fix(self):
192 |         # intended: (1) fix BN, (2) freeze conv1/conv2;
193 |         # as written, only conv1 and layer1 are frozen
194 | for param in self.resnet.conv1.parameters():
195 | param.requires_grad = False
196 | for param in self.resnet.layer1.parameters():
197 | param.requires_grad = False
198 |
199 |
200 | def forward(self, x):
201 | C1, C2, C3, C4, C5 = self.resnet(x)
202 | # C4 = self.reduce_C4(C4)
203 | # C4 = F.upsample(C4, scale_factor=2)
204 |
205 | C5 = self.reduce_C5(C5)
206 | C5 = F.upsample(C5, scale_factor=4)
207 |
208 | out = self.block(C5)
209 | predict = self.predict(out)
210 | return predict
--------------------------------------------------------------------------------
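
CPM_ResNet5 tries the deepest level alone: the C4 path is commented out, and only C5 (2048 channels at stride 32) is reduced to 256 channels and upsampled x4 to stride 8. The same kind of shape check for that path, with random stand-ins:

import torch
import torch.nn as nn
import torch.nn.functional as F

reduce_C5 = nn.Conv2d(2048, 256, 1)                # mirrors self.reduce_C5
C5 = torch.randn(1, 2048, 8, 8)                    # stride 32 for a 256x256 input
x = F.interpolate(reduce_C5(C5), scale_factor=4)
print(x.shape)                                     # torch.Size([1, 256, 32, 32])
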
/models/bk/CPM_ResNet6.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | import torch.nn as nn
7 | import math
8 | import torch.nn.functional as F
9 |
10 | ############################################################
11 | # ResNet
12 | ############################################################
13 | def conv3x3(in_planes, out_planes, stride=1):
14 | "3x3 convolution with padding"
15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
16 | padding=1, bias=False)
17 |
18 | class BasicBlock(nn.Module):
19 | expansion = 1
20 |
21 | def __init__(self, inplanes, planes, stride=1, downsample=None):
22 | super(BasicBlock, self).__init__()
23 | self.conv1 = conv3x3(inplanes, planes, stride)
24 | self.bn1 = nn.BatchNorm2d(planes)
25 | self.relu = nn.ReLU(inplace=True)
26 | self.conv2 = conv3x3(planes, planes)
27 | self.bn2 = nn.BatchNorm2d(planes)
28 | self.downsample = downsample
29 | self.stride = stride
30 |
31 | def forward(self, x):
32 | residual = x
33 |
34 | out = self.conv1(x)
35 | out = self.bn1(out)
36 | out = self.relu(out)
37 |
38 | out = self.conv2(out)
39 | out = self.bn2(out)
40 |
41 | if self.downsample is not None:
42 | residual = self.downsample(x)
43 |
44 | out += residual
45 | out = self.relu(out)
46 |
47 | return out
48 |
49 |
50 | class Bottleneck(nn.Module):
51 | expansion = 4
52 |
53 | def __init__(self, inplanes, planes, stride=1, downsample=None):
54 | super(Bottleneck, self).__init__()
55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
56 | self.bn1 = nn.BatchNorm2d(planes)
57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
58 | padding=1, bias=False)
59 | self.bn2 = nn.BatchNorm2d(planes)
60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
61 | self.bn3 = nn.BatchNorm2d(planes * 4)
62 | self.relu = nn.ReLU(inplace=True)
63 | self.downsample = downsample
64 | self.stride = stride
65 |
66 | def forward(self, x):
67 | residual = x
68 |
69 | out = self.conv1(x)
70 | out = self.bn1(out)
71 | out = self.relu(out)
72 |
73 | out = self.conv2(out)
74 | out = self.bn2(out)
75 | out = self.relu(out)
76 |
77 | out = self.conv3(out)
78 | out = self.bn3(out)
79 |
80 | if self.downsample is not None:
81 | residual = self.downsample(x)
82 |
83 | out += residual
84 | out = self.relu(out)
85 |
86 | return out
87 |
88 |
89 | class ResNet(nn.Module):
90 |
91 | def __init__(self, block, layers, num_classes=1000):
92 | self.inplanes = 64
93 | super(ResNet, self).__init__()
94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
95 | bias=False)
96 | self.bn1 = nn.BatchNorm2d(64)
97 | self.relu = nn.ReLU(inplace=True)
98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
99 | self.layer1 = self._make_layer(block, 64, layers[0])
100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
103 |
104 | for m in self.modules():
105 | if isinstance(m, nn.Conv2d):
106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
107 | m.weight.data.normal_(0, math.sqrt(2. / n))
108 | elif isinstance(m, nn.BatchNorm2d):
109 | m.weight.data.fill_(1)
110 | m.bias.data.zero_()
111 |
112 | def _make_layer(self, block, planes, blocks, stride=1):
113 | downsample = None
114 | if stride != 1 or self.inplanes != planes * block.expansion:
115 | downsample = nn.Sequential(
116 | nn.Conv2d(self.inplanes, planes * block.expansion,
117 | kernel_size=1, stride=stride, bias=False),
118 | nn.BatchNorm2d(planes * block.expansion),
119 | )
120 |
121 | layers = []
122 | layers.append(block(self.inplanes, planes, stride, downsample))
123 | self.inplanes = planes * block.expansion
124 | for i in range(1, blocks):
125 | layers.append(block(self.inplanes, planes))
126 |
127 | return nn.Sequential(*layers)
128 |
129 | def load_weights(self, path):
130 | model_dict = self.state_dict()
131 | print('loading model from {}'.format(path))
132 | try:
133 | #state_dict = torch.load(self.path)
134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()})
135 | pretrained_dict = torch.load(path)
136 | from collections import OrderedDict
137 | tmp = OrderedDict()
138 | for k,v in pretrained_dict.items():
139 | if k in model_dict:
140 | tmp[k] = v
141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
142 | # model_dict.update(pretrained_dict)
143 | model_dict.update(tmp)
144 | self.load_state_dict(model_dict)
145 |         except Exception:
146 |             print('loading model failed, {} may not exist'.format(path))
147 |
148 | def forward(self, x):
149 | x = self.conv1(x)
150 | x = self.bn1(x)
151 | x = self.relu(x)
152 | C1 = self.maxpool(x)
153 |
154 | C2 = self.layer1(C1)
155 | C3 = self.layer2(C2)
156 | C4 = self.layer3(C3)
157 | C5 = self.layer4(C4)
158 |
159 | return C1, C2, C3, C4, C5
160 |
161 | ############################################################
162 | # Pose Estimation Graph
163 | ############################################################
164 |
165 | class pose_estimation(nn.Module):
166 | def __init__(self, class_num, pretrain=True):
167 | super(pose_estimation, self).__init__()
168 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50
169 |         if pretrain:
170 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth'
171 | self.resnet.load_weights(self.model_path)
172 | self.apply_fix()
173 | self.reduce_C4 = nn.Sequential(nn.Conv2d(1024, 256, 1, 1, 0))
174 | self.reduce_C5 = nn.Sequential(nn.Conv2d(2048, 256, 1, 1, 0))
175 |
176 | self.block = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0),nn.ReLU(inplace=True),
177 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
178 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
179 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
180 | nn.Conv2d(128, 512, 3, 1, 1), nn.ReLU(inplace=True)
181 | )
182 | self._init_weights(self.block)
183 | self.predict = nn.Conv2d(512, 25, 1, 1, 0)
184 |
185 | def _init_weights(self, model):
186 | for m in model:
187 | if isinstance(m, nn.Conv2d):
188 | m.weight.data.normal_(0, 0.01)
189 | if m.bias is not None:
190 | m.bias.data.zero_()
191 | def apply_fix(self):
192 |         # intended: (1) fix BN, (2) freeze conv1/conv2;
193 |         # as written, only conv1 and layer1 are frozen
194 | for param in self.resnet.conv1.parameters():
195 | param.requires_grad = False
196 | for param in self.resnet.layer1.parameters():
197 | param.requires_grad = False
198 |
199 |
200 | def forward(self, x):
201 | C1, C2, C3, C4, C5 = self.resnet(x)
202 | C4 = self.reduce_C4(C4)
203 | C4 = F.upsample(C4, scale_factor=2)
204 |
205 | C5 = self.reduce_C5(C5)
206 | C5 = F.upsample(C5, scale_factor=4)
207 |
208 | P4 = C5 + C4
209 |
210 | out = self.block(P4)
211 | predict = self.predict(out)
212 | return predict
--------------------------------------------------------------------------------
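The pose head above is the core idea of this backup: C4 and C5 are reduced to 256 channels by 1x1 convolutions, brought onto a common stride-8 grid by nearest-neighbour upsampling, summed, and refined into one heatmap per keypoint. A minimal sketch of that fusion on dummy tensors, assuming a 256x256 input to the ResNet-50 backbone (so C4 is stride 16 and C5 is stride 32):

import torch
import torch.nn.functional as F

C4 = torch.randn(1, 1024, 16, 16)  # stride-16 feature for a 256x256 input
C5 = torch.randn(1, 2048, 8, 8)    # stride-32 feature

reduce_C4 = torch.nn.Conv2d(1024, 256, 1)
reduce_C5 = torch.nn.Conv2d(2048, 256, 1)

# both paths land on the stride-8 grid (32x32) before the elementwise sum,
# mirroring reduce_C4/reduce_C5 and the upsampling in forward() above
P4 = F.upsample(reduce_C4(C4), scale_factor=2) + F.upsample(reduce_C5(C5), scale_factor=4)
print(P4.shape)  # torch.Size([1, 256, 32, 32])
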
/models/bk/CPM_ResNet7.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | import torch.nn as nn
7 | import math
8 | import torch.nn.functional as F
9 |
10 | ############################################################
11 | # ResNet
12 | ############################################################
13 | def conv3x3(in_planes, out_planes, stride=1):
14 | "3x3 convolution with padding"
15 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
16 | padding=1, bias=False)
17 |
18 | class BasicBlock(nn.Module):
19 | expansion = 1
20 |
21 | def __init__(self, inplanes, planes, stride=1, downsample=None):
22 | super(BasicBlock, self).__init__()
23 | self.conv1 = conv3x3(inplanes, planes, stride)
24 | self.bn1 = nn.BatchNorm2d(planes)
25 | self.relu = nn.ReLU(inplace=True)
26 | self.conv2 = conv3x3(planes, planes)
27 | self.bn2 = nn.BatchNorm2d(planes)
28 | self.downsample = downsample
29 | self.stride = stride
30 |
31 | def forward(self, x):
32 | residual = x
33 |
34 | out = self.conv1(x)
35 | out = self.bn1(out)
36 | out = self.relu(out)
37 |
38 | out = self.conv2(out)
39 | out = self.bn2(out)
40 |
41 | if self.downsample is not None:
42 | residual = self.downsample(x)
43 |
44 | out += residual
45 | out = self.relu(out)
46 |
47 | return out
48 |
49 |
50 | class Bottleneck(nn.Module):
51 | expansion = 4
52 |
53 | def __init__(self, inplanes, planes, stride=1, downsample=None):
54 | super(Bottleneck, self).__init__()
55 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
56 | self.bn1 = nn.BatchNorm2d(planes)
57 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
58 | padding=1, bias=False)
59 | self.bn2 = nn.BatchNorm2d(planes)
60 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
61 | self.bn3 = nn.BatchNorm2d(planes * 4)
62 | self.relu = nn.ReLU(inplace=True)
63 | self.downsample = downsample
64 | self.stride = stride
65 |
66 | def forward(self, x):
67 | residual = x
68 |
69 | out = self.conv1(x)
70 | out = self.bn1(out)
71 | out = self.relu(out)
72 |
73 | out = self.conv2(out)
74 | out = self.bn2(out)
75 | out = self.relu(out)
76 |
77 | out = self.conv3(out)
78 | out = self.bn3(out)
79 |
80 | if self.downsample is not None:
81 | residual = self.downsample(x)
82 |
83 | out += residual
84 | out = self.relu(out)
85 |
86 | return out
87 |
88 |
89 | class ResNet(nn.Module):
90 |
91 | def __init__(self, block, layers, num_classes=1000):
92 | self.inplanes = 64
93 | super(ResNet, self).__init__()
94 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
95 | bias=False)
96 | self.bn1 = nn.BatchNorm2d(64)
97 | self.relu = nn.ReLU(inplace=True)
98 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
99 | self.layer1 = self._make_layer(block, 64, layers[0])
100 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
101 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
102 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
103 |
104 | for m in self.modules():
105 | if isinstance(m, nn.Conv2d):
106 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
107 | m.weight.data.normal_(0, math.sqrt(2. / n))
108 | elif isinstance(m, nn.BatchNorm2d):
109 | m.weight.data.fill_(1)
110 | m.bias.data.zero_()
111 |
112 | def _make_layer(self, block, planes, blocks, stride=1):
113 | downsample = None
114 | if stride != 1 or self.inplanes != planes * block.expansion:
115 | downsample = nn.Sequential(
116 | nn.Conv2d(self.inplanes, planes * block.expansion,
117 | kernel_size=1, stride=stride, bias=False),
118 | nn.BatchNorm2d(planes * block.expansion),
119 | )
120 |
121 | layers = []
122 | layers.append(block(self.inplanes, planes, stride, downsample))
123 | self.inplanes = planes * block.expansion
124 | for i in range(1, blocks):
125 | layers.append(block(self.inplanes, planes))
126 |
127 | return nn.Sequential(*layers)
128 |
129 | def load_weights(self, path):
130 | model_dict = self.state_dict()
131 | print('loading model from {}'.format(path))
132 | try:
133 | #state_dict = torch.load(self.path)
134 | # self.load_state_dict({k: v for k, v in state_dict.items() if k in self.state_dict()})
135 | pretrained_dict = torch.load(path)
136 | from collections import OrderedDict
137 | tmp = OrderedDict()
138 | for k,v in pretrained_dict.items():
139 | if k in model_dict:
140 | tmp[k] = v
141 | # pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
142 | # model_dict.update(pretrained_dict)
143 | model_dict.update(tmp)
144 | self.load_state_dict(model_dict)
145 | except Exception:
146 | print('failed to load weights from {} (file may not exist)'.format(path))
147 |
148 | def forward(self, x):
149 | x = self.conv1(x)
150 | x = self.bn1(x)
151 | x = self.relu(x)
152 | C1 = self.maxpool(x)
153 |
154 | C2 = self.layer1(C1)
155 | C3 = self.layer2(C2)
156 | C4 = self.layer3(C3)
157 | C5 = self.layer4(C4)
158 |
159 | return C1, C2, C3, C4, C5
160 |
161 | ############################################################
162 | # Pose Estimation Graph
163 | ############################################################
164 |
165 | class pose_estimation(nn.Module):
166 | def __init__(self, class_num, pretrain=True):
167 | super(pose_estimation, self).__init__()
168 | self.resnet = ResNet(Bottleneck, [3, 4, 6, 3]) # resnet50
169 | if pretrain:
170 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth'
171 | self.resnet.load_weights(self.model_path)
172 | self.apply_fix()
173 | self.reduce_C4 = nn.Sequential(nn.Conv2d(1024, 256, 1, 1, 0))
174 | self.reduce_C5 = nn.Sequential(nn.Conv2d(2048, 256, 1, 1, 0))
175 |
176 | self.block = nn.Sequential(nn.Conv2d(256, 128, 1, 1, 0),nn.ReLU(inplace=True),
177 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
178 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
179 | nn.Conv2d(128, 128, 3, 1, 1), nn.ReLU(inplace=True),
180 | nn.Conv2d(128, 512, 3, 1, 1), nn.ReLU(inplace=True)
181 | )
182 | self._init_weights(self.block)
183 | self.predict = nn.Conv2d(512, class_num, 1, 1, 0)  # one output heatmap per keypoint class
184 |
185 | def _init_weights(self, model):
186 | for m in model:
187 | if isinstance(m, nn.Conv2d):
188 | m.weight.data.normal_(0, 0.01)
189 | if m.bias is not None:
190 | m.bias.data.zero_()
191 | def apply_fix(self):
192 | # freeze the early backbone layers (conv1 and layer1)
193 | # so fine-tuning only updates the deeper stages
194 | for param in self.resnet.conv1.parameters():
195 | param.requires_grad = False
196 | for param in self.resnet.layer1.parameters():
197 | param.requires_grad = False
198 |
199 |
200 | def forward(self, x):
201 | C1, C2, C3, C4, C5 = self.resnet(x)
202 | C4 = self.reduce_C4(C4)
203 | C4 = F.upsample(C4, scale_factor=2)
204 |
205 | C5 = self.reduce_C5(C5)
206 | C5 = F.upsample(C5, scale_factor=4)
207 |
208 | P4 = C5 + C4
209 |
210 | out = self.block(P4)
211 | predict = self.predict(out)
212 | return predict
--------------------------------------------------------------------------------
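load_weights above is a standard partial-loading pattern: only the pretrained entries whose keys also exist in the current model are copied into the state dict, so extra heads or renamed layers do not make load_state_dict fail. A condensed sketch of the same idea, with an additional shape check that the original omits (load_partial_weights is a hypothetical helper, not part of this repo):

import torch

def load_partial_weights(model, path):
    model_dict = model.state_dict()
    pretrained = torch.load(path)
    # keep only entries whose name and shape both match the current model
    matched = {k: v for k, v in pretrained.items()
               if k in model_dict and v.size() == model_dict[k].size()}
    model_dict.update(matched)
    model.load_state_dict(model_dict)
    return sorted(matched.keys())  # report what was actually transferred
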
/models/bk/FPN.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import math
4 | import torch.nn.functional as F
5 |
6 | ############################################################
7 | # ResNet
8 | ############################################################
9 | def conv3x3(in_planes, out_planes, stride=1):
10 | "3x3 convolution with padding"
11 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
12 | padding=1, bias=False)
13 |
14 | class BasicBlock(nn.Module):
15 | expansion = 1
16 |
17 | def __init__(self, inplanes, planes, stride=1, downsample=None):
18 | super(BasicBlock, self).__init__()
19 | self.conv1 = conv3x3(inplanes, planes, stride)
20 | self.bn1 = nn.BatchNorm2d(planes)
21 | self.relu = nn.ReLU(inplace=True)
22 | self.conv2 = conv3x3(planes, planes)
23 | self.bn2 = nn.BatchNorm2d(planes)
24 | self.downsample = downsample
25 | self.stride = stride
26 |
27 | def forward(self, x):
28 | residual = x
29 |
30 | out = self.conv1(x)
31 | out = self.bn1(out)
32 | out = self.relu(out)
33 |
34 | out = self.conv2(out)
35 | out = self.bn2(out)
36 |
37 | if self.downsample is not None:
38 | residual = self.downsample(x)
39 |
40 | out += residual
41 | out = self.relu(out)
42 |
43 | return out
44 |
45 |
46 | class Bottleneck(nn.Module):
47 | expansion = 4
48 |
49 | def __init__(self, inplanes, planes, stride=1, downsample=None):
50 | super(Bottleneck, self).__init__()
51 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
52 | self.bn1 = nn.BatchNorm2d(planes)
53 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
54 | padding=1, bias=False)
55 | self.bn2 = nn.BatchNorm2d(planes)
56 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
57 | self.bn3 = nn.BatchNorm2d(planes * 4)
58 | self.relu = nn.ReLU(inplace=True)
59 | self.downsample = downsample
60 | self.stride = stride
61 |
62 | def forward(self, x):
63 | residual = x
64 |
65 | out = self.conv1(x)
66 | out = self.bn1(out)
67 | out = self.relu(out)
68 |
69 | out = self.conv2(out)
70 | out = self.bn2(out)
71 | out = self.relu(out)
72 |
73 | out = self.conv3(out)
74 | out = self.bn3(out)
75 |
76 | if self.downsample is not None:
77 | residual = self.downsample(x)
78 |
79 | out += residual
80 | out = self.relu(out)
81 |
82 | return out
83 |
84 |
85 | class ResNet(nn.Module):
86 |
87 | def __init__(self, block, layers, num_classes=1000):
88 | self.inplanes = 64
89 | super(ResNet, self).__init__()
90 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
91 | bias=False)
92 | self.bn1 = nn.BatchNorm2d(64)
93 | self.relu = nn.ReLU(inplace=True)
94 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
95 | self.layer1 = self._make_layer(block, 64, layers[0])
96 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
97 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
98 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
99 |
100 | for m in self.modules():
101 | if isinstance(m, nn.Conv2d):
102 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
103 | m.weight.data.normal_(0, math.sqrt(2. / n))
104 | elif isinstance(m, nn.BatchNorm2d):
105 | m.weight.data.fill_(1)
106 | m.bias.data.zero_()
107 |
108 | def _make_layer(self, block, planes, blocks, stride=1):
109 | downsample = None
110 | if stride != 1 or self.inplanes != planes * block.expansion:
111 | downsample = nn.Sequential(
112 | nn.Conv2d(self.inplanes, planes * block.expansion,
113 | kernel_size=1, stride=stride, bias=False),
114 | nn.BatchNorm2d(planes * block.expansion),
115 | )
116 |
117 | layers = []
118 | layers.append(block(self.inplanes, planes, stride, downsample))
119 | self.inplanes = planes * block.expansion
120 | for i in range(1, blocks):
121 | layers.append(block(self.inplanes, planes))
122 |
123 | return nn.Sequential(*layers)
124 |
125 | def forward(self, x):
126 | x = self.conv1(x)
127 | x = self.bn1(x)
128 | x = self.relu(x)
129 | C1 = self.maxpool(x)
130 |
131 | C2 = self.layer1(C1)
132 | C3 = self.layer2(C2)
133 | C4 = self.layer3(C3)
134 | C5 = self.layer4(C4)
135 |
136 | return C1, C2, C3, C4, C5
137 |
138 | ############################################################
139 | # FPN Graph
140 | ############################################################
141 |
142 | class FPN(nn.Module):
143 | def __init__(self, out_channels):
144 | super(FPN, self).__init__()
145 | self.out_channels = out_channels
146 | self.P6 = nn.MaxPool2d(kernel_size=1, stride=2, padding=0, ceil_mode=False)
147 | self.P5_conv1 = nn.Conv2d(2048, self.out_channels, kernel_size=1, stride=1)
148 | self.P5_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
149 | self.P4_conv1 = nn.Conv2d(1024, self.out_channels, kernel_size=1, stride=1)
150 | self.P4_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
151 |
152 | self.P3_conv1 = nn.Conv2d(512, self.out_channels, kernel_size=1, stride=1)
153 | self.P3_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
154 | self.P2_conv1 = nn.Conv2d(256, self.out_channels, kernel_size=1, stride=1)
155 | self.P2_conv2 = nn.Conv2d(self.out_channels, self.out_channels, kernel_size=3, stride=1, padding=1)
156 |
157 |
158 | def forward(self, C1, C2, C3, C4, C5):
159 |
160 | p5_out = self.P5_conv1(C5)
161 | p4_out = self.P4_conv1(C4) + F.upsample(p5_out, scale_factor=2)
162 | p3_out = self.P3_conv1(C3) + F.upsample(p4_out, scale_factor=2)
163 | p2_out = self.P2_conv1(C2) + F.upsample(p3_out, scale_factor=2)
164 |
165 | p5_out = self.P5_conv2(p5_out)
166 | p4_out = self.P4_conv2(p4_out)
167 | p3_out = self.P3_conv2(p3_out)
168 | p2_out = self.P2_conv2(p2_out)
169 |
170 | # P6 is used for the 5th anchor scale in RPN. Generated by
171 | # subsampling from P5 with stride of 2.
172 | p6_out = self.P6(p5_out)
173 |
174 | return p2_out, p3_out, p4_out, p5_out, p6_out
175 |
176 | ############################################################
177 | # Pose Estimation Graph
178 | ############################################################
179 |
180 | class pose_estimation(nn.Module):
181 | def __init__(self, out_channels=256, pretrain=True):  # out_channels: FPN feature width; 256 is the usual choice
182 | super(pose_estimation, self).__init__()
183 | self.resnet50 = ResNet(Bottleneck, [3, 4, 6, 3])
184 | if pretrain:
185 | self.model_path = '/data/xiaobing.wang/.torch/models/resnet50-19c8e357.pth'
186 | state_dict = torch.load(self.model_path)
187 | self.resnet50.load_state_dict(state_dict)
188 | self.fpn = FPN(out_channels)
189 |
--------------------------------------------------------------------------------
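This backup is an unfinished draft: pose_estimation builds the backbone and FPN but has no forward yet. The FPN module itself is the standard top-down pathway: each lateral 1x1 conv output is summed with the 2x-upsampled coarser level, then smoothed by a 3x3 conv. A small shape check of that pathway, assuming the FPN class above is in scope (e.g. imported from models.bk.FPN) and backbone features from a 256x256 input:

import torch

fpn = FPN(out_channels=256)
C2 = torch.randn(1, 256, 64, 64)    # stride 4
C3 = torch.randn(1, 512, 32, 32)    # stride 8
C4 = torch.randn(1, 1024, 16, 16)   # stride 16
C5 = torch.randn(1, 2048, 8, 8)     # stride 32
p2, p3, p4, p5, p6 = fpn(None, C2, C3, C4, C5)  # C1 is accepted but unused
for p in (p2, p3, p4, p5, p6):
    print(p.shape)  # 256 channels everywhere; strides 4, 8, 16, 32, 64
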
/models/bk/hourglass.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torchvision.models as models
4 | import torch.nn.functional as F
5 |
6 |
7 | __all__ = ['HourglassNet', 'hg']
8 |
9 | class Bottleneck(nn.Module):
10 | expansion = 2
11 |
12 | def __init__(self, inplanes, planes, stride=1, downsample=None):
13 | super(Bottleneck, self).__init__()
14 |
15 | self.bn1 = nn.BatchNorm2d(inplanes)
16 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=True)
17 | self.bn2 = nn.BatchNorm2d(planes)
18 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
19 | padding=1, bias=True)
20 | self.bn3 = nn.BatchNorm2d(planes)
21 | self.conv3 = nn.Conv2d(planes, planes * 2, kernel_size=1, bias=True)
22 | self.relu = nn.ReLU(inplace=True)
23 | self.downsample = downsample
24 | self.stride = stride
25 |
26 | def forward(self, x):
27 | residual = x
28 |
29 | out = self.bn1(x)
30 | out = self.relu(out)
31 | out = self.conv1(out)
32 |
33 | out = self.bn2(out)
34 | out = self.relu(out)
35 | out = self.conv2(out)
36 |
37 | out = self.bn3(out)
38 | out = self.relu(out)
39 | out = self.conv3(out)
40 |
41 | if self.downsample is not None:
42 | residual = self.downsample(x)
43 |
44 | out += residual
45 |
46 | return out
47 |
48 |
49 | class Hourglass(nn.Module):
50 | def __init__(self, block, num_blocks, planes, depth):
51 | super(Hourglass, self).__init__()
52 | self.depth = depth
53 | self.block = block
54 | self.upsample = nn.Upsample(scale_factor=2)
55 | self.hg = self._make_hour_glass(block, num_blocks, planes, depth)
56 |
57 | def _make_residual(self, block, num_blocks, planes):
58 | layers = []
59 | for i in range(0, num_blocks):
60 | layers.append(block(planes*block.expansion, planes))
61 | return nn.Sequential(*layers)
62 |
63 | def _make_hour_glass(self, block, num_blocks, planes, depth):
64 | hg = []
65 | for i in range(depth):
66 | res = []
67 | for j in range(3):
68 | res.append(self._make_residual(block, num_blocks, planes))
69 | if i == 0:
70 | res.append(self._make_residual(block, num_blocks, planes))
71 | hg.append(nn.ModuleList(res))
72 | return nn.ModuleList(hg)
73 |
74 | def _hour_glass_forward(self, n, x):
75 | up1 = self.hg[n-1][0](x)
76 | low1 = F.max_pool2d(x, 2, stride=2)
77 | low1 = self.hg[n-1][1](low1)
78 |
79 | if n > 1:
80 | low2 = self._hour_glass_forward(n-1, low1)
81 | else:
82 | low2 = self.hg[n-1][3](low1)
83 | low3 = self.hg[n-1][2](low2)
84 | up2 = self.upsample(low3)
85 | out = up1 + up2
86 | return out
87 |
88 | def forward(self, x):
89 | return self._hour_glass_forward(self.depth, x)
90 |
91 |
92 | class HourglassNet(nn.Module):
93 | '''Hourglass model from Newell et al ECCV 2016'''
94 | def __init__(self, block, num_stacks=2, num_blocks=4, num_classes=16):
95 | super(HourglassNet, self).__init__()
96 |
97 | self.inplanes = 64
98 | self.num_feats = 128
99 | self.num_stacks = num_stacks
100 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
101 | bias=True)
102 | self.bn1 = nn.BatchNorm2d(self.inplanes)
103 | self.relu = nn.ReLU(inplace=True)
104 | self.layer1 = self._make_residual(block, self.inplanes, 1)
105 | self.layer2 = self._make_residual(block, self.inplanes, 1)
106 | self.layer3 = self._make_residual(block, self.num_feats, 1)
107 | self.maxpool = nn.MaxPool2d(2, stride=2)
108 |
109 | # build hourglass modules
110 | ch = self.num_feats*block.expansion
111 | hg, res, fc, score, fc_, score_ = [], [], [], [], [], []
112 | for i in range(num_stacks):
113 | hg.append(Hourglass(block, num_blocks, self.num_feats, 4))
114 | res.append(self._make_residual(block, self.num_feats, num_blocks))
115 | fc.append(self._make_fc(ch, ch))
116 | score.append(nn.Conv2d(ch, num_classes, kernel_size=1, bias=True))
117 | if i < num_stacks-1:
118 | fc_.append(nn.Conv2d(ch, ch, kernel_size=1, bias=True))
119 | score_.append(nn.Conv2d(num_classes, ch, kernel_size=1, bias=True))
120 | self.hg = nn.ModuleList(hg)
121 | self.res = nn.ModuleList(res)
122 | self.fc = nn.ModuleList(fc)
123 | self.score = nn.ModuleList(score)
124 | self.fc_ = nn.ModuleList(fc_)
125 | self.score_ = nn.ModuleList(score_)
126 |
127 | def _make_residual(self, block, planes, blocks, stride=1):
128 | downsample = None
129 | if stride != 1 or self.inplanes != planes * block.expansion:
130 | downsample = nn.Sequential(
131 | nn.Conv2d(self.inplanes, planes * block.expansion,
132 | kernel_size=1, stride=stride, bias=True),
133 | )
134 |
135 | layers = []
136 | layers.append(block(self.inplanes, planes, stride, downsample))
137 | self.inplanes = planes * block.expansion
138 | for i in range(1, blocks):
139 | layers.append(block(self.inplanes, planes))
140 |
141 | return nn.Sequential(*layers)
142 |
143 | def _make_fc(self, inplanes, outplanes):
144 | bn = nn.BatchNorm2d(inplanes)
145 | conv = nn.Conv2d(inplanes, outplanes, kernel_size=1, bias=True)
146 | return nn.Sequential(
147 | conv,
148 | bn,
149 | self.relu,
150 | )
151 |
152 | def forward(self, x):
153 | out = []
154 | x = self.conv1(x)
155 | x = self.bn1(x)
156 | x = self.relu(x)
157 |
158 | x = self.layer1(x)
159 | x = self.maxpool(x)
160 | x = self.layer2(x)
161 | x = self.layer3(x)
162 |
163 | for i in range(self.num_stacks):
164 | y = self.hg[i](x)
165 | y = self.res[i](y)
166 | y = self.fc[i](y)
167 | score = self.score[i](y)
168 | out.append(score)
169 | if i < self.num_stacks-1:
170 | fc_ = self.fc_[i](y)
171 | score_ = self.score_[i](score)
172 | x = x + fc_ + score_
173 |
174 | return out
175 |
176 |
177 | def hg(num_stacks, num_blocks, num_classes):
178 | model = HourglassNet(Bottleneck, num_stacks=num_stacks, num_blocks=num_blocks,
179 | num_classes=num_classes)
180 | return model
--------------------------------------------------------------------------------
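_hour_glass_forward is the recursive core of the module above: at each depth it keeps a skip branch at the current resolution (up1), max-pools by 2 and recurses for the low branch, then upsamples the result and merges by addition. A tiny standalone trace of the resolutions visited for depth=4 on a 64x64 feature map (a sketch that mirrors the control flow, not the module itself):

def trace(n, size):
    # follows the same recursion as Hourglass._hour_glass_forward
    print('depth %d: skip branch at %dx%d' % (n, size, size))
    if n > 1:
        trace(n - 1, size // 2)  # low branch: pooled by 2, then recurse
    else:
        print('bottleneck at %dx%d' % (size // 2, size // 2))

trace(4, 64)
# depth 4: skip branch at 64x64
# depth 3: skip branch at 32x32
# depth 2: skip branch at 16x16
# depth 1: skip branch at 8x8
# bottleneck at 4x4
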
/models/hourglass.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torchvision.models as models
4 | import torch.nn.functional as F
5 |
6 |
7 | __all__ = ['HourglassNet', 'hg']
8 |
9 | class Bottleneck(nn.Module):
10 | expansion = 2
11 |
12 | def __init__(self, inplanes, planes, stride=1, downsample=None):
13 | super(Bottleneck, self).__init__()
14 |
15 | self.bn1 = nn.BatchNorm2d(inplanes)
16 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=True)
17 | self.bn2 = nn.BatchNorm2d(planes)
18 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
19 | padding=1, bias=True)
20 | self.bn3 = nn.BatchNorm2d(planes)
21 | self.conv3 = nn.Conv2d(planes, planes * 2, kernel_size=1, bias=True)
22 | self.relu = nn.ReLU(inplace=True)
23 | self.downsample = downsample
24 | self.stride = stride
25 |
26 | def forward(self, x):
27 | residual = x
28 |
29 | out = self.bn1(x)
30 | out = self.relu(out)
31 | out = self.conv1(out)
32 |
33 | out = self.bn2(out)
34 | out = self.relu(out)
35 | out = self.conv2(out)
36 |
37 | out = self.bn3(out)
38 | out = self.relu(out)
39 | out = self.conv3(out)
40 |
41 | if self.downsample is not None:
42 | residual = self.downsample(x)
43 |
44 | out += residual
45 |
46 | return out
47 |
48 |
49 | class Hourglass(nn.Module):
50 | def __init__(self, block, num_blocks, planes, depth):
51 | super(Hourglass, self).__init__()
52 | self.depth = depth
53 | self.block = block
54 | self.upsample = nn.Upsample(scale_factor=2)
55 | self.hg = self._make_hour_glass(block, num_blocks, planes, depth)
56 |
57 | def _make_residual(self, block, num_blocks, planes):
58 | layers = []
59 | for i in range(0, num_blocks):
60 | layers.append(block(planes*block.expansion, planes))
61 | return nn.Sequential(*layers)
62 |
63 | def _make_hour_glass(self, block, num_blocks, planes, depth):
64 | hg = []
65 | for i in range(depth):
66 | res = []
67 | for j in range(3):
68 | res.append(self._make_residual(block, num_blocks, planes))
69 | if i == 0:
70 | res.append(self._make_residual(block, num_blocks, planes))
71 | hg.append(nn.ModuleList(res))
72 | return nn.ModuleList(hg)
73 |
74 | def _hour_glass_forward(self, n, x):
75 | up1 = self.hg[n-1][0](x)
76 | low1 = F.max_pool2d(x, 2, stride=2)
77 | low1 = self.hg[n-1][1](low1)
78 |
79 | if n > 1:
80 | low2 = self._hour_glass_forward(n-1, low1)
81 | else:
82 | low2 = self.hg[n-1][3](low1)
83 | low3 = self.hg[n-1][2](low2)
84 | up2 = self.upsample(low3)
85 | out = up1 + up2
86 | return out
87 |
88 | def forward(self, x):
89 | return self._hour_glass_forward(self.depth, x)
90 |
91 |
92 | class HourglassNet(nn.Module):
93 | '''Hourglass model from Newell et al ECCV 2016'''
94 | def __init__(self, block, num_stacks=2, num_blocks=4, num_classes=16):
95 | super(HourglassNet, self).__init__()
96 |
97 | self.inplanes = 64
98 | self.num_feats = 128
99 | self.num_stacks = num_stacks
100 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
101 | bias=True)
102 | self.bn1 = nn.BatchNorm2d(self.inplanes)
103 | self.relu = nn.ReLU(inplace=True)
104 | self.layer1 = self._make_residual(block, self.inplanes, 1)
105 | self.layer2 = self._make_residual(block, self.inplanes, 1)
106 | self.layer3 = self._make_residual(block, self.num_feats, 1)
107 | self.maxpool = nn.MaxPool2d(2, stride=2)
108 |
109 | # build hourglass modules
110 | ch = self.num_feats*block.expansion
111 | hg, res, fc, score, fc_, score_ = [], [], [], [], [], []
112 | for i in range(num_stacks):
113 | hg.append(Hourglass(block, num_blocks, self.num_feats, 4))
114 | res.append(self._make_residual(block, self.num_feats, num_blocks))
115 | fc.append(self._make_fc(ch, ch))
116 | score.append(nn.Conv2d(ch, num_classes, kernel_size=1, bias=True))
117 | if i < num_stacks-1:
118 | fc_.append(nn.Conv2d(ch, ch, kernel_size=1, bias=True))
119 | score_.append(nn.Conv2d(num_classes, ch, kernel_size=1, bias=True))
120 | self.hg = nn.ModuleList(hg)
121 | self.res = nn.ModuleList(res)
122 | self.fc = nn.ModuleList(fc)
123 | self.score = nn.ModuleList(score)
124 | self.fc_ = nn.ModuleList(fc_)
125 | self.score_ = nn.ModuleList(score_)
126 |
127 | def _make_residual(self, block, planes, blocks, stride=1):
128 | downsample = None
129 | if stride != 1 or self.inplanes != planes * block.expansion:
130 | downsample = nn.Sequential(
131 | nn.Conv2d(self.inplanes, planes * block.expansion,
132 | kernel_size=1, stride=stride, bias=True),
133 | )
134 |
135 | layers = []
136 | layers.append(block(self.inplanes, planes, stride, downsample))
137 | self.inplanes = planes * block.expansion
138 | for i in range(1, blocks):
139 | layers.append(block(self.inplanes, planes))
140 |
141 | return nn.Sequential(*layers)
142 |
143 | def _make_fc(self, inplanes, outplanes):
144 | bn = nn.BatchNorm2d(inplanes)
145 | conv = nn.Conv2d(inplanes, outplanes, kernel_size=1, bias=True)
146 | return nn.Sequential(
147 | conv,
148 | bn,
149 | self.relu,
150 | )
151 |
152 | def forward(self, x):
153 | out = []
154 | x = self.conv1(x)
155 | x = self.bn1(x)
156 | x = self.relu(x)
157 |
158 | x = self.layer1(x)
159 | x = self.maxpool(x)
160 | x = self.layer2(x)
161 | x = self.layer3(x)
162 |
163 | for i in range(self.num_stacks):
164 | y = self.hg[i](x)
165 | y = self.res[i](y)
166 | y = self.fc[i](y)
167 | score = self.score[i](y)
168 | out.append(score)
169 | if i < self.num_stacks-1:
170 | fc_ = self.fc_[i](y)
171 | score_ = self.score_[i](score)
172 | x = x + fc_ + score_
173 |
174 | return out
175 |
176 |
177 | def hg(num_stacks, num_blocks, num_classes):
178 | model = HourglassNet(Bottleneck, num_stacks=num_stacks, num_blocks=num_blocks,
179 | num_classes=num_classes)
180 | return model
--------------------------------------------------------------------------------
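hg builds a stacked hourglass with intermediate supervision: forward returns one score map per stack, and training typically sums a heatmap loss over all of them. A minimal usage sketch, assuming hg is imported from this module; num_classes=24 and the 256x256 input are illustrative choices, not values fixed by the repo:

import torch
import torch.nn as nn

model = hg(num_stacks=2, num_blocks=4, num_classes=24)
x = torch.randn(1, 3, 256, 256)
outputs = model(x)  # list of per-stack heatmaps
print(len(outputs), outputs[0].shape)  # 2 torch.Size([1, 24, 64, 64])

# intermediate supervision: every stack's prediction is penalised
target = torch.randn(1, 24, 64, 64)  # stand-in for Gaussian keypoint heatmaps
criterion = nn.MSELoss()
loss = sum(criterion(out, target) for out in outputs)
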
/util.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from easydict import EasyDict as edict
3 | import yaml
4 | import math
5 | def padRightDownCorner(img, stride, padValue):
6 | h = img.shape[0]
7 | w = img.shape[1]
8 |
9 | pad = 4 * [None]
10 | pad[0] = 0 # up
11 | pad[1] = 0 # left
12 | pad[2] = 0 if (h%stride==0) else stride - (h % stride) # down
13 | pad[3] = 0 if (w%stride==0) else stride - (w % stride) # right
14 |
15 | img_padded = img
16 | pad_up = np.tile(img_padded[0:1,:,:]*0 + padValue, (pad[0], 1, 1))
17 | img_padded = np.concatenate((pad_up, img_padded), axis=0)
18 | pad_left = np.tile(img_padded[:,0:1,:]*0 + padValue, (1, pad[1], 1))
19 | img_padded = np.concatenate((pad_left, img_padded), axis=1)
20 | pad_down = np.tile(img_padded[-2:-1,:,:]*0 + padValue, (pad[2], 1, 1))
21 | img_padded = np.concatenate((img_padded, pad_down), axis=0)
22 | pad_right = np.tile(img_padded[:,-2:-1,:]*0 + padValue, (1, pad[3], 1))
23 | img_padded = np.concatenate((img_padded, pad_right), axis=1)
24 |
25 | return img_padded, pad
26 |
27 |
28 | def get_transform(center, scale, res, rot=0):
29 | # Generate transformation matrix
30 | h = 200 * scale
31 | t = np.zeros((3, 3))
32 | t[0, 0] = float(res[1]) / h
33 | t[1, 1] = float(res[0]) / h
34 | t[0, 2] = res[1] * (-float(center[0]) / h + .5)
35 | t[1, 2] = res[0] * (-float(center[1]) / h + .5)
36 | t[2, 2] = 1
37 | if not rot == 0:
38 | rot = -rot # To match direction of rotation from cropping
39 | rot_mat = np.zeros((3,3))
40 | rot_rad = rot * np.pi / 180
41 | sn,cs = np.sin(rot_rad), np.cos(rot_rad)
42 | rot_mat[0,:2] = [cs, -sn]
43 | rot_mat[1,:2] = [sn, cs]
44 | rot_mat[2,2] = 1
45 | # Need to rotate around center
46 | t_mat = np.eye(3)
47 | t_mat[0,2] = -res[1]/2
48 | t_mat[1,2] = -res[0]/2
49 | t_inv = t_mat.copy()
50 | t_inv[:2,2] *= -1
51 | t = np.dot(t_inv,np.dot(rot_mat,np.dot(t_mat,t)))
52 | return t
53 |
54 | def kpt_affine(kpt, mat):
55 | shape = kpt.shape
56 | kpt = kpt.reshape(-1, 2)
57 | return np.dot( np.concatenate((kpt, kpt[:, 0:1]*0+1), axis = 1), mat.T ).reshape(shape)
58 |
59 | def Config(filename):
60 |
61 | with open(filename, 'r') as f:
62 | parser = edict(yaml.safe_load(f))
63 | for x in parser:
64 | print('{}: {}'.format(x, parser[x]))
65 | return parser
66 |
67 | def adjust_learning_rate(optimizer, iters, base_lr, policy_parameter, policy='step', multiple=None):
68 |
69 | if policy == 'fixed':
70 | lr = base_lr
71 | elif policy == 'step':
72 | lr = base_lr * (policy_parameter['gamma'] ** (iters // policy_parameter['step_size']))
73 | elif policy == 'exp':
74 | lr = base_lr * (policy_parameter['gamma'] ** iters)
75 | elif policy == 'inv':
76 | lr = base_lr * ((1 + policy_parameter['gamma'] * iters) ** (-policy_parameter['power']))
77 | elif policy == 'multistep':
78 | lr = base_lr
79 | for stepvalue in policy_parameter['stepvalue']:
80 | if iters >= stepvalue:
81 | lr *= policy_parameter['gamma']
82 | else:
83 | break
84 | elif policy == 'poly':
85 | lr = base_lr * ((1 - iters * 1.0 / policy_parameter['max_iter']) ** policy_parameter['power'])
86 | elif policy == 'sigmoid':
87 | lr = base_lr * (1.0 / (1 + math.exp(-policy_parameter['gamma'] * (iters - policy_parameter['stepsize']))))
88 | elif policy == 'multistep-poly':
89 | lr = base_lr
90 | stepstart = 0
91 | stepend = policy_parameter['max_iter']
92 | for stepvalue in policy_parameter['stepvalue']:
93 | if iters >= stepvalue:
94 | lr *= policy_parameter['gamma']
95 | stepstart = stepvalue
96 | else:
97 | stepend = stepvalue
98 | break
99 | lr = max(lr * policy_parameter['gamma'], lr * (1 - (iters - stepstart) * 1.0 / (stepend - stepstart)) ** policy_parameter['power'])
100 |
101 | if multiple is not None:
102 | for i, param_group in enumerate(optimizer.param_groups):
103 | param_group['lr'] = lr * multiple[i]
104 | else:
105 | for i, param_group in enumerate(optimizer.param_groups):
106 | param_group['lr'] = lr
107 | return lr
108 |
109 |
110 | class AverageMeter(object):
111 | """ Computes ans stores the average and current value"""
112 | def __init__(self):
113 | self.reset()
114 |
115 | def reset(self):
116 | self.val = 0.
117 | self.avg = 0.
118 | self.sum = 0.
119 | self.count = 0
120 |
121 | def update(self, val, n=1):
122 | self.val = val
123 | self.sum += val * n
124 | self.count += n
125 | self.avg = self.sum / self.count
126 |
--------------------------------------------------------------------------------
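adjust_learning_rate recomputes the rate from scratch on every call (for the 'step' policy, lr = base_lr * gamma ** (iters // step_size)), so a training loop can invoke it each iteration without compounding rounding errors. A short sketch of driving it, assuming the function is imported from util; the optimizer and schedule numbers are illustrative:

import torch

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.SGD(params, lr=1e-3)

for iters in (0, 50000, 100000):
    lr = adjust_learning_rate(optimizer, iters, base_lr=1e-3,
                              policy_parameter={'gamma': 0.1, 'step_size': 50000},
                              policy='step')
    print(iters, lr)  # lr steps down: 1e-3, 1e-4, 1e-5
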
/vis_input.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import os\n"
12 | ]
13 | }
14 | ],
15 | "metadata": {
16 | "kernelspec": {
17 | "display_name": "Python 2",
18 | "language": "python",
19 | "name": "python2"
20 | },
21 | "language_info": {
22 | "codemirror_mode": {
23 | "name": "ipython",
24 | "version": 2
25 | },
26 | "file_extension": ".py",
27 | "mimetype": "text/x-python",
28 | "name": "python",
29 | "nbconvert_exporter": "python",
30 | "pygments_lexer": "ipython2",
31 | "version": "2.7.6"
32 | }
33 | },
34 | "nbformat": 4,
35 | "nbformat_minor": 0
36 | }
37 |
--------------------------------------------------------------------------------