├── .gitignore
├── README.md
├── RPN
│   ├── config.py
│   ├── inference.py
│   └── model
│       ├── pose_deploy.prototxt
│       └── pose_iter_265000.caffemodel
├── data
│   ├── format_3dpes.py
│   ├── format_cuhk01.py
│   ├── format_cuhk02.py
│   ├── format_cuhk03.py
│   ├── format_ilids.py
│   ├── format_market1501.py
│   ├── format_prid.py
│   ├── format_psdb.py
│   ├── format_shinpuhkan.py
│   └── format_viper.py
├── models
│   ├── base
│   │   ├── base_solver.prototxt
│   │   ├── base_test.prototxt
│   │   └── base_trainval.prototxt
│   ├── body
│   │   ├── body_solver.prototxt
│   │   ├── body_test.prototxt
│   │   └── body_trainval.prototxt
│   ├── head
│   │   ├── head_solver.prototxt
│   │   ├── head_test.prototxt
│   │   └── head_trainval.prototxt
│   ├── larm
│   │   ├── larm_solver.prototxt
│   │   ├── larm_test.prototxt
│   │   └── larm_trainval.prototxt
│   ├── leg
│   │   ├── leg_solver.prototxt
│   │   ├── leg_test.prototxt
│   │   └── leg_trainval.prototxt
│   ├── lleg
│   │   ├── lleg_solver.prototxt
│   │   ├── lleg_test.prototxt
│   │   └── lleg_trainval.prototxt
│   ├── new
│   │   ├── new_solver.prototxt
│   │   └── new_trainval.prototxt
│   ├── rarm
│   │   ├── rarm_solver.prototxt
│   │   ├── rarm_test.prototxt
│   │   └── rarm_trainval.prototxt
│   ├── rleg
│   │   ├── rleg_solver.prototxt
│   │   ├── rleg_test.prototxt
│   │   └── rleg_trainval.prototxt
│   └── spindlenet
│       ├── spindlenet_solver.prototxt
│       ├── spindlenet_test.prototxt
│       └── spindlenet_trainval.prototxt
├── scripts
│   ├── format_rawdata.sh
│   ├── gen_proposal_datalist.sh
│   ├── make_datalists.sh
│   ├── merge_datalists.sh
│   ├── test.sh
│   ├── train_base.sh
│   ├── train_body.sh
│   ├── train_head.sh
│   ├── train_larm.sh
│   ├── train_leg.sh
│   ├── train_lleg.sh
│   ├── train_rarm.sh
│   ├── train_rleg.sh
│   └── train_spindlenet.sh
├── tools
│   ├── check_jstltrainlist.py
│   ├── convert_lmdb_to_numpy.py
│   ├── evaluation.py
│   ├── make_lists.py
│   └── merge_lists.py
└── utils
    ├── __init__.py
    └── core.py
/.gitignore: -------------------------------------------------------------------------------- 1 | external/ 2 | logs/ 3 | 4 | # Editor temporaries 5 | *.swp 6 | *~ 7 | 8 | # Compiled python 9 | *.pyc 10 | 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SpindleNet 2 | 3 | mkdir external 4 | cd external 5 | ln -sf /path/to/the/root/of/datasets raw_data 6 | ln -sf /path/to/your/experiments/directory exp 7 | cd ..
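# The pipeline below assumes the two symlinks above are in place:
# external/raw_data pointing at the downloaded dataset roots and
# external/exp at a writable experiments directory.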
8 | ./scripts/format_rawdata.sh 9 | ./scripts/make_datalists.sh 10 | ./scripts/merge_datalists.sh 11 | ./scripts/gen_proposal_datalist.sh 12 | 13 | ./scripts/train_base.sh 14 | 15 | ./scripts/train_head.sh 16 | ./scripts/train_body.sh 17 | ./scripts/train_leg.sh 18 | ./scripts/train_rarm.sh 19 | ./scripts/train_larm.sh 20 | ./scripts/train_rleg.sh 21 | ./scripts/train_lleg.sh 22 | 23 | ./scripts/train_spindlenet.sh 24 | 25 | ./scripts/test.sh 26 | -------------------------------------------------------------------------------- /RPN/config.py: -------------------------------------------------------------------------------- 1 | def config(): 2 | param = {} 3 | param['use_gpu'] = 0 4 | param['caffe_model'] = 'RPN/model/pose_iter_265000.caffemodel' 5 | param['deploy_file'] = 'RPN/model/pose_deploy.prototxt' 6 | param['box_size'] = 256 7 | param['pad_value'] = 128 8 | param['magic'] = 0.2 9 | param['sigma'] = 21 10 | param['caffe_path'] = 'external/caffe/python/' 11 | param['map_layer_name'] = 'Mconv5_stage3' 12 | return param 13 | 14 | -------------------------------------------------------------------------------- /RPN/inference.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | import sys 3 | import numpy as np 4 | import scipy.misc 5 | from config import config 6 | from skimage.transform import resize 7 | import os 8 | import os.path as osp 9 | 10 | param = config() 11 | if param['caffe_path'] not in sys.path: 12 | sys.path.insert(0, param['caffe_path']) 13 | import caffe 14 | 15 | 16 | def init_caffe_model(param): 17 | caffe.set_mode_gpu() 18 | caffe.set_device(param['use_gpu']) 19 | net = caffe.Net(param['deploy_file'], param['caffe_model'], caffe.TEST) 20 | return net 21 | 22 | 23 | def preprocess_bbox(rect): 24 | centerx = rect['x'] + rect['width'] / 2.0 25 | centery = rect['y'] + rect['height'] / 2.0 26 | sz = np.max([rect['width'], rect['height']]) 27 | rect['x'] = centerx - sz / 2.0 28 | rect['y'] = centery - sz / 2.0 29 | rect['width'] = sz 30 | rect['height'] = sz 31 | return rect 32 | 33 | 34 | def crop_image_with_padding(img, left, top, right, bottom, pad_value): 35 | pad = np.array([0, 0, 0, 0]) 36 | if right >= img.shape[1]: 37 | pad[1] = right - img.shape[1] + 1 38 | if left < 0: 39 | pad[0] = -left 40 | left = 0 41 | right += pad[0] 42 | if bottom >= img.shape[0]: 43 | pad[3] = bottom - img.shape[0] + 1 44 | if top < 0: 45 | pad[2] = -top 46 | top = 0 47 | bottom += pad[3] 48 | if np.sum(pad) > 0: 49 | img = np.pad(img, ((pad[2], pad[3]), (pad[0], pad[1]), (0, 0)), 'constant', constant_values=(pad_value,)) 50 | img = img[int(top):int(bottom), int(left):int(right),:] 51 | return img 52 | 53 | 54 | def produce_center_label_map(im_size, x, y, param): 55 | xv, yv = np.meshgrid(range(im_size), range(im_size)) 56 | xv = xv - x 57 | yv = yv - y 58 | D = xv * xv + yv * yv 59 | return np.exp(-D / 2.0 / param['sigma'] / param['sigma']) 60 | 61 | 62 | def preprocess(img, mean, param): 63 | img_out = scipy.misc.imresize(img, (param['box_size'], param['box_size'], 3)) 64 | img_out = img_out / 256.0 65 | img_out -= mean 66 | 67 | img_out = np.transpose(img_out, (1, 0, 2)) 68 | img_out = img_out[:, :, [2,1,0]] 69 | label_map = produce_center_label_map(param['box_size'], param['box_size'] / 2, param['box_size'] / 2, param) 70 | label_map = np.expand_dims(label_map, axis=2) 71 | img_out = np.expand_dims(np.concatenate((img_out, label_map), axis=2), axis=0) 72 | img_out = np.transpose(img_out, (0, 3, 2, 1)) 73 | 
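# At this point the blob is (1, 4, box_size, box_size): three BGR image
# channels plus the Gaussian center map built above. pose_deploy.prototxt
# slices channel 3 back out as "center_map" (Slice layer, slice_point: 3).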
return img_out 74 | 75 | 76 | def postprocess(map_origin, param): 77 | nparts = map_origin.shape[0] 78 | map_processed = np.zeros((param['box_size'], param['box_size'], nparts)) 79 | for i in range(nparts): 80 | m = np.squeeze(map_origin[i, :, :]) 81 | m[m > 1] = 1 82 | m[m < -1] = -1 83 | m = resize(m, (param['box_size'], param['box_size'])) 84 | map_processed[:, :, i] = m 85 | return map_processed 86 | 87 | 88 | def get_joints_from_map(joint_map, rect): 89 | nparts = joint_map.shape[2] 90 | map_sz = joint_map.shape[0] 91 | locs = np.zeros((3, nparts)) 92 | for i in range(nparts): 93 | loc = np.argmax(joint_map[:, :, i]) 94 | loc = np.unravel_index(loc, np.squeeze(joint_map[:, :, i]).shape) 95 | loc = np.asarray(loc) 96 | locs[2, i] = joint_map[loc[0], loc[1], i] 97 | loc = loc / float(map_sz) 98 | locs[0:2, i] = loc 99 | locs[0:2, :] *= rect['width'] 100 | locs[0, :] += rect['y'] 101 | locs[1, :] += rect['x'] 102 | return locs 103 | 104 | 105 | def macro(points, landmark, img, param, filename, bodyname, wf): 106 | minx = np.inf 107 | miny = np.inf 108 | maxx = -np.inf 109 | maxy = -np.inf 110 | for i in xrange(len(points)): 111 | minx = min(minx, landmark[0][points[i]]) 112 | miny = min(miny, landmark[1][points[i]]) 113 | maxx = max(maxx, landmark[0][points[i]]) 114 | maxy = max(maxy, landmark[1][points[i]]) 115 | 116 | dx = param['magic'] * (maxx - minx) 117 | dy = param['magic'] * (maxy - miny) 118 | minx = max(0, minx - dx) 119 | miny = max(0, miny - dy) 120 | maxx = min(img.shape[0], maxx + dx) 121 | maxy = min(img.shape[1], maxy + dy) 122 | 123 | if maxy - miny > maxx - minx: 124 | L = maxy - miny 125 | dx = (L - (maxx - minx)) / 2.0 126 | minx = max(0, minx - dx) 127 | maxx = min(img.shape[0], maxx + dx) 128 | else: 129 | L = maxx - minx 130 | dy = (L - (maxy - miny)) / 2.0 131 | miny = max(0, miny - dy) 132 | maxy = min(img.shape[1], maxy + dy) 133 | 134 | mag = img.shape[0] / 14.0 135 | if min(maxy - miny, maxx - minx) < mag * 2.0: 136 | cenx = (maxx + minx) / 2.0 137 | ceny = (maxy + miny) / 2.0 138 | maxx = min(img.shape[0], cenx + mag) 139 | minx = max(0, cenx - mag) 140 | maxy = min(img.shape[1], ceny + mag) 141 | miny = max(0, ceny - mag) 142 | 143 | wf.write(str(miny) + ' ' + str(minx) + ' ' + str(maxy) + ' ' + str(maxx) + '\n') 144 | 145 | #image = img[int(minx):int(maxx), int(miny):int(maxy)] 146 | #rr = {} 147 | #rr['x'] = 0 148 | #rr['y'] = 0 149 | #rr['width'] = image.shape[1] 150 | #rr['height'] = image.shape[0] 151 | 152 | #rect = preprocess_bbox(rr) 153 | #crop_img = crop_image_with_padding(image,rr['x'],rr['y'],rr['x']+rr['width'],rr['y']+rr['height'],param['pad_value']) 154 | #crop_img = scipy.misc.imresize(crop_img,(128,128,3)) 155 | #scipy.misc.imsave(filename.split('.')[0] + '#' + bodyname + '.' 
+ filename.split('.')[1], crop_img) 156 | 157 | 158 | def micro(points, landmark, img, param, filename, bodyname, wf): 159 | minx = np.inf 160 | miny = np.inf 161 | maxx = -np.inf 162 | maxy = -np.inf 163 | for i in xrange(len(points)): 164 | minx = min(minx, landmark[0][points[i]]) 165 | miny = min(miny, landmark[1][points[i]]) 166 | maxx = max(maxx, landmark[0][points[i]]) 167 | maxy = max(maxy, landmark[1][points[i]]) 168 | 169 | dx = param['magic'] * (maxx - minx) 170 | dy = param['magic'] * (maxy - miny) 171 | minx = max(0, minx - dx) 172 | miny = max(0, miny - dy) 173 | maxx = min(img.shape[0], maxx + dx) 174 | maxy = min(img.shape[1], maxy + dy) 175 | 176 | mag = img.shape[0] / 14.0 177 | if min(maxy - miny, maxx - minx) < mag * 2.0: 178 | cenx = (maxx + minx) / 2.0 179 | ceny = (maxy + miny) / 2.0 180 | maxx = max(min(img.shape[0], cenx + mag), maxx) 181 | minx = min(max(0, cenx - mag), minx) 182 | maxy = max(min(img.shape[1], ceny + mag), maxy) 183 | miny = min(max(0, ceny - mag), miny) 184 | 185 | wf.write(str(miny) + ' ' + str(minx) + ' ' + str(maxy) + ' ' + str(maxx) + '\n') 186 | 187 | 188 | def apply_model(image, net, param, rect, filename, wf): 189 | #print 'Doing ' + filename 190 | rect = preprocess_bbox(rect) 191 | cropped_img = crop_image_with_padding(image, rect['x'], rect['y'], rect['x'] + rect['width'], rect['y'] + rect['height'], param['pad_value']) 192 | input_img = preprocess(cropped_img, 0.5, param) 193 | 194 | net.blobs['data'].reshape(1, 4, param['box_size'], param['box_size']) 195 | net.blobs['data'].data[...] = input_img 196 | output = net.forward() 197 | joint_map_origin = np.squeeze(output[param['map_layer_name']]) 198 | joint_map_processed = postprocess(joint_map_origin, param) 199 | joints_loc = get_joints_from_map(joint_map_processed, rect) 200 | 201 | points = [0, 1, 2, 5] 202 | macro(points, joints_loc, image, param, filename, 'head', wf) 203 | points = [2, 3, 4, 5, 6, 7, 8, 11] 204 | macro(points, joints_loc, image, param, filename, 'body', wf) 205 | points = [8, 9, 10, 11, 12, 13] 206 | macro(points, joints_loc, image, param, filename, 'leg', wf) 207 | points = [2, 3, 4] 208 | micro(points, joints_loc, image, param, filename, 'rarm', wf) 209 | points = [5, 6, 7] 210 | micro(points, joints_loc, image, param, filename, 'larm', wf) 211 | points = [8, 9, 10] 212 | micro(points, joints_loc, image, param, filename, 'rleg', wf) 213 | points = [11, 12, 13] 214 | micro(points, joints_loc, image, param, filename, 'lleg', wf) 215 | 216 | #foo = np.copy(image) 217 | #for k in xrange(14): 218 | # for i in xrange(3): 219 | # for j in xrange(3): 220 | # xxx = i - 1 + int(joints_loc[0][k]) 221 | # xxx = max(0, xxx) 222 | # xxx = min(xxx, foo.shape[0] - 1) 223 | # yyy = j - 1 + int(joints_loc[1][k]) 224 | # yyy = max(0, yyy) 225 | # yyy = min(yyy, foo.shape[1] - 1) 226 | # foo[xxx][yyy][0] = 255 227 | # foo[xxx][yyy][1] = 0 228 | # foo[xxx][yyy][2] = 0 229 | #scipy.misc.imsave(filename.split('.')[0] + '##.' 
+ filename.split('.')[1], foo) 230 | 231 | 232 | def main(args): 233 | net = init_caffe_model(param) 234 | f = open(args.datalist) 235 | wf = open(args.output_datalist, 'w') 236 | cnt = 0 237 | for line in f: 238 | wf.write('# ' + str(cnt) + '\n') 239 | image_name = line.split(' ')[0] 240 | wf.write(image_name + '\n') 241 | label = int(line.split(' ')[1]) 242 | wf.write(str(label) + '\n') 243 | #if 'jstl' not in args.dataset: 244 | # image_name = 'external/exp/datasets/' + args.dataset + '/' + image_name 245 | img = scipy.misc.imread(image_name) 246 | rect = {} 247 | rect['x'] = 0 248 | rect['y'] = 0 249 | rect['width'] = img.shape[1] 250 | rect['height'] = img.shape[0] 251 | apply_model(img, net, param, rect, image_name, wf) 252 | cnt += 1 253 | if cnt % 1000 == 0: 254 | print str(cnt) + ' done!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!' 255 | 256 | 257 | if __name__ == '__main__': 258 | parser = ArgumentParser( 259 | description="Gen region proposal datalist") 260 | parser.add_argument( 261 | 'datalist', 262 | help="The datalist which need to gen region proposal") 263 | parser.add_argument( 264 | 'output_datalist', 265 | help="Output datalist") 266 | parser.add_argument('dataset') 267 | args = parser.parse_args() 268 | main(args) 269 | 270 | -------------------------------------------------------------------------------- /RPN/model/pose_deploy.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_dim: 1 3 | input_dim: 4 4 | input_dim: 256 5 | input_dim: 256 6 | layer { 7 | name: "image" 8 | type: "Slice" 9 | bottom: "data" 10 | top: "image" 11 | top: "center_map" 12 | slice_param { 13 | slice_point: 3 14 | axis: 1 15 | } 16 | } 17 | layer { 18 | name: "pool_center_lower" 19 | type: "Pooling" 20 | bottom: "center_map" 21 | top: "pool_center_lower" 22 | pooling_param { 23 | pool: AVE 24 | kernel_size: 17 25 | stride: 16 26 | } 27 | } 28 | layer { 29 | name: "conv1_stage1" 30 | type: "Convolution" 31 | bottom: "image" 32 | top: "conv1_stage1" 33 | param { 34 | lr_mult: 1 35 | decay_mult: 1 36 | } 37 | param { 38 | lr_mult: 2 39 | decay_mult: 0 40 | } 41 | convolution_param { 42 | num_output: 128 43 | pad: 2 44 | kernel_size: 5 45 | stride: 2 46 | weight_filler { 47 | type: "gaussian" 48 | std: 0.01 49 | } 50 | bias_filler { 51 | type: "constant" 52 | } 53 | } 54 | } 55 | layer { 56 | name: "relu1_stage1" 57 | type: "ReLU" 58 | bottom: "conv1_stage1" 59 | top: "conv1_stage1" 60 | } 61 | layer { 62 | name: "pool1_stage1" 63 | type: "Pooling" 64 | bottom: "conv1_stage1" 65 | top: "pool1_stage1" 66 | pooling_param { 67 | pool: MAX 68 | kernel_size: 3 69 | stride: 2 70 | } 71 | } 72 | layer { 73 | name: "new_conv2_stage1" 74 | type: "Convolution" 75 | bottom: "pool1_stage1" 76 | top: "new_conv2_stage1" 77 | param { 78 | lr_mult: 1 79 | decay_mult: 1 80 | } 81 | param { 82 | lr_mult: 2 83 | decay_mult: 0 84 | } 85 | convolution_param { 86 | num_output: 128 87 | pad: 2 88 | kernel_size: 5 89 | stride: 2 90 | weight_filler { 91 | type: "gaussian" 92 | std: 0.01 93 | } 94 | bias_filler { 95 | type: "constant" 96 | } 97 | } 98 | } 99 | layer { 100 | name: "relu2_stage1" 101 | type: "ReLU" 102 | bottom: "new_conv2_stage1" 103 | top: "new_conv2_stage1" 104 | } 105 | layer { 106 | name: "conv3_stage1" 107 | type: "Convolution" 108 | bottom: "new_conv2_stage1" 109 | top: "conv3_stage1" 110 | param { 111 | lr_mult: 1 112 | decay_mult: 1 113 | } 114 | param { 115 | lr_mult: 2 116 | decay_mult: 0 117 | } 118 | convolution_param { 119 | num_output: 128 120 
| pad: 2 121 | kernel_size: 5 122 | weight_filler { 123 | type: "gaussian" 124 | std: 0.01 125 | } 126 | bias_filler { 127 | type: "constant" 128 | } 129 | } 130 | } 131 | layer { 132 | name: "relu3_stage1" 133 | type: "ReLU" 134 | bottom: "conv3_stage1" 135 | top: "conv3_stage1" 136 | } 137 | layer { 138 | name: "pool3_stage1" 139 | type: "Pooling" 140 | bottom: "conv3_stage1" 141 | top: "pool3_stage1" 142 | pooling_param { 143 | pool: MAX 144 | kernel_size: 3 145 | stride: 2 146 | } 147 | } 148 | layer { 149 | name: "conv4_stage1" 150 | type: "Convolution" 151 | bottom: "pool3_stage1" 152 | top: "conv4_stage1" 153 | param { 154 | lr_mult: 1 155 | decay_mult: 1 156 | } 157 | param { 158 | lr_mult: 2 159 | decay_mult: 0 160 | } 161 | convolution_param { 162 | num_output: 32 163 | pad: 2 164 | kernel_size: 5 165 | weight_filler { 166 | type: "gaussian" 167 | std: 0.01 168 | } 169 | bias_filler { 170 | type: "constant" 171 | } 172 | } 173 | } 174 | layer { 175 | name: "relu4_stage1" 176 | type: "ReLU" 177 | bottom: "conv4_stage1" 178 | top: "conv4_stage1" 179 | } 180 | layer { 181 | name: "concat_stage2" 182 | type: "Concat" 183 | bottom: "conv4_stage1" 184 | bottom: "pool_center_lower" 185 | top: "concat_stage2" 186 | concat_param { 187 | axis: 1 188 | } 189 | } 190 | layer { 191 | name: "Mconv1_stage2" 192 | type: "Convolution" 193 | bottom: "concat_stage2" 194 | top: "Mconv1_stage2" 195 | param { 196 | lr_mult: 1 197 | decay_mult: 1 198 | } 199 | param { 200 | lr_mult: 2 201 | decay_mult: 0 202 | } 203 | convolution_param { 204 | num_output: 128 205 | pad: 2 206 | kernel_size: 5 207 | weight_filler { 208 | type: "gaussian" 209 | std: 0.01 210 | } 211 | bias_filler { 212 | type: "constant" 213 | } 214 | } 215 | } 216 | layer { 217 | name: "Mrelu1_stage2" 218 | type: "ReLU" 219 | bottom: "Mconv1_stage2" 220 | top: "Mconv1_stage2" 221 | } 222 | layer { 223 | name: "Mconv2_stage2" 224 | type: "Convolution" 225 | bottom: "Mconv1_stage2" 226 | top: "Mconv2_stage2" 227 | param { 228 | lr_mult: 1 229 | decay_mult: 1 230 | } 231 | param { 232 | lr_mult: 2 233 | decay_mult: 0 234 | } 235 | convolution_param { 236 | num_output: 128 237 | pad: 2 238 | kernel_size: 5 239 | weight_filler { 240 | type: "gaussian" 241 | std: 0.01 242 | } 243 | bias_filler { 244 | type: "constant" 245 | } 246 | } 247 | } 248 | layer { 249 | name: "Mrelu2_stage2" 250 | type: "ReLU" 251 | bottom: "Mconv2_stage2" 252 | top: "Mconv2_stage2" 253 | } 254 | layer { 255 | name: "Mconv3_stage2" 256 | type: "Convolution" 257 | bottom: "Mconv2_stage2" 258 | top: "Mconv3_stage2" 259 | param { 260 | lr_mult: 1 261 | decay_mult: 1 262 | } 263 | param { 264 | lr_mult: 2 265 | decay_mult: 0 266 | } 267 | convolution_param { 268 | num_output: 128 269 | pad: 2 270 | kernel_size: 5 271 | weight_filler { 272 | type: "gaussian" 273 | std: 0.01 274 | } 275 | bias_filler { 276 | type: "constant" 277 | } 278 | } 279 | } 280 | layer { 281 | name: "Mrelu3_stage2" 282 | type: "ReLU" 283 | bottom: "Mconv3_stage2" 284 | top: "Mconv3_stage2" 285 | } 286 | layer { 287 | name: "Mconv4_stage2" 288 | type: "Convolution" 289 | bottom: "Mconv3_stage2" 290 | top: "Mconv4_stage2" 291 | param { 292 | lr_mult: 1 293 | decay_mult: 1 294 | } 295 | param { 296 | lr_mult: 2 297 | decay_mult: 0 298 | } 299 | convolution_param { 300 | num_output: 128 301 | pad: 0 302 | kernel_size: 1 303 | weight_filler { 304 | type: "gaussian" 305 | std: 0.01 306 | } 307 | bias_filler { 308 | type: "constant" 309 | } 310 | } 311 | } 312 | layer { 313 | name: "Mrelu4_stage2" 314 | 
type: "ReLU" 315 | bottom: "Mconv4_stage2" 316 | top: "Mconv4_stage2" 317 | } 318 | layer { 319 | name: "Mconv5_stage2" 320 | type: "Convolution" 321 | bottom: "Mconv4_stage2" 322 | top: "Mconv5_stage2" 323 | param { 324 | lr_mult: 1 325 | decay_mult: 1 326 | } 327 | param { 328 | lr_mult: 2 329 | decay_mult: 0 330 | } 331 | convolution_param { 332 | num_output: 15 333 | pad: 0 334 | kernel_size: 1 335 | weight_filler { 336 | type: "gaussian" 337 | std: 0.01 338 | } 339 | bias_filler { 340 | type: "constant" 341 | } 342 | } 343 | } 344 | layer { 345 | name: "conv1_stage3" 346 | type: "Convolution" 347 | bottom: "pool3_stage1" 348 | top: "conv1_stage3" 349 | param { 350 | lr_mult: 1 351 | decay_mult: 1 352 | } 353 | param { 354 | lr_mult: 2 355 | decay_mult: 0 356 | } 357 | convolution_param { 358 | num_output: 32 359 | pad: 2 360 | kernel_size: 5 361 | weight_filler { 362 | type: "gaussian" 363 | std: 0.01 364 | } 365 | bias_filler { 366 | type: "constant" 367 | } 368 | } 369 | } 370 | layer { 371 | name: "relu1_stage3" 372 | type: "ReLU" 373 | bottom: "conv1_stage3" 374 | top: "conv1_stage3" 375 | } 376 | layer { 377 | name: "concat_stage3" 378 | type: "Concat" 379 | bottom: "conv1_stage3" 380 | bottom: "Mconv5_stage2" 381 | bottom: "pool_center_lower" 382 | top: "concat_stage3" 383 | concat_param { 384 | axis: 1 385 | } 386 | } 387 | layer { 388 | name: "Mconv1_stage3" 389 | type: "Convolution" 390 | bottom: "concat_stage3" 391 | top: "Mconv1_stage3" 392 | param { 393 | lr_mult: 1 394 | decay_mult: 1 395 | } 396 | param { 397 | lr_mult: 2 398 | decay_mult: 0 399 | } 400 | convolution_param { 401 | num_output: 128 402 | pad: 2 403 | kernel_size: 5 404 | weight_filler { 405 | type: "gaussian" 406 | std: 0.01 407 | } 408 | bias_filler { 409 | type: "constant" 410 | } 411 | } 412 | } 413 | layer { 414 | name: "Mrelu1_stage3" 415 | type: "ReLU" 416 | bottom: "Mconv1_stage3" 417 | top: "Mconv1_stage3" 418 | } 419 | layer { 420 | name: "Mconv2_stage3" 421 | type: "Convolution" 422 | bottom: "Mconv1_stage3" 423 | top: "Mconv2_stage3" 424 | param { 425 | lr_mult: 1 426 | decay_mult: 1 427 | } 428 | param { 429 | lr_mult: 2 430 | decay_mult: 0 431 | } 432 | convolution_param { 433 | num_output: 128 434 | pad: 2 435 | kernel_size: 5 436 | weight_filler { 437 | type: "gaussian" 438 | std: 0.01 439 | } 440 | bias_filler { 441 | type: "constant" 442 | } 443 | } 444 | } 445 | layer { 446 | name: "Mrelu2_stage3" 447 | type: "ReLU" 448 | bottom: "Mconv2_stage3" 449 | top: "Mconv2_stage3" 450 | } 451 | layer { 452 | name: "Mconv3_stage3" 453 | type: "Convolution" 454 | bottom: "Mconv2_stage3" 455 | top: "Mconv3_stage3" 456 | param { 457 | lr_mult: 1 458 | decay_mult: 1 459 | } 460 | param { 461 | lr_mult: 2 462 | decay_mult: 0 463 | } 464 | convolution_param { 465 | num_output: 128 466 | pad: 2 467 | kernel_size: 5 468 | weight_filler { 469 | type: "gaussian" 470 | std: 0.01 471 | } 472 | bias_filler { 473 | type: "constant" 474 | } 475 | } 476 | } 477 | layer { 478 | name: "Mrelu3_stage3" 479 | type: "ReLU" 480 | bottom: "Mconv3_stage3" 481 | top: "Mconv3_stage3" 482 | } 483 | layer { 484 | name: "Mconv4_stage3" 485 | type: "Convolution" 486 | bottom: "Mconv3_stage3" 487 | top: "Mconv4_stage3" 488 | param { 489 | lr_mult: 1 490 | decay_mult: 1 491 | } 492 | param { 493 | lr_mult: 2 494 | decay_mult: 0 495 | } 496 | convolution_param { 497 | num_output: 128 498 | pad: 0 499 | kernel_size: 1 500 | weight_filler { 501 | type: "gaussian" 502 | std: 0.01 503 | } 504 | bias_filler { 505 | type: "constant" 506 
| } 507 | } 508 | } 509 | layer { 510 | name: "Mrelu4_stage3" 511 | type: "ReLU" 512 | bottom: "Mconv4_stage3" 513 | top: "Mconv4_stage3" 514 | } 515 | layer { 516 | name: "Mconv5_stage3" 517 | type: "Convolution" 518 | bottom: "Mconv4_stage3" 519 | top: "Mconv5_stage3" 520 | param { 521 | lr_mult: 1 522 | decay_mult: 1 523 | } 524 | param { 525 | lr_mult: 2 526 | decay_mult: 0 527 | } 528 | convolution_param { 529 | num_output: 15 530 | pad: 0 531 | kernel_size: 1 532 | weight_filler { 533 | type: "gaussian" 534 | std: 0.01 535 | } 536 | bias_filler { 537 | type: "constant" 538 | } 539 | } 540 | } -------------------------------------------------------------------------------- /RPN/model/pose_iter_265000.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yokattame/SpindleNet/102b6defb1b0e309a361a62817044424bcfad6bb/RPN/model/pose_iter_265000.caffemodel -------------------------------------------------------------------------------- /data/format_3dpes.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import numpy as np 3 | from argparse import ArgumentParser 4 | from glob import glob 5 | from collections import defaultdict 6 | 7 | from utils import * 8 | 9 | 10 | def main(args): 11 | mkdir_if_missing(osp.join(args.output_dir, 'cam_0')) 12 | mkdir_if_missing(osp.join(args.output_dir, 'cam_1')) 13 | # Collect the person_id and view_id into dict 14 | images = glob(osp.join(args.input_dir, 'RGB', '*.bmp')) 15 | pdict = defaultdict(lambda: defaultdict(list)) 16 | for imname in images: 17 | pid, vid = osp.basename(imname).split('_')[0:2] 18 | pdict[pid][vid].append(imname) 19 | # Randomly choose half of the views as cam_0, others as cam_1 20 | identities = [] 21 | for i, pid in enumerate(pdict): 22 | vids = pdict[pid].keys() 23 | num_views = len(vids) 24 | np.random.shuffle(vids) 25 | p_images = [[], []] 26 | for vid in vids[:(num_views // 2)]: 27 | for src_file in pdict[pid][vid]: 28 | tgt_file = 'cam_0/{:05d}_{:05d}.bmp'.format(i, len(p_images[0])) 29 | shutil.copy(src_file, osp.join(args.output_dir, tgt_file)) 30 | p_images[0].append(tgt_file) 31 | for vid in vids[(num_views // 2):]: 32 | for src_file in pdict[pid][vid]: 33 | tgt_file = 'cam_1/{:05d}_{:05d}.bmp'.format(i, len(p_images[1])) 34 | shutil.copy(src_file, osp.join(args.output_dir, tgt_file)) 35 | p_images[1].append(tgt_file) 36 | identities.append(p_images) 37 | # Save meta information into a json file 38 | meta = {'name': '3DPeS', 'shot': 'multiple', 'num_cameras': 2} 39 | meta['identities'] = identities 40 | write_json(meta, osp.join(args.output_dir, 'meta.json')) 41 | # Randomly create a training and test split 42 | num = len(identities) 43 | pids = np.random.permutation(num) 44 | trainval_pids = sorted(pids[100:]) 45 | test_pids = sorted(pids[:100]) 46 | split = { 47 | 'trainval': trainval_pids, 48 | 'test_probe': test_pids, 49 | 'test_gallery': test_pids} 50 | write_json(split, osp.join(args.output_dir, 'split.json')) 51 | 52 | 53 | if __name__ == '__main__': 54 | parser = ArgumentParser( 55 | description="Convert the 3DPeS dataset into the uniform format") 56 | parser.add_argument( 57 | 'input_dir', 58 | help="Root directory of the 3DPeS dataset containing RGB/") 59 | parser.add_argument( 60 | 'output_dir', 61 | help="Output directory for the formatted 3DPeS dataset") 62 | args = parser.parse_args() 63 | main(args) 64 | 65 | 
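Every data/format_*.py converter above and below emits the same uniform layout: per-camera image directories (cam_0/, cam_1/, ...) plus a meta.json holding 'name', 'shot', 'num_cameras' and the per-identity image lists, and a split.json holding 'trainval', 'test_probe' and 'test_gallery' person ids. The following is a minimal sketch of reading that layout back; the helper name load_formatted_dataset is hypothetical, and only stdlib json is used rather than the repo's own utils helpers:

import json
import os.path as osp

def load_formatted_dataset(root):
    # meta['identities'][pid][cam] is a list of image paths relative to root
    with open(osp.join(root, 'meta.json')) as f:
        meta = json.load(f)
    # split values are lists of person ids indexing into meta['identities']
    with open(osp.join(root, 'split.json')) as f:
        split = json.load(f)
    # Flatten the trainval identities into (absolute path, person id) pairs
    samples = []
    for pid in split['trainval']:
        for cam_images in meta['identities'][pid]:
            for fname in cam_images:
                samples.append((osp.join(root, fname), pid))
    return meta, split, samples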
-------------------------------------------------------------------------------- /data/format_cuhk01.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import numpy as np 3 | from argparse import ArgumentParser 4 | from scipy.misc import imsave 5 | 6 | from utils import * 7 | 8 | 9 | def main(args): 10 | mkdir_if_missing(osp.join(args.output_dir, 'cam_0')) 11 | mkdir_if_missing(osp.join(args.output_dir, 'cam_1')) 12 | num_identities = 971 13 | identities = [0] * num_identities 14 | for i in xrange(num_identities): 15 | p_images = [[], []] 16 | for j in xrange(4): 17 | cam_id = j // 2 18 | src_file = 'campus/{:04d}{:03d}.png'.format(i + 1, j + 1) 19 | tgt_file = 'cam_{}/{:05d}_{:05d}.png'.format(cam_id, i, j % 2) 20 | shutil.copy(osp.join(args.cuhk01_dir, src_file), 21 | osp.join(args.output_dir, tgt_file)) 22 | p_images[cam_id].append(tgt_file) 23 | identities[i] = p_images 24 | # Save meta information into a json file 25 | meta = {'name': 'cuhk01', 'shot': 'multiple', 'num_cameras': 2} 26 | meta['identities'] = identities 27 | write_json(meta, osp.join(args.output_dir, 'meta.json')) 28 | # Randomly create a training and test split 29 | num = len(identities) 30 | pids = np.random.permutation(num) 31 | trainval_pids = sorted(pids[num // 2:]) 32 | test_pids = sorted(pids[:num // 2]) 33 | split = {'trainval': trainval_pids, 34 | 'test_probe': test_pids, 35 | 'test_gallery': test_pids} 36 | write_json(split, osp.join(args.output_dir, 'split.json')) 37 | 38 | 39 | if __name__ == '__main__': 40 | parser = ArgumentParser( 41 | description="Convert the CUHK-01 dataset into the uniform format") 42 | parser.add_argument( 43 | 'cuhk01_dir', 44 | help="Root directory of the CUHK-01 dataset containing campus/") 45 | parser.add_argument( 46 | 'output_dir', 47 | help="Output directory for the formatted CUHK-01 dataset") 48 | args = parser.parse_args() 49 | main(args) 50 | 51 | -------------------------------------------------------------------------------- /data/format_cuhk02.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from argparse import ArgumentParser 3 | from glob import glob 4 | 5 | from utils import * 6 | 7 | 8 | def main(args): 9 | mkdir_if_missing(osp.join(args.output_dir, 'cam_0')) 10 | mkdir_if_missing(osp.join(args.output_dir, 'cam_1')) 11 | identities = [] 12 | # cuhk01 is same as P1 13 | for i in xrange(4): 14 | cam1_images = glob(osp.join(args.cuhk02_dir, 'P' + str(i + 2), 'cam1', '*.png')) 15 | cam2_images = glob(osp.join(args.cuhk02_dir, 'P' + str(i + 2), 'cam2', '*.png')) 16 | cam1_images.sort() 17 | cam2_images.sort(); 18 | assert len(cam1_images) == len(cam2_images) 19 | prev_pid = -1 20 | index = len(identities) - 1 21 | for name in cam1_images: 22 | p_id = int(osp.basename(name)[:3]) 23 | if prev_pid != p_id: 24 | identities.append([[], []]) 25 | p_images = identities[-1] 26 | file_name = 'cam_0/{:05d}_{:05d}.png'.format(len(identities) - 1, len(p_images[0])) 27 | shutil.copy(name, osp.join(args.output_dir, file_name)) 28 | p_images[0].append(file_name) 29 | prev_pid = p_id 30 | prev_pid = -1 31 | for name in cam2_images: 32 | p_id = int(osp.basename(name)[:3]) 33 | if prev_pid != p_id: 34 | index += 1 35 | p_images = identities[index] 36 | file_name = 'cam_1/{:05d}_{:05d}.png'.format(index, len(p_images[1])) 37 | shutil.copy(name, osp.join(args.output_dir, file_name)) 38 | p_images[1].append(file_name) 39 | prev_pid = p_id 40 | # Save meta information into a json 
file 41 | meta = {'name': 'cuhk02', 'shot': 'multiple', 'num_cameras': 2} 42 | meta['identities'] = identities 43 | write_json(meta, osp.join(args.output_dir, 'meta.json')) 44 | # We don't test on this dataset. Just use all the data for train / val. 45 | split = {'trainval': range(len(identities)), 46 | 'test_probe': [], 47 | 'test_gallery': []} 48 | write_json(split, osp.join(args.output_dir, 'split.json')) 49 | 50 | 51 | if __name__ == '__main__': 52 | parser = ArgumentParser( 53 | description="Convert the CUHK-02 dataset into the uniform format") 54 | parser.add_argument( 55 | 'cuhk02_dir', 56 | help="Root directory of the CUHK-02 dataset containing P2/ - P5/") 57 | parser.add_argument( 58 | 'output_dir', 59 | help="Output directory for the formatted CUHK-02 dataset") 60 | args = parser.parse_args() 61 | main(args) 62 | 63 | -------------------------------------------------------------------------------- /data/format_cuhk03.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from argparse import ArgumentParser 3 | from scipy.misc import imsave 4 | 5 | from utils import * 6 | 7 | 8 | def main(args): 9 | try: 10 | from scipy.io import loadmat 11 | matdata = loadmat(osp.join(args.cuhk03_dir, 'cuhk-03.mat')) 12 | except: 13 | from hdf5storage import loadmat 14 | matdata = loadmat(osp.join(args.cuhk03_dir, 'cuhk-03.mat')) 15 | # Although there are 5 pairs of camera views, we tile them up as one pair. 16 | mkdir_if_missing(osp.join(args.output_dir, 'cam_0')) 17 | mkdir_if_missing(osp.join(args.output_dir, 'cam_1')) 18 | identities = [] 19 | for imgs_labeled, imgs_detected in zip( 20 | matdata['labeled'].squeeze(), matdata['detected'].squeeze()): 21 | # We merge the manually labeled and automatically detected images of the same view. 
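# Columns 0-4 of each .mat image array belong to the first camera of a pair
# and columns 5-9 to the second, hence the xrange(5) / xrange(5, 10) loops
# below writing to cam_0 and cam_1 respectively.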
22 | for i in xrange(imgs_labeled.shape[0]): 23 | pid = len(identities) 24 | p_images = [] 25 | # view-0 26 | v_images = [] 27 | for j in xrange(5): 28 | if imgs_labeled[i, j].size == 0: 29 | break 30 | file_name = 'cam_0/{:05d}_{:05d}.bmp'.format(pid, len(v_images)) 31 | imsave(osp.join(args.output_dir, file_name), imgs_labeled[i, j]) 32 | v_images.append(file_name) 33 | for j in xrange(5): 34 | if imgs_detected[i, j].size == 0: 35 | break 36 | file_name = 'cam_0/{:05d}_{:05d}.bmp'.format(pid, len(v_images)) 37 | imsave(osp.join(args.output_dir, file_name), imgs_detected[i, j]) 38 | v_images.append(file_name) 39 | p_images.append(v_images) 40 | # view-1 41 | v_images = [] 42 | for j in xrange(5, 10): 43 | if imgs_labeled[i, j].size == 0: 44 | break 45 | file_name = 'cam_1/{:05d}_{:05d}.bmp'.format(pid, len(v_images)) 46 | imsave(osp.join(args.output_dir, file_name), imgs_labeled[i, j]) 47 | v_images.append(file_name) 48 | for j in xrange(5, 10): 49 | if imgs_detected[i, j].size == 0: 50 | break 51 | file_name = 'cam_1/{:05d}_{:05d}.bmp'.format(pid, len(v_images)) 52 | imsave(osp.join(args.output_dir, file_name), imgs_detected[i, j]) 53 | v_images.append(file_name) 54 | p_images.append(v_images) 55 | identities.append(p_images) 56 | # Save meta information into a json file 57 | meta = {'name': 'cuhk03', 'shot': 'multiple', 'num_cameras': 2} 58 | meta['identities'] = identities 59 | write_json(meta, osp.join(args.output_dir, 'meta.json')) 60 | # Save training and test splits into a json file 61 | view_counts = [a.shape[0] for a in matdata['labeled'].squeeze()] 62 | vid_offsets = np.r_[0, np.cumsum(view_counts)] 63 | test_info = np.random.choice(matdata['testsets'].squeeze()) 64 | test_pids = [] 65 | for i, j in test_info: 66 | pid = vid_offsets[i - 1] + j - 1 67 | test_pids.append(pid) 68 | test_pids.sort() 69 | trainval_pids = list(set(xrange(vid_offsets[-1])) - set(test_pids)) 70 | split = {'trainval': trainval_pids, 71 | 'test_probe': test_pids, 72 | 'test_gallery': test_pids} 73 | write_json(split, osp.join(args.output_dir, 'split.json')) 74 | 75 | 76 | if __name__ == '__main__': 77 | parser = ArgumentParser( 78 | description="Convert the CUHK-03 dataset into the uniform format") 79 | parser.add_argument( 80 | 'cuhk03_dir', 81 | help="Root directory of the CUHK-03 dataset containing cuhk-03.mat") 82 | parser.add_argument( 83 | 'output_dir', 84 | help="Output directory for the formatted CUHK-03 dataset") 85 | args = parser.parse_args() 86 | main(args) 87 | 88 | -------------------------------------------------------------------------------- /data/format_ilids.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import numpy as np 3 | from argparse import ArgumentParser 4 | from glob import glob 5 | from collections import defaultdict 6 | 7 | from utils import * 8 | 9 | 10 | def main(args): 11 | mkdir_if_missing(osp.join(args.output_dir, 'cam_0')) 12 | mkdir_if_missing(osp.join(args.output_dir, 'cam_1')) 13 | # Collect the images of each person into dict 14 | images = glob(osp.join(args.ilids_dir, 'Persons', '*.jpg')) 15 | pdict = defaultdict(list) 16 | for imname in images: 17 | pid = int(osp.basename(imname)[:4]) 18 | pdict[pid].append(imname) 19 | # Randomly choose half of the images as cam_0, others as cam_1 20 | identities = [] 21 | for i, (pid, images) in enumerate(pdict.iteritems()): 22 | num = len(images) 23 | np.random.shuffle(images) 24 | p_images = [[], []] 25 | for src_file in images[:(num // 2)]: 26 | tgt_file = 
'cam_0/{:05d}_{:05d}.jpg'.format(i, len(p_images[0])) 27 | shutil.copy(src_file, osp.join(args.output_dir, tgt_file)) 28 | p_images[0].append(tgt_file) 29 | for src_file in images[(num // 2):]: 30 | tgt_file = 'cam_1/{:05d}_{:05d}.jpg'.format(i, len(p_images[1])) 31 | shutil.copy(src_file, osp.join(args.output_dir, tgt_file)) 32 | p_images[1].append(tgt_file) 33 | identities.append(p_images) 34 | # Save meta information into a json file 35 | meta = {'name': 'i-LIDS', 'shot': 'multiple', 'num_cameras': 2} 36 | meta['identities'] = identities 37 | write_json(meta, osp.join(args.output_dir, 'meta.json')) 38 | # Randomly create a training and test split 39 | num = len(identities) 40 | pids = np.random.permutation(num) 41 | trainval_pids = sorted(pids[num // 2:]) 42 | test_pids = sorted(pids[:num // 2]) 43 | split = {'trainval': trainval_pids, 44 | 'test_probe': test_pids, 45 | 'test_gallery': test_pids} 46 | write_json(split, osp.join(args.output_dir, 'split.json')) 47 | 48 | 49 | if __name__ == '__main__': 50 | parser = ArgumentParser( 51 | description="Convert the i-LIDS dataset into the uniform format") 52 | parser.add_argument( 53 | 'ilids_dir', 54 | help="Root directory of the i-LIDS dataset containing Persons/") 55 | parser.add_argument( 56 | 'output_dir', 57 | help="Output directory for the formatted i-LIDS dataset") 58 | args = parser.parse_args() 59 | main(args) 60 | 61 | -------------------------------------------------------------------------------- /data/format_market1501.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | from argparse import ArgumentParser 3 | from glob import glob 4 | from collections import defaultdict 5 | 6 | from utils import * 7 | 8 | 9 | def main(args): 10 | # cam_0 to cam_5 11 | for i in xrange(6): 12 | mkdir_if_missing(osp.join(args.output_dir, 'cam_' + str(i))) 13 | # Collect the person_id and view_id into dict 14 | images = glob(osp.join(args.market1501_dir, 'bounding_box_train', '*.jpg')) 15 | pdict = defaultdict(lambda: defaultdict(list)) 16 | S = set() 17 | for imname in images: 18 | name = osp.basename(imname) 19 | pid = int(name[:4]) - 1 20 | vid = int(name[6:7]) - 1 21 | pdict[pid][vid].append(imname) 22 | S.add(pid) 23 | images = glob(osp.join(args.market1501_dir, 'gt_bbox', '*.jpg')) 24 | for imname in images: 25 | name = osp.basename(imname) 26 | pid = int(name[:4]) - 1 27 | vid = int(name[6:7]) - 1 28 | if pid in S: 29 | pdict[pid][vid].append(imname) 30 | identities = [] 31 | for i, pid in enumerate(pdict): 32 | vids = pdict[pid].keys() 33 | p_images = [[] for j in xrange(6)] 34 | for vid in vids: 35 | for src_file in pdict[pid][vid]: 36 | tgt_file = 'cam_{}/{:05d}_{:05d}.jpg'.format(vid, i, len(p_images[vid])) 37 | shutil.copy(src_file, osp.join(args.output_dir, tgt_file)) 38 | p_images[vid].append(tgt_file) 39 | identities.append(p_images) 40 | # Save meta information into a json file 41 | meta = {'name': 'market1501', 'shot': 'multiple', 'num_cameras': 6} 42 | meta['identities'] = identities 43 | write_json(meta, osp.join(args.output_dir, 'meta.json')) 44 | # Market's test is special. So we just use partial data for train / val. 
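# (Market-1501 ships its own fixed query/gallery evaluation protocol, so no
# probe or gallery ids are produced here; this formatted copy only
# contributes identities for training and validation.)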
45 | split = {'trainval': range(len(identities)), 46 | 'test_probe': [], 47 | 'test_gallery': []} 48 | write_json(split, osp.join(args.output_dir, 'split.json')) 49 | 50 | 51 | if __name__ == '__main__': 52 | parser = ArgumentParser( 53 | description="Convert the market1501 dataset into the uniform format") 54 | parser.add_argument( 55 | 'market1501_dir', 56 | help="Root directory of the market1501 dataset containing bounding_box_train/ and gt_bbox/") 57 | parser.add_argument( 58 | 'output_dir', 59 | help="Output directory for the formatted market1501 dataset") 60 | args = parser.parse_args() 61 | main(args) 62 | 63 | -------------------------------------------------------------------------------- /data/format_prid.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import numpy as np 3 | from argparse import ArgumentParser 4 | from glob import glob 5 | 6 | from utils import * 7 | 8 | 9 | def main(args): 10 | mkdir_if_missing(osp.join(args.output_dir, 'cam_0')) 11 | mkdir_if_missing(osp.join(args.output_dir, 'cam_1')) 12 | # Randomly choose 100 people from the 200 shared people as test probe 13 | p = list(np.random.permutation(200)) 14 | test_probe = range(100) 15 | test_gallery = range(100) 16 | identities = [] 17 | for pid in p[:100]: 18 | p_images = [] 19 | src_file = osp.join(args.prid_dir, 'single_shot', 'cam_a', 20 | 'person_{:04d}.png'.format(pid + 1)) 21 | tgt_file = osp.join('cam_0', '{:05d}_00000.png'.format(len(identities))) 22 | shutil.copy(src_file, osp.join(args.output_dir, tgt_file)) 23 | p_images.append([tgt_file]) 24 | src_file = osp.join(args.prid_dir, 'single_shot', 'cam_b', 25 | 'person_{:04d}.png'.format(pid + 1)) 26 | tgt_file = osp.join('cam_1', '{:05d}_00000.png'.format(len(identities))) 27 | shutil.copy(src_file, osp.join(args.output_dir, tgt_file)) 28 | p_images.append([tgt_file]) 29 | identities.append(p_images) 30 | # Other 100 people from the 200 as a part of trainval 31 | # Choose 10 images randomly from the multi-shot images 32 | trainval = range(100, 200) 33 | for pid in p[100:]: 34 | p_images = [[], []] 35 | images = glob(osp.join(args.prid_dir, 'multi_shot', 'cam_a', 36 | 'person_{:04d}'.format(pid + 1), '*.png')) 37 | images = np.random.choice(images, size=min(10, len(images)), replace=False) 38 | for src_file in images: 39 | tgt_file = osp.join('cam_0', 40 | '{:05d}_{:05d}.png'.format(len(identities), len(p_images[0]))) 41 | shutil.copy(src_file, osp.join(args.output_dir, tgt_file)) 42 | p_images[0].append(tgt_file) 43 | images = glob(osp.join(args.prid_dir, 'multi_shot', 'cam_b', 44 | 'person_{:04d}'.format(pid + 1), '*.png')) 45 | images = np.random.choice(images, size=min(10, len(images)), replace=False) 46 | for src_file in images: 47 | tgt_file = osp.join('cam_1', 48 | '{:05d}_{:05d}.png'.format(len(identities), len(p_images[1]))) 49 | shutil.copy(src_file, osp.join(args.output_dir, tgt_file)) 50 | p_images[1].append(tgt_file) 51 | identities.append(p_images) 52 | # 201 to 385 cam_a people as another part of trainval 53 | for pid in xrange(200, 385): 54 | p_images = [[], []] 55 | images = glob(osp.join(args.prid_dir, 'multi_shot', 'cam_a', 56 | 'person_{:04d}'.format(pid + 1), '*.png')) 57 | images = np.random.choice(images, size=min(10, len(images)), replace=False) 58 | for src_file in images: 59 | tgt_file = osp.join('cam_0', 60 | '{:05d}_{:05d}.png'.format(len(identities), len(p_images[0]))) 61 | shutil.copy(src_file, osp.join(args.output_dir, tgt_file)) 62 | p_images[0].append(tgt_file) 63 | 
trainval.append(len(identities)) 64 | identities.append(p_images) 65 | # 201 to 749 cam_b people as additional test gallery 66 | for pid in xrange(200, 749): 67 | src_file = osp.join(args.prid_dir, 'single_shot', 'cam_b', 68 | 'person_{:04d}.png'.format(pid + 1)) 69 | tgt_file = osp.join('cam_1', '{:05d}_00000.png'.format(len(identities))) 70 | shutil.copy(src_file, osp.join(args.output_dir, tgt_file)) 71 | p_images = [[], [tgt_file]] 72 | test_gallery.append(len(identities)) 73 | identities.append(p_images) 74 | # Save meta information into a json file 75 | meta = {'name': 'PRID', 'shot': 'multiple', 'num_cameras': 2} 76 | meta['identities'] = identities 77 | write_json(meta, osp.join(args.output_dir, 'meta.json')) 78 | # We have only one split 79 | split = {'trainval': trainval, 80 | 'test_probe': test_probe, 81 | 'test_gallery': test_gallery} 82 | write_json(split, osp.join(args.output_dir, 'split.json')) 83 | 84 | 85 | if __name__ == '__main__': 86 | parser = ArgumentParser( 87 | description="Convert the PRID dataset into the uniform format") 88 | parser.add_argument( 89 | 'prid_dir', 90 | help="Root directory of the PRID dataset containing single_shot/ and multi_shot/") 91 | parser.add_argument( 92 | 'output_dir', 93 | help="Output directory for the formatted PRID dataset") 94 | args = parser.parse_args() 95 | main(args) 96 | 97 | -------------------------------------------------------------------------------- /data/format_psdb.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from argparse import ArgumentParser 3 | from scipy.misc import imread 4 | from scipy.misc import imsave 5 | 6 | from utils import * 7 | 8 | 9 | def main(args): 10 | try: 11 | from scipy.io import loadmat 12 | matdata = loadmat(osp.join(args.psdb_dir, 'person.mat')) 13 | except: 14 | from hdf5storage import loadmat 15 | matdata = loadmat(osp.join(args.psdb_dir, 'person.mat')) 16 | mkdir_if_missing(osp.join(args.output_dir, 'cam_0')) 17 | mkdir_if_missing(osp.join(args.output_dir, 'cam_1')) 18 | identities = [] 19 | # Randomly choose half of the images as cam_0, others as cam_1 20 | for person in matdata['person'].squeeze(): 21 | pid = person[0].squeeze() 22 | imname = person[1].squeeze() 23 | bbox = person[2].squeeze() 24 | num = imname.shape[0] 25 | images = [] 26 | for i in xrange(num): 27 | image = imread(osp.join(args.psdb_dir, 'images', imname[i][0])) 28 | crop_image = image[int(bbox[i][1]):int(bbox[i][1]) + int(bbox[i][3]), int(bbox[i][0]):int(bbox[i][0]) + int(bbox[i][2])] 29 | images.append(crop_image) 30 | np.random.shuffle(images) 31 | p_images = [[], []] 32 | for image in images[:(num // 2)]: 33 | file_name = 'cam_0/{:05d}_{:05d}.bmp'.format(pid - 1, len(p_images[0])) 34 | imsave(osp.join(args.output_dir, file_name), image) 35 | p_images[0].append(file_name) 36 | for image in images[(num // 2):]: 37 | file_name = 'cam_1/{:05d}_{:05d}.bmp'.format(pid - 1, len(p_images[1])) 38 | imsave(osp.join(args.output_dir, file_name), image) 39 | p_images[1].append(file_name) 40 | identities.append(p_images) 41 | # Save meta information into a json file 42 | meta = {'name': 'psdb', 'shot': 'multiple', 'num_cameras': 2} 43 | meta['identities'] = identities 44 | write_json(meta, osp.join(args.output_dir, 'meta.json')) 45 | # We don't test on this dataset. Just use all the data for train / val. 
46 | split = { 47 | 'trainval': range(len(identities)), 48 | 'test_probe': [], 49 | 'test_gallery': []} 50 | write_json(split, osp.join(args.output_dir, 'split.json')) 51 | 52 | 53 | if __name__ == '__main__': 54 | parser = ArgumentParser( 55 | description="Convert the psdb dataset into the uniform format") 56 | parser.add_argument( 57 | 'psdb_dir', 58 | help="Root directory of the psdb dataset containing person.mat") 59 | parser.add_argument( 60 | 'output_dir', 61 | help="Output directory for the formatted psdb dataset") 62 | args = parser.parse_args() 63 | main(args) 64 | 65 | -------------------------------------------------------------------------------- /data/format_shinpuhkan.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import numpy as np 3 | from argparse import ArgumentParser 4 | from glob import glob 5 | 6 | from utils import * 7 | 8 | 9 | def main(args): 10 | # cam_0 to cam_15 11 | for i in xrange(16): 12 | mkdir_if_missing(osp.join(args.output_dir, 'cam_' + str(i))) 13 | images = glob(osp.join(args.shinpuhkan_dir, 'images', '*.jpg')) 14 | images.sort() 15 | identities = [] 16 | prev_pid = -1 17 | for name in images: 18 | name = osp.basename(name) 19 | p_id = int(name[0:3]) - 1 20 | c_id = int(name[4:6]) - 1 21 | if prev_pid != p_id: 22 | identities.append([]) 23 | prev_cid = -1 24 | p_images = identities[-1] 25 | if prev_cid != c_id: 26 | p_images.append([]) 27 | v_images = p_images[-1] 28 | file_name = 'cam_{}/{:05d}_{:05d}.jpg'.format(c_id, p_id, len(v_images)) 29 | shutil.copy( 30 | osp.join(args.shinpuhkan_dir, 'images', name), 31 | osp.join(args.output_dir, file_name)) 32 | v_images.append(file_name) 33 | prev_pid = p_id 34 | prev_cid = c_id 35 | # Save meta information into a json file 36 | meta = {'name': 'Shinpuhkan', 'shot': 'multiple', 'num_cameras': 16} 37 | meta['identities'] = identities 38 | write_json(meta, osp.join(args.output_dir, 'meta.json')) 39 | # We don't test on this dataset. Just use all the data for train / val. 
40 | split = { 41 | 'trainval': range(len(identities)), 42 | 'test_probe': [], 43 | 'test_gallery': []} 44 | write_json(split, osp.join(args.output_dir, 'split.json')) 45 | 46 | 47 | if __name__ == '__main__': 48 | parser = ArgumentParser( 49 | description="Convert the Shinpuhkan dataset into the uniform format") 50 | parser.add_argument( 51 | 'shinpuhkan_dir', 52 | help="Root directory of the Shinpuhkan dataset containing images/") 53 | parser.add_argument( 54 | 'output_dir', 55 | help="Output directory for the formatted Shinpuhkan dataset") 56 | args = parser.parse_args() 57 | main(args) 58 | 59 | -------------------------------------------------------------------------------- /data/format_viper.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import numpy as np 3 | from argparse import ArgumentParser 4 | from glob import glob 5 | 6 | from utils import * 7 | 8 | 9 | def main(args): 10 | mkdir_if_missing(osp.join(args.output_dir, 'cam_0')) 11 | mkdir_if_missing(osp.join(args.output_dir, 'cam_1')) 12 | identities = [] 13 | cam1_images = glob(osp.join(args.viper_dir, 'cam_a', '*.bmp')) 14 | cam2_images = glob(osp.join(args.viper_dir, 'cam_b', '*.bmp')) 15 | cam1_images.sort() 16 | cam2_images.sort() 17 | assert len(cam1_images) == len(cam2_images) 18 | for i in xrange(len(cam1_images)): 19 | cam1_pid = int(osp.basename(cam1_images[i])[:3]) 20 | cam2_pid = int(osp.basename(cam2_images[i])[:3]) 21 | assert cam1_pid == cam2_pid 22 | p_id = len(identities) 23 | p_images = [] 24 | # view-0 25 | file_name = 'cam_0/{:05d}_{:05d}.bmp'.format(p_id, 0) 26 | shutil.copy(cam1_images[i], 27 | osp.join(args.output_dir, file_name)) 28 | p_images.append([file_name]) 29 | # view-1 30 | file_name = 'cam_1/{:05d}_{:05d}.bmp'.format(p_id, 0) 31 | shutil.copy(cam2_images[i], 32 | osp.join(args.output_dir, file_name)) 33 | p_images.append([file_name]) 34 | identities.append(p_images) 35 | # Save meta information into a json file 36 | meta = {'name': 'VIPeR', 'shot': 'single', 'num_cameras': 2} 37 | meta['identities'] = identities 38 | write_json(meta, osp.join(args.output_dir, 'meta.json')) 39 | # Randomly create a training and test split 40 | num = len(identities) 41 | pids = np.random.permutation(num) 42 | trainval_pids = sorted(pids[num // 2:]) 43 | test_pids = sorted(pids[:num // 2]) 44 | split = { 45 | 'trainval': trainval_pids, 46 | 'test_probe': test_pids, 47 | 'test_gallery': test_pids} 48 | write_json(split, osp.join(args.output_dir, 'split.json')) 49 | 50 | 51 | if __name__ == '__main__': 52 | parser = ArgumentParser( 53 | description="Convert the VIPeR dataset into the uniform format") 54 | parser.add_argument( 55 | 'viper_dir', 56 | help="Root directory of the VIPeR dataset containing cam_a/ and cam_b/") 57 | parser.add_argument( 58 | 'output_dir', 59 | help="Output directory for the formatted VIPeR dataset") 60 | args = parser.parse_args() 61 | main(args) 62 | 63 | -------------------------------------------------------------------------------- /models/base/base_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/base/base_trainval.prototxt" 2 | test_iter: 1192 3 | test_interval: 1000 4 | test_initialization: false 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "multistep" 11 | base_lr: 0.1 12 | gamma: 0.5 13 | stepvalue: 25000 14 | stepvalue: 35000 15 | stepvalue: 40000 16 | stepvalue: 45000 17 | stepvalue: 50000 18 | stepvalue: 55000 19 | 
stepvalue: 60000 20 | stepvalue: 65000 21 | max_iter: 70000 22 | 23 | momentum: 0.9 24 | weight_decay: 0.0005 25 | 26 | snapshot: 10000 27 | snapshot_prefix: "external/exp/snapshots/base/base" 28 | solver_mode: GPU 29 | 30 | -------------------------------------------------------------------------------- /models/base/base_test.prototxt: -------------------------------------------------------------------------------- 1 | name: "base" 2 | 3 | layer { 4 | name: "data" 5 | type: "ImageData" 6 | top: "data" 7 | top: "label" 8 | transform_param { 9 | mirror: false 10 | mean_value: 103.939 11 | mean_value: 116.779 12 | mean_value: 123.68 13 | } 14 | image_data_param { 15 | source: "external/exp/datalists/${dataset}/${subset}.txt" 16 | root_folder: "" 17 | batch_size: 100 18 | new_height: 96 19 | new_width: 96 20 | shuffle: false 21 | } 22 | } 23 | 24 | ######################### conv1 ######################### 25 | 26 | layer { 27 | name: "conv1" 28 | type: "Convolution" 29 | bottom: "data" 30 | top: "conv1" 31 | param { 32 | lr_mult: 1 33 | decay_mult: 1 34 | } 35 | convolution_param { 36 | num_output: 32 37 | bias_term: false 38 | pad: 1 39 | kernel_size: 3 40 | stride: 1 41 | weight_filler { 42 | type: "xavier" 43 | } 44 | } 45 | } 46 | 47 | layer { 48 | name: "conv1_bn" 49 | type: "BatchNorm" 50 | bottom: "conv1" 51 | top: "conv1" 52 | batch_norm_param { 53 | use_global_stats: true 54 | } 55 | } 56 | 57 | layer { 58 | name: "conv1_scale" 59 | type: "Scale" 60 | bottom: "conv1" 61 | top: "conv1" 62 | scale_param { 63 | bias_term: true 64 | } 65 | } 66 | 67 | layer { 68 | name: "relu1" 69 | type: "ReLU" 70 | bottom: "conv1" 71 | top: "conv1" 72 | } 73 | 74 | ######################### conv2 ######################### 75 | 76 | layer { 77 | name: "conv2" 78 | type: "Convolution" 79 | bottom: "conv1" 80 | top: "conv2" 81 | param { 82 | lr_mult: 1 83 | decay_mult: 1 84 | } 85 | convolution_param { 86 | num_output: 32 87 | bias_term: false 88 | pad: 1 89 | kernel_size: 3 90 | stride: 1 91 | weight_filler { 92 | type: "xavier" 93 | } 94 | } 95 | } 96 | 97 | layer { 98 | name: "conv2_bn" 99 | type: "BatchNorm" 100 | bottom: "conv2" 101 | top: "conv2" 102 | batch_norm_param { 103 | use_global_stats: true 104 | } 105 | } 106 | 107 | layer { 108 | name: "conv2_scale" 109 | type: "Scale" 110 | bottom: "conv2" 111 | top: "conv2" 112 | scale_param { 113 | bias_term: true 114 | } 115 | } 116 | 117 | layer { 118 | name: "relu2" 119 | type: "ReLU" 120 | bottom: "conv2" 121 | top: "conv2" 122 | } 123 | 124 | ######################### conv3 ######################### 125 | 126 | layer { 127 | name: "conv3" 128 | type: "Convolution" 129 | bottom: "conv2" 130 | top: "conv3" 131 | param { 132 | lr_mult: 1 133 | decay_mult: 1 134 | } 135 | convolution_param { 136 | num_output: 64 137 | bias_term: false 138 | pad: 1 139 | kernel_size: 3 140 | stride: 1 141 | weight_filler { 142 | type: "xavier" 143 | } 144 | } 145 | } 146 | 147 | layer { 148 | name: "conv3_bn" 149 | type: "BatchNorm" 150 | bottom: "conv3" 151 | top: "conv3" 152 | batch_norm_param { 153 | use_global_stats: true 154 | } 155 | } 156 | 157 | layer { 158 | name: "conv3_scale" 159 | type: "Scale" 160 | bottom: "conv3" 161 | top: "conv3" 162 | scale_param { 163 | bias_term: true 164 | } 165 | } 166 | 167 | layer { 168 | name: "relu3" 169 | type: "ReLU" 170 | bottom: "conv3" 171 | top: "conv3" 172 | } 173 | 174 | layer { 175 | name: "pool1" 176 | type: "Pooling" 177 | bottom: "conv3" 178 | top: "pool1" 179 | pooling_param { 180 | pool: MAX 181 | kernel_size: 2 
182 | stride: 2 183 | } 184 | } 185 | 186 | ######################### inception 1a ################## 187 | 188 | layer { 189 | name: "inception_1a/1x1" 190 | type: "Convolution" 191 | bottom: "pool1" 192 | top: "inception_1a/1x1" 193 | param { 194 | lr_mult: 1 195 | decay_mult: 1 196 | } 197 | convolution_param { 198 | num_output: 64 199 | bias_term: false 200 | kernel_size: 1 201 | weight_filler { 202 | type: "xavier" 203 | } 204 | } 205 | } 206 | 207 | layer { 208 | name: "inception_1a/1x1_bn" 209 | type: "BatchNorm" 210 | bottom: "inception_1a/1x1" 211 | top: "inception_1a/1x1" 212 | batch_norm_param { 213 | use_global_stats: true 214 | } 215 | } 216 | 217 | layer { 218 | name: "inception_1a/1x1_scale" 219 | type: "Scale" 220 | bottom: "inception_1a/1x1" 221 | top: "inception_1a/1x1" 222 | scale_param { 223 | bias_term: true 224 | } 225 | } 226 | 227 | layer { 228 | name: "inception_1a/relu_1x1" 229 | type: "ReLU" 230 | bottom: "inception_1a/1x1" 231 | top: "inception_1a/1x1" 232 | } 233 | 234 | layer { 235 | name: "inception_1a/3x3_reduce" 236 | type: "Convolution" 237 | bottom: "pool1" 238 | top: "inception_1a/3x3_reduce" 239 | param { 240 | lr_mult: 1 241 | decay_mult: 1 242 | } 243 | convolution_param { 244 | num_output: 64 245 | bias_term: false 246 | kernel_size: 1 247 | weight_filler { 248 | type: "xavier" 249 | } 250 | } 251 | } 252 | 253 | layer { 254 | name: "inception_1a/3x3_reduce_bn" 255 | type: "BatchNorm" 256 | bottom: "inception_1a/3x3_reduce" 257 | top: "inception_1a/3x3_reduce" 258 | batch_norm_param { 259 | use_global_stats: true 260 | } 261 | } 262 | 263 | layer { 264 | name: "inception_1a/3x3_reduce_scale" 265 | type: "Scale" 266 | bottom: "inception_1a/3x3_reduce" 267 | top: "inception_1a/3x3_reduce" 268 | scale_param { 269 | bias_term: true 270 | } 271 | } 272 | 273 | layer { 274 | name: "inception_1a/relu_3x3_reduce" 275 | type: "ReLU" 276 | bottom: "inception_1a/3x3_reduce" 277 | top: "inception_1a/3x3_reduce" 278 | } 279 | 280 | layer { 281 | name: "inception_1a/3x3" 282 | type: "Convolution" 283 | bottom: "inception_1a/3x3_reduce" 284 | top: "inception_1a/3x3" 285 | param { 286 | lr_mult: 1 287 | decay_mult: 1 288 | } 289 | convolution_param { 290 | num_output: 64 291 | bias_term: false 292 | pad: 1 293 | kernel_size: 3 294 | stride: 1 295 | weight_filler { 296 | type: "xavier" 297 | } 298 | } 299 | } 300 | 301 | layer { 302 | name: "inception_1a/3x3_bn" 303 | type: "BatchNorm" 304 | bottom: "inception_1a/3x3" 305 | top: "inception_1a/3x3" 306 | batch_norm_param { 307 | use_global_stats: true 308 | } 309 | } 310 | 311 | layer { 312 | name: "inception_1a/3x3_scale" 313 | type: "Scale" 314 | bottom: "inception_1a/3x3" 315 | top: "inception_1a/3x3" 316 | scale_param { 317 | bias_term: true 318 | } 319 | } 320 | 321 | layer { 322 | name: "inception_1a/relu_3x3" 323 | type: "ReLU" 324 | bottom: "inception_1a/3x3" 325 | top: "inception_1a/3x3" 326 | } 327 | 328 | layer { 329 | name: "inception_1a/double_3x3_reduce" 330 | type: "Convolution" 331 | bottom: "pool1" 332 | top: "inception_1a/double_3x3_reduce" 333 | param { 334 | lr_mult: 1 335 | decay_mult: 1 336 | } 337 | convolution_param { 338 | num_output: 64 339 | bias_term: false 340 | kernel_size: 1 341 | weight_filler { 342 | type: "xavier" 343 | } 344 | } 345 | } 346 | 347 | layer { 348 | name: "inception_1a/double_3x3_reduce_bn" 349 | type: "BatchNorm" 350 | bottom: "inception_1a/double_3x3_reduce" 351 | top: "inception_1a/double_3x3_reduce" 352 | batch_norm_param { 353 | use_global_stats: true 354 | } 355 | 
} 356 | 357 | layer { 358 | name: "inception_1a/double_3x3_reduce_scale" 359 | type: "Scale" 360 | bottom: "inception_1a/double_3x3_reduce" 361 | top: "inception_1a/double_3x3_reduce" 362 | scale_param { 363 | bias_term: true 364 | } 365 | } 366 | 367 | layer { 368 | name: "inception_1a/relu_double_3x3_reduce" 369 | type: "ReLU" 370 | bottom: "inception_1a/double_3x3_reduce" 371 | top: "inception_1a/double_3x3_reduce" 372 | } 373 | 374 | layer { 375 | name: "inception_1a/double_3x3_1" 376 | type: "Convolution" 377 | bottom: "inception_1a/double_3x3_reduce" 378 | top: "inception_1a/double_3x3_1" 379 | param { 380 | lr_mult: 1 381 | decay_mult: 1 382 | } 383 | convolution_param { 384 | num_output: 64 385 | bias_term: false 386 | pad: 1 387 | kernel_size: 3 388 | stride: 1 389 | weight_filler { 390 | type: "xavier" 391 | } 392 | } 393 | } 394 | 395 | layer { 396 | name: "inception_1a/double_3x3_1_bn" 397 | type: "BatchNorm" 398 | bottom: "inception_1a/double_3x3_1" 399 | top: "inception_1a/double_3x3_1" 400 | batch_norm_param { 401 | use_global_stats: true 402 | } 403 | } 404 | 405 | layer { 406 | name: "inception_1a/double_3x3_1_scale" 407 | type: "Scale" 408 | bottom: "inception_1a/double_3x3_1" 409 | top: "inception_1a/double_3x3_1" 410 | scale_param { 411 | bias_term: true 412 | } 413 | } 414 | 415 | layer { 416 | name: "inception_1a/relu_double_3x3_1" 417 | type: "ReLU" 418 | bottom: "inception_1a/double_3x3_1" 419 | top: "inception_1a/double_3x3_1" 420 | } 421 | 422 | layer { 423 | name: "inception_1a/double_3x3_2" 424 | type: "Convolution" 425 | bottom: "inception_1a/double_3x3_1" 426 | top: "inception_1a/double_3x3_2" 427 | param { 428 | lr_mult: 1 429 | decay_mult: 1 430 | } 431 | convolution_param { 432 | num_output: 64 433 | bias_term: false 434 | pad: 1 435 | kernel_size: 3 436 | stride: 1 437 | weight_filler { 438 | type: "xavier" 439 | } 440 | } 441 | } 442 | 443 | layer { 444 | name: "inception_1a/double_3x3_2_bn" 445 | type: "BatchNorm" 446 | bottom: "inception_1a/double_3x3_2" 447 | top: "inception_1a/double_3x3_2" 448 | batch_norm_param { 449 | use_global_stats: true 450 | } 451 | } 452 | 453 | layer { 454 | name: "inception_1a/double_3x3_2_scale" 455 | type: "Scale" 456 | bottom: "inception_1a/double_3x3_2" 457 | top: "inception_1a/double_3x3_2" 458 | scale_param { 459 | bias_term: true 460 | } 461 | } 462 | 463 | layer { 464 | name: "inception_1a/relu_double_3x3_2" 465 | type: "ReLU" 466 | bottom: "inception_1a/double_3x3_2" 467 | top: "inception_1a/double_3x3_2" 468 | } 469 | 470 | layer { 471 | name: "inception_1a/pool" 472 | type: "Pooling" 473 | bottom: "pool1" 474 | top: "inception_1a/pool" 475 | pooling_param { 476 | pool: AVE 477 | kernel_size: 3 478 | stride: 1 479 | pad: 1 480 | } 481 | } 482 | 483 | layer { 484 | name: "inception_1a/pool_proj" 485 | type: "Convolution" 486 | bottom: "inception_1a/pool" 487 | top: "inception_1a/pool_proj" 488 | param { 489 | lr_mult: 1 490 | decay_mult: 1 491 | } 492 | convolution_param { 493 | num_output: 64 494 | bias_term: false 495 | kernel_size: 1 496 | stride: 1 497 | weight_filler { 498 | type: "xavier" 499 | } 500 | } 501 | } 502 | 503 | layer { 504 | name: "inception_1a/pool_proj_bn" 505 | type: "BatchNorm" 506 | bottom: "inception_1a/pool_proj" 507 | top: "inception_1a/pool_proj" 508 | batch_norm_param { 509 | use_global_stats: true 510 | } 511 | } 512 | 513 | layer { 514 | name: "inception_1a/pool_proj_scale" 515 | type: "Scale" 516 | bottom: "inception_1a/pool_proj" 517 | top: "inception_1a/pool_proj" 518 | scale_param 
{ 519 | bias_term: true 520 | } 521 | } 522 | 523 | layer { 524 | name: "inception_1a/relu_pool_proj" 525 | type: "ReLU" 526 | bottom: "inception_1a/pool_proj" 527 | top: "inception_1a/pool_proj" 528 | } 529 | 530 | layer { 531 | name: "inception_1a/output" 532 | type: "Concat" 533 | bottom: "inception_1a/1x1" 534 | bottom: "inception_1a/3x3" 535 | bottom: "inception_1a/double_3x3_2" 536 | bottom: "inception_1a/pool_proj" 537 | top: "inception_1a/output" 538 | } 539 | 540 | ######################### inception_1b ######################### 541 | 542 | layer { 543 | name: "inception_1b/3x3_reduce" 544 | type: "Convolution" 545 | bottom: "inception_1a/output" 546 | top: "inception_1b/3x3_reduce" 547 | param { 548 | lr_mult: 1 549 | decay_mult: 1 550 | } 551 | convolution_param { 552 | num_output: 64 553 | bias_term: false 554 | kernel_size: 1 555 | stride: 1 556 | weight_filler { 557 | type: "xavier" 558 | } 559 | } 560 | } 561 | 562 | layer { 563 | name: "inception_1b/3x3_reduce_bn" 564 | type: "BatchNorm" 565 | bottom: "inception_1b/3x3_reduce" 566 | top: "inception_1b/3x3_reduce" 567 | batch_norm_param { 568 | use_global_stats: true 569 | } 570 | } 571 | 572 | layer { 573 | name: "inception_1b/3x3_reduce_scale" 574 | type: "Scale" 575 | bottom: "inception_1b/3x3_reduce" 576 | top: "inception_1b/3x3_reduce" 577 | scale_param { 578 | bias_term: true 579 | } 580 | } 581 | 582 | layer { 583 | name: "inception_1b/relu_3x3_reduce" 584 | type: "ReLU" 585 | bottom: "inception_1b/3x3_reduce" 586 | top: "inception_1b/3x3_reduce" 587 | } 588 | 589 | layer { 590 | name: "inception_1b/3x3" 591 | type: "Convolution" 592 | bottom: "inception_1b/3x3_reduce" 593 | top: "inception_1b/3x3" 594 | param { 595 | lr_mult: 1 596 | decay_mult: 1 597 | } 598 | convolution_param { 599 | num_output: 64 600 | bias_term: false 601 | pad: 1 602 | kernel_size: 3 603 | stride: 2 604 | weight_filler { 605 | type: "xavier" 606 | } 607 | } 608 | } 609 | 610 | layer { 611 | name: "inception_1b/3x3_bn" 612 | type: "BatchNorm" 613 | bottom: "inception_1b/3x3" 614 | top: "inception_1b/3x3" 615 | batch_norm_param { 616 | use_global_stats: true 617 | } 618 | } 619 | 620 | layer { 621 | name: "inception_1b/3x3_scale" 622 | type: "Scale" 623 | bottom: "inception_1b/3x3" 624 | top: "inception_1b/3x3" 625 | scale_param { 626 | bias_term: true 627 | } 628 | } 629 | 630 | layer { 631 | name: "inception_1b/relu_3x3" 632 | type: "ReLU" 633 | bottom: "inception_1b/3x3" 634 | top: "inception_1b/3x3" 635 | } 636 | 637 | layer { 638 | name: "inception_1b/double_3x3_reduce" 639 | type: "Convolution" 640 | bottom: "inception_1a/output" 641 | top: "inception_1b/double_3x3_reduce" 642 | param { 643 | lr_mult: 1 644 | decay_mult: 1 645 | } 646 | convolution_param { 647 | num_output: 64 648 | bias_term: false 649 | pad: 1 650 | kernel_size: 3 651 | stride: 1 652 | weight_filler { 653 | type: "xavier" 654 | } 655 | } 656 | } 657 | 658 | layer { 659 | name: "inception_1b/double_3x3_reduce_bn" 660 | type: "BatchNorm" 661 | bottom: "inception_1b/double_3x3_reduce" 662 | top: "inception_1b/double_3x3_reduce" 663 | batch_norm_param { 664 | use_global_stats: true 665 | } 666 | } 667 | 668 | layer { 669 | name: "inception_1b/double_3x3_reduce_scale" 670 | type: "Scale" 671 | bottom: "inception_1b/double_3x3_reduce" 672 | top: "inception_1b/double_3x3_reduce" 673 | scale_param { 674 | bias_term: true 675 | } 676 | } 677 | 678 | layer { 679 | name: "inception_1b/relu_double_3x3_reduce" 680 | type: "ReLU" 681 | bottom: "inception_1b/double_3x3_reduce" 682 | 
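# Editor's note: top == bottom in the ReLU below, so the activation is
# computed in place, overwriting its input blob to save memory -- the same
# idiom is used for every ReLU in this file.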
top: "inception_1b/double_3x3_reduce" 683 | } 684 | 685 | layer { 686 | name: "inception_1b/double_3x3_1" 687 | type: "Convolution" 688 | bottom: "inception_1b/double_3x3_reduce" 689 | top: "inception_1b/double_3x3_1" 690 | param { 691 | lr_mult: 1 692 | decay_mult: 1 693 | } 694 | convolution_param { 695 | num_output: 64 696 | bias_term: false 697 | kernel_size: 1 698 | stride: 1 699 | weight_filler { 700 | type: "xavier" 701 | } 702 | } 703 | } 704 | 705 | layer { 706 | name: "inception_1b/double_3x3_1_bn" 707 | type: "BatchNorm" 708 | bottom: "inception_1b/double_3x3_1" 709 | top: "inception_1b/double_3x3_1" 710 | batch_norm_param { 711 | use_global_stats: true 712 | } 713 | } 714 | 715 | layer { 716 | name: "inception_1b/double_3x3_1_scale" 717 | type: "Scale" 718 | bottom: "inception_1b/double_3x3_1" 719 | top: "inception_1b/double_3x3_1" 720 | scale_param { 721 | bias_term: true 722 | } 723 | } 724 | 725 | layer { 726 | name: "inception_1b/relu_double_3x3_1" 727 | type: "ReLU" 728 | bottom: "inception_1b/double_3x3_1" 729 | top: "inception_1b/double_3x3_1" 730 | } 731 | 732 | layer { 733 | name: "inception_1b/double_3x3_2" 734 | type: "Convolution" 735 | bottom: "inception_1b/double_3x3_1" 736 | top: "inception_1b/double_3x3_2" 737 | param { 738 | lr_mult: 1 739 | decay_mult: 1 740 | } 741 | convolution_param { 742 | num_output: 64 743 | bias_term: false 744 | pad: 1 745 | kernel_size: 3 746 | stride: 2 747 | weight_filler { 748 | type: "xavier" 749 | } 750 | } 751 | } 752 | 753 | layer { 754 | name: "inception_1b/double_3x3_2_bn" 755 | type: "BatchNorm" 756 | bottom: "inception_1b/double_3x3_2" 757 | top: "inception_1b/double_3x3_2" 758 | batch_norm_param { 759 | use_global_stats: true 760 | } 761 | } 762 | 763 | layer { 764 | name: "inception_1b/double_3x3_2_scale" 765 | type: "Scale" 766 | bottom: "inception_1b/double_3x3_2" 767 | top: "inception_1b/double_3x3_2" 768 | scale_param { 769 | bias_term: true 770 | } 771 | } 772 | 773 | layer { 774 | name: "inception_1b/relu_double_3x3_2" 775 | type: "ReLU" 776 | bottom: "inception_1b/double_3x3_2" 777 | top: "inception_1b/double_3x3_2" 778 | } 779 | 780 | layer { 781 | name: "inception_1b/pool" 782 | type: "Pooling" 783 | bottom: "inception_1a/output" 784 | top: "inception_1b/pool" 785 | pooling_param { 786 | pool: MAX 787 | kernel_size: 3 788 | stride: 2 789 | } 790 | } 791 | 792 | layer { 793 | name: "inception_1b/output" 794 | type: "Concat" 795 | bottom: "inception_1b/3x3" 796 | bottom: "inception_1b/double_3x3_2" 797 | bottom: "inception_1b/pool" 798 | top: "inception_1b/output" 799 | } 800 | 801 | ######################### inception_2a ######################### 802 | 803 | layer { 804 | name: "inception_2a/1x1" 805 | type: "Convolution" 806 | bottom: "inception_1b/output" 807 | top: "inception_2a/1x1" 808 | param { 809 | lr_mult: 1 810 | decay_mult: 1 811 | } 812 | convolution_param { 813 | num_output: 128 814 | bias_term: false 815 | kernel_size: 1 816 | stride: 1 817 | weight_filler { 818 | type: "xavier" 819 | } 820 | } 821 | } 822 | 823 | layer { 824 | name: "inception_2a/1x1_bn" 825 | type: "BatchNorm" 826 | bottom: "inception_2a/1x1" 827 | top: "inception_2a/1x1" 828 | batch_norm_param { 829 | use_global_stats: true 830 | } 831 | } 832 | 833 | layer { 834 | name: "inception_2a/1x1_scale" 835 | type: "Scale" 836 | bottom: "inception_2a/1x1" 837 | top: "inception_2a/1x1" 838 | scale_param { 839 | bias_term: true 840 | } 841 | } 842 | 843 | layer { 844 | name: "inception_2a/relu_1x1" 845 | type: "ReLU" 846 | bottom: 
"inception_2a/1x1" 847 | top: "inception_2a/1x1" 848 | } 849 | 850 | layer { 851 | name: "inception_2a/3x3_reduce" 852 | type: "Convolution" 853 | bottom: "inception_1b/output" 854 | top: "inception_2a/3x3_reduce" 855 | param { 856 | lr_mult: 1 857 | decay_mult: 1 858 | } 859 | convolution_param { 860 | num_output: 128 861 | bias_term: false 862 | kernel_size: 1 863 | stride: 1 864 | weight_filler { 865 | type: "xavier" 866 | } 867 | } 868 | } 869 | 870 | layer { 871 | name: "inception_2a/3x3_reduce_bn" 872 | type: "BatchNorm" 873 | bottom: "inception_2a/3x3_reduce" 874 | top: "inception_2a/3x3_reduce" 875 | batch_norm_param { 876 | use_global_stats: true 877 | } 878 | } 879 | 880 | layer { 881 | name: "inception_2a/3x3_reduce_scale" 882 | type: "Scale" 883 | bottom: "inception_2a/3x3_reduce" 884 | top: "inception_2a/3x3_reduce" 885 | scale_param { 886 | bias_term: true 887 | } 888 | } 889 | 890 | layer { 891 | name: "inception_2a/relu_3x3_reduce" 892 | type: "ReLU" 893 | bottom: "inception_2a/3x3_reduce" 894 | top: "inception_2a/3x3_reduce" 895 | } 896 | 897 | layer { 898 | name: "inception_2a/3x3" 899 | type: "Convolution" 900 | bottom: "inception_2a/3x3_reduce" 901 | top: "inception_2a/3x3" 902 | param { 903 | lr_mult: 1 904 | decay_mult: 1 905 | } 906 | convolution_param { 907 | num_output: 128 908 | bias_term: false 909 | pad: 1 910 | kernel_size: 3 911 | stride: 1 912 | weight_filler { 913 | type: "xavier" 914 | } 915 | } 916 | } 917 | 918 | layer { 919 | name: "inception_2a/3x3_bn" 920 | type: "BatchNorm" 921 | bottom: "inception_2a/3x3" 922 | top: "inception_2a/3x3" 923 | batch_norm_param { 924 | use_global_stats: true 925 | } 926 | } 927 | 928 | layer { 929 | name: "inception_2a/3x3_scale" 930 | type: "Scale" 931 | bottom: "inception_2a/3x3" 932 | top: "inception_2a/3x3" 933 | scale_param { 934 | bias_term: true 935 | } 936 | } 937 | 938 | layer { 939 | name: "inception_2a/relu_3x3" 940 | type: "ReLU" 941 | bottom: "inception_2a/3x3" 942 | top: "inception_2a/3x3" 943 | } 944 | 945 | layer { 946 | name: "inception_2a/double_3x3_reduce" 947 | type: "Convolution" 948 | bottom: "inception_1b/output" 949 | top: "inception_2a/double_3x3_reduce" 950 | param { 951 | lr_mult: 1 952 | decay_mult: 1 953 | } 954 | convolution_param { 955 | num_output: 128 956 | bias_term: false 957 | kernel_size: 1 958 | stride: 1 959 | weight_filler { 960 | type: "xavier" 961 | } 962 | } 963 | } 964 | 965 | layer { 966 | name: "inception_2a/double_3x3_reduce_bn" 967 | type: "BatchNorm" 968 | bottom: "inception_2a/double_3x3_reduce" 969 | top: "inception_2a/double_3x3_reduce" 970 | batch_norm_param { 971 | use_global_stats: true 972 | } 973 | } 974 | 975 | layer { 976 | name: "inception_2a/double_3x3_reduce_scale" 977 | type: "Scale" 978 | bottom: "inception_2a/double_3x3_reduce" 979 | top: "inception_2a/double_3x3_reduce" 980 | scale_param { 981 | bias_term: true 982 | } 983 | } 984 | 985 | layer { 986 | name: "inception_2a/relu_double_3x3_reduce" 987 | type: "ReLU" 988 | bottom: "inception_2a/double_3x3_reduce" 989 | top: "inception_2a/double_3x3_reduce" 990 | } 991 | 992 | layer { 993 | name: "inception_2a/double_3x3_1" 994 | type: "Convolution" 995 | bottom: "inception_2a/double_3x3_reduce" 996 | top: "inception_2a/double_3x3_1" 997 | param { 998 | lr_mult: 1 999 | decay_mult: 1 1000 | } 1001 | convolution_param { 1002 | num_output: 128 1003 | bias_term: false 1004 | pad: 1 1005 | kernel_size: 3 1006 | stride: 1 1007 | weight_filler { 1008 | type: "xavier" 1009 | } 1010 | } 1011 | } 1012 | 1013 | layer { 
1014 | name: "inception_2a/double_3x3_1_bn" 1015 | type: "BatchNorm" 1016 | bottom: "inception_2a/double_3x3_1" 1017 | top: "inception_2a/double_3x3_1" 1018 | batch_norm_param { 1019 | use_global_stats: true 1020 | } 1021 | } 1022 | 1023 | layer { 1024 | name: "inception_2a/double_3x3_1_scale" 1025 | type: "Scale" 1026 | bottom: "inception_2a/double_3x3_1" 1027 | top: "inception_2a/double_3x3_1" 1028 | scale_param { 1029 | bias_term: true 1030 | } 1031 | } 1032 | 1033 | layer { 1034 | name: "inception_2a/relu_double_3x3_1" 1035 | type: "ReLU" 1036 | bottom: "inception_2a/double_3x3_1" 1037 | top: "inception_2a/double_3x3_1" 1038 | } 1039 | 1040 | layer { 1041 | name: "inception_2a/double_3x3_2" 1042 | type: "Convolution" 1043 | bottom: "inception_2a/double_3x3_1" 1044 | top: "inception_2a/double_3x3_2" 1045 | param { 1046 | lr_mult: 1 1047 | decay_mult: 1 1048 | } 1049 | convolution_param { 1050 | num_output: 128 1051 | bias_term: false 1052 | pad: 1 1053 | kernel_size: 3 1054 | stride: 1 1055 | weight_filler { 1056 | type: "xavier" 1057 | } 1058 | } 1059 | } 1060 | 1061 | layer { 1062 | name: "inception_2a/double_3x3_2_bn" 1063 | type: "BatchNorm" 1064 | bottom: "inception_2a/double_3x3_2" 1065 | top: "inception_2a/double_3x3_2" 1066 | batch_norm_param { 1067 | use_global_stats: true 1068 | } 1069 | } 1070 | 1071 | layer { 1072 | name: "inception_2a/double_3x3_2_scale" 1073 | type: "Scale" 1074 | bottom: "inception_2a/double_3x3_2" 1075 | top: "inception_2a/double_3x3_2" 1076 | scale_param { 1077 | bias_term: true 1078 | } 1079 | } 1080 | 1081 | layer { 1082 | name: "inception_2a/relu_double_3x3_2" 1083 | type: "ReLU" 1084 | bottom: "inception_2a/double_3x3_2" 1085 | top: "inception_2a/double_3x3_2" 1086 | } 1087 | 1088 | layer { 1089 | name: "inception_2a/pool" 1090 | type: "Pooling" 1091 | bottom: "inception_1b/output" 1092 | top: "inception_2a/pool" 1093 | pooling_param { 1094 | pool: AVE 1095 | kernel_size: 3 1096 | stride: 1 1097 | pad: 1 1098 | } 1099 | } 1100 | 1101 | layer { 1102 | name: "inception_2a/pool_proj" 1103 | type: "Convolution" 1104 | bottom: "inception_2a/pool" 1105 | top: "inception_2a/pool_proj" 1106 | param { 1107 | lr_mult: 1 1108 | decay_mult: 1 1109 | } 1110 | convolution_param { 1111 | num_output: 128 1112 | bias_term: false 1113 | kernel_size: 1 1114 | stride: 1 1115 | weight_filler { 1116 | type: "xavier" 1117 | } 1118 | } 1119 | } 1120 | 1121 | layer { 1122 | name: "inception_2a/pool_proj_bn" 1123 | type: "BatchNorm" 1124 | bottom: "inception_2a/pool_proj" 1125 | top: "inception_2a/pool_proj" 1126 | batch_norm_param { 1127 | use_global_stats: true 1128 | } 1129 | } 1130 | 1131 | layer { 1132 | name: "inception_2a/pool_proj_scale" 1133 | type: "Scale" 1134 | bottom: "inception_2a/pool_proj" 1135 | top: "inception_2a/pool_proj" 1136 | scale_param { 1137 | bias_term: true 1138 | } 1139 | } 1140 | 1141 | layer { 1142 | name: "inception_2a/relu_pool_proj" 1143 | type: "ReLU" 1144 | bottom: "inception_2a/pool_proj" 1145 | top: "inception_2a/pool_proj" 1146 | } 1147 | 1148 | layer { 1149 | name: "inception_2a/output" 1150 | type: "Concat" 1151 | bottom: "inception_2a/1x1" 1152 | bottom: "inception_2a/3x3" 1153 | bottom: "inception_2a/double_3x3_2" 1154 | bottom: "inception_2a/pool_proj" 1155 | top: "inception_2a/output" 1156 | } 1157 | 1158 | ########################### inception_2b ######################### 1159 | 1160 | layer { 1161 | name: "inception_2b/3x3_reduce" 1162 | type: "Convolution" 1163 | bottom: "inception_2a/output" 1164 | top: 
"inception_2b/3x3_reduce" 1165 | param { 1166 | lr_mult: 1 1167 | decay_mult: 1 1168 | } 1169 | convolution_param { 1170 | num_output: 128 1171 | bias_term: false 1172 | kernel_size: 1 1173 | stride: 1 1174 | weight_filler { 1175 | type: "xavier" 1176 | } 1177 | } 1178 | } 1179 | 1180 | layer { 1181 | name: "inception_2b/3x3_reduce_bn" 1182 | type: "BatchNorm" 1183 | bottom: "inception_2b/3x3_reduce" 1184 | top: "inception_2b/3x3_reduce" 1185 | batch_norm_param { 1186 | use_global_stats: true 1187 | } 1188 | } 1189 | 1190 | layer { 1191 | name: "inception_2b/3x3_reduce_scale" 1192 | type: "Scale" 1193 | bottom: "inception_2b/3x3_reduce" 1194 | top: "inception_2b/3x3_reduce" 1195 | scale_param { 1196 | bias_term: true 1197 | } 1198 | } 1199 | 1200 | layer { 1201 | name: "inception_2b/relu_3x3_reduce" 1202 | type: "ReLU" 1203 | bottom: "inception_2b/3x3_reduce" 1204 | top: "inception_2b/3x3_reduce" 1205 | } 1206 | 1207 | layer { 1208 | name: "inception_2b/3x3" 1209 | type: "Convolution" 1210 | bottom: "inception_2b/3x3_reduce" 1211 | top: "inception_2b/3x3" 1212 | param { 1213 | lr_mult: 1 1214 | decay_mult: 1 1215 | } 1216 | convolution_param { 1217 | num_output: 128 1218 | bias_term: false 1219 | pad: 1 1220 | kernel_size: 3 1221 | stride: 2 1222 | weight_filler { 1223 | type: "xavier" 1224 | } 1225 | } 1226 | } 1227 | 1228 | layer { 1229 | name: "inception_2b/3x3_bn" 1230 | type: "BatchNorm" 1231 | bottom: "inception_2b/3x3" 1232 | top: "inception_2b/3x3" 1233 | batch_norm_param { 1234 | use_global_stats: true 1235 | } 1236 | } 1237 | 1238 | layer { 1239 | name: "inception_2b/3x3_scale" 1240 | type: "Scale" 1241 | bottom: "inception_2b/3x3" 1242 | top: "inception_2b/3x3" 1243 | scale_param { 1244 | bias_term: true 1245 | } 1246 | } 1247 | 1248 | layer { 1249 | name: "inception_2b/relu_3x3" 1250 | type: "ReLU" 1251 | bottom: "inception_2b/3x3" 1252 | top: "inception_2b/3x3" 1253 | } 1254 | 1255 | layer { 1256 | name: "inception_2b/double_3x3_reduce" 1257 | type: "Convolution" 1258 | bottom: "inception_2a/output" 1259 | top: "inception_2b/double_3x3_reduce" 1260 | param { 1261 | lr_mult: 1 1262 | decay_mult: 1 1263 | } 1264 | convolution_param { 1265 | num_output: 128 1266 | bias_term: false 1267 | kernel_size: 1 1268 | stride: 1 1269 | weight_filler { 1270 | type: "xavier" 1271 | } 1272 | } 1273 | } 1274 | 1275 | layer { 1276 | name: "inception_2b/double_3x3_reduce_bn" 1277 | type: "BatchNorm" 1278 | bottom: "inception_2b/double_3x3_reduce" 1279 | top: "inception_2b/double_3x3_reduce" 1280 | batch_norm_param { 1281 | use_global_stats: true 1282 | } 1283 | } 1284 | 1285 | layer { 1286 | name: "inception_2b/double_3x3_reduce_scale" 1287 | type: "Scale" 1288 | bottom: "inception_2b/double_3x3_reduce" 1289 | top: "inception_2b/double_3x3_reduce" 1290 | scale_param { 1291 | bias_term: true 1292 | } 1293 | } 1294 | 1295 | layer { 1296 | name: "inception_2b/relu_double_3x3_reduce" 1297 | type: "ReLU" 1298 | bottom: "inception_2b/double_3x3_reduce" 1299 | top: "inception_2b/double_3x3_reduce" 1300 | } 1301 | 1302 | layer { 1303 | name: "inception_2b/double_3x3_1" 1304 | type: "Convolution" 1305 | bottom: "inception_2b/double_3x3_reduce" 1306 | top: "inception_2b/double_3x3_1" 1307 | param { 1308 | lr_mult: 1 1309 | decay_mult: 1 1310 | } 1311 | convolution_param { 1312 | num_output: 128 1313 | bias_term: false 1314 | pad: 1 1315 | kernel_size: 3 1316 | stride: 1 1317 | weight_filler { 1318 | type: "xavier" 1319 | } 1320 | } 1321 | } 1322 | 1323 | layer { 1324 | name: "inception_2b/double_3x3_1_bn" 
1325 | type: "BatchNorm" 1326 | bottom: "inception_2b/double_3x3_1" 1327 | top: "inception_2b/double_3x3_1" 1328 | batch_norm_param { 1329 | use_global_stats: true 1330 | } 1331 | } 1332 | 1333 | layer { 1334 | name: "inception_2b/double_3x3_1_scale" 1335 | type: "Scale" 1336 | bottom: "inception_2b/double_3x3_1" 1337 | top: "inception_2b/double_3x3_1" 1338 | scale_param { 1339 | bias_term: true 1340 | } 1341 | } 1342 | 1343 | layer { 1344 | name: "inception_2b/relu_double_3x3_1" 1345 | type: "ReLU" 1346 | bottom: "inception_2b/double_3x3_1" 1347 | top: "inception_2b/double_3x3_1" 1348 | } 1349 | 1350 | layer { 1351 | name: "inception_2b/double_3x3_2" 1352 | type: "Convolution" 1353 | bottom: "inception_2b/double_3x3_1" 1354 | top: "inception_2b/double_3x3_2" 1355 | param { 1356 | lr_mult: 1 1357 | decay_mult: 1 1358 | } 1359 | convolution_param { 1360 | num_output: 128 1361 | bias_term: false 1362 | pad: 1 1363 | kernel_size: 3 1364 | stride: 2 1365 | weight_filler { 1366 | type: "xavier" 1367 | } 1368 | } 1369 | } 1370 | 1371 | layer { 1372 | name: "inception_2b/double_3x3_2_bn" 1373 | type: "BatchNorm" 1374 | bottom: "inception_2b/double_3x3_2" 1375 | top: "inception_2b/double_3x3_2" 1376 | batch_norm_param { 1377 | use_global_stats: true 1378 | } 1379 | } 1380 | 1381 | layer { 1382 | name: "inception_2b/double_3x3_2_scale" 1383 | type: "Scale" 1384 | bottom: "inception_2b/double_3x3_2" 1385 | top: "inception_2b/double_3x3_2" 1386 | scale_param { 1387 | bias_term: true 1388 | } 1389 | } 1390 | 1391 | layer { 1392 | name: "inception_2b/relu_double_3x3_2" 1393 | type: "ReLU" 1394 | bottom: "inception_2b/double_3x3_2" 1395 | top: "inception_2b/double_3x3_2" 1396 | } 1397 | 1398 | layer { 1399 | name: "inception_2b/pool" 1400 | type: "Pooling" 1401 | bottom: "inception_2a/output" 1402 | top: "inception_2b/pool" 1403 | pooling_param { 1404 | pool: MAX 1405 | kernel_size: 3 1406 | stride: 2 1407 | } 1408 | } 1409 | 1410 | layer { 1411 | name: "inception_2b/output" 1412 | type: "Concat" 1413 | bottom: "inception_2b/3x3" 1414 | bottom: "inception_2b/double_3x3_2" 1415 | bottom: "inception_2b/pool" 1416 | top: "inception_2b/output" 1417 | } 1418 | 1419 | ######################### inception_3a ######################### 1420 | 1421 | layer { 1422 | name: "inception_3a/1x1" 1423 | type: "Convolution" 1424 | bottom: "inception_2b/output" 1425 | top: "inception_3a/1x1" 1426 | param { 1427 | lr_mult: 1 1428 | decay_mult: 1 1429 | } 1430 | convolution_param { 1431 | num_output: 256 1432 | bias_term: false 1433 | kernel_size: 1 1434 | stride: 1 1435 | weight_filler { 1436 | type: "xavier" 1437 | } 1438 | } 1439 | } 1440 | 1441 | layer { 1442 | name: "inception_3a/1x1_bn" 1443 | type: "BatchNorm" 1444 | bottom: "inception_3a/1x1" 1445 | top: "inception_3a/1x1" 1446 | batch_norm_param { 1447 | use_global_stats: true 1448 | } 1449 | } 1450 | 1451 | layer { 1452 | name: "inception_3a/1x1_scale" 1453 | type: "Scale" 1454 | bottom: "inception_3a/1x1" 1455 | top: "inception_3a/1x1" 1456 | scale_param { 1457 | bias_term: true 1458 | } 1459 | } 1460 | 1461 | layer { 1462 | name: "inception_3a/relu_1x1" 1463 | type: "ReLU" 1464 | bottom: "inception_3a/1x1" 1465 | top: "inception_3a/1x1" 1466 | } 1467 | 1468 | layer { 1469 | name: "inception_3a/3x3_reduce" 1470 | type: "Convolution" 1471 | bottom: "inception_2b/output" 1472 | top: "inception_3a/3x3_reduce" 1473 | param { 1474 | lr_mult: 1 1475 | decay_mult: 1 1476 | } 1477 | convolution_param { 1478 | num_output: 256 1479 | bias_term: false 1480 | kernel_size: 1 
1481 | stride: 1 1482 | weight_filler { 1483 | type: "xavier" 1484 | } 1485 | } 1486 | } 1487 | 1488 | layer { 1489 | name: "inception_3a/3x3_reduce_bn" 1490 | type: "BatchNorm" 1491 | bottom: "inception_3a/3x3_reduce" 1492 | top: "inception_3a/3x3_reduce" 1493 | batch_norm_param { 1494 | use_global_stats: true 1495 | } 1496 | } 1497 | 1498 | layer { 1499 | name: "inception_3a/3x3_reduce_scale" 1500 | type: "Scale" 1501 | bottom: "inception_3a/3x3_reduce" 1502 | top: "inception_3a/3x3_reduce" 1503 | scale_param { 1504 | bias_term: true 1505 | } 1506 | } 1507 | 1508 | layer { 1509 | name: "inception_3a/relu_3x3_reduce" 1510 | type: "ReLU" 1511 | bottom: "inception_3a/3x3_reduce" 1512 | top: "inception_3a/3x3_reduce" 1513 | } 1514 | 1515 | layer { 1516 | name: "inception_3a/3x3" 1517 | type: "Convolution" 1518 | bottom: "inception_3a/3x3_reduce" 1519 | top: "inception_3a/3x3" 1520 | param { 1521 | lr_mult: 1 1522 | decay_mult: 1 1523 | } 1524 | convolution_param { 1525 | num_output: 256 1526 | bias_term: false 1527 | pad: 1 1528 | kernel_size: 3 1529 | stride: 1 1530 | weight_filler { 1531 | type: "xavier" 1532 | } 1533 | } 1534 | } 1535 | 1536 | layer { 1537 | name: "inception_3a/3x3_bn" 1538 | type: "BatchNorm" 1539 | bottom: "inception_3a/3x3" 1540 | top: "inception_3a/3x3" 1541 | batch_norm_param { 1542 | use_global_stats: true 1543 | } 1544 | } 1545 | 1546 | layer { 1547 | name: "inception_3a/3x3_scale" 1548 | type: "Scale" 1549 | bottom: "inception_3a/3x3" 1550 | top: "inception_3a/3x3" 1551 | scale_param { 1552 | bias_term: true 1553 | } 1554 | } 1555 | 1556 | layer { 1557 | name: "inception_3a/relu_3x3" 1558 | type: "ReLU" 1559 | bottom: "inception_3a/3x3" 1560 | top: "inception_3a/3x3" 1561 | } 1562 | 1563 | layer { 1564 | name: "inception_3a/double_3x3_reduce" 1565 | type: "Convolution" 1566 | bottom: "inception_2b/output" 1567 | top: "inception_3a/double_3x3_reduce" 1568 | param { 1569 | lr_mult: 1 1570 | decay_mult: 1 1571 | } 1572 | convolution_param { 1573 | num_output: 256 1574 | bias_term: false 1575 | kernel_size: 1 1576 | stride: 1 1577 | weight_filler { 1578 | type: "xavier" 1579 | } 1580 | } 1581 | } 1582 | 1583 | layer { 1584 | name: "inception_3a/double_3x3_reduce_bn" 1585 | type: "BatchNorm" 1586 | bottom: "inception_3a/double_3x3_reduce" 1587 | top: "inception_3a/double_3x3_reduce" 1588 | batch_norm_param { 1589 | use_global_stats: true 1590 | } 1591 | } 1592 | 1593 | layer { 1594 | name: "inception_3a/double_3x3_reduce_scale" 1595 | type: "Scale" 1596 | bottom: "inception_3a/double_3x3_reduce" 1597 | top: "inception_3a/double_3x3_reduce" 1598 | scale_param { 1599 | bias_term: true 1600 | } 1601 | } 1602 | 1603 | layer { 1604 | name: "inception_3a/relu_double_3x3_reduce" 1605 | type: "ReLU" 1606 | bottom: "inception_3a/double_3x3_reduce" 1607 | top: "inception_3a/double_3x3_reduce" 1608 | } 1609 | 1610 | layer { 1611 | name: "inception_3a/double_3x3_1" 1612 | type: "Convolution" 1613 | bottom: "inception_3a/double_3x3_reduce" 1614 | top: "inception_3a/double_3x3_1" 1615 | param { 1616 | lr_mult: 1 1617 | decay_mult: 1 1618 | } 1619 | convolution_param { 1620 | num_output: 256 1621 | bias_term: false 1622 | pad: 1 1623 | kernel_size: 3 1624 | stride: 1 1625 | weight_filler { 1626 | type: "xavier" 1627 | } 1628 | } 1629 | } 1630 | 1631 | layer { 1632 | name: "inception_3a/double_3x3_1_bn" 1633 | type: "BatchNorm" 1634 | bottom: "inception_3a/double_3x3_1" 1635 | top: "inception_3a/double_3x3_1" 1636 | batch_norm_param { 1637 | use_global_stats: true 1638 | } 1639 | } 
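# Editor's note: each inception_Na module in this file concatenates four
# branches of equal width (here 256 channels each, so inception_3a/output
# carries 1024 channels): a 1x1 conv; a reduce conv followed by a 3x3; a
# reduce conv followed by two stacked 3x3s; and a 3x3 average pool followed
# by a 1x1 projection. The inception_Nb modules are grid-reduction blocks:
# stride-2 3x3 branches plus a stride-2 max pool, with no 1x1 branch.
# Assuming the 96x96 inputs used by the test prototxts in this repo, pool1
# and the three _b blocks downsample by 16 overall, leaving the 6x6 map
# consumed by the 6x6 global average pool at the end of the network.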
1640 | 1641 | layer { 1642 | name: "inception_3a/double_3x3_1_scale" 1643 | type: "Scale" 1644 | bottom: "inception_3a/double_3x3_1" 1645 | top: "inception_3a/double_3x3_1" 1646 | scale_param { 1647 | bias_term: true 1648 | } 1649 | } 1650 | 1651 | layer { 1652 | name: "inception_3a/relu_double_3x3_1" 1653 | type: "ReLU" 1654 | bottom: "inception_3a/double_3x3_1" 1655 | top: "inception_3a/double_3x3_1" 1656 | } 1657 | 1658 | layer { 1659 | name: "inception_3a/double_3x3_2" 1660 | type: "Convolution" 1661 | bottom: "inception_3a/double_3x3_1" 1662 | top: "inception_3a/double_3x3_2" 1663 | param { 1664 | lr_mult: 1 1665 | decay_mult: 1 1666 | } 1667 | convolution_param { 1668 | num_output: 256 1669 | bias_term: false 1670 | pad: 1 1671 | kernel_size: 3 1672 | stride: 1 1673 | weight_filler { 1674 | type: "xavier" 1675 | } 1676 | } 1677 | } 1678 | 1679 | layer { 1680 | name: "inception_3a/double_3x3_2_bn" 1681 | type: "BatchNorm" 1682 | bottom: "inception_3a/double_3x3_2" 1683 | top: "inception_3a/double_3x3_2" 1684 | batch_norm_param { 1685 | use_global_stats: true 1686 | } 1687 | } 1688 | 1689 | layer { 1690 | name: "inception_3a/double_3x3_2_scale" 1691 | type: "Scale" 1692 | bottom: "inception_3a/double_3x3_2" 1693 | top: "inception_3a/double_3x3_2" 1694 | scale_param { 1695 | bias_term: true 1696 | } 1697 | } 1698 | 1699 | layer { 1700 | name: "inception_3a/relu_double_3x3_2" 1701 | type: "ReLU" 1702 | bottom: "inception_3a/double_3x3_2" 1703 | top: "inception_3a/double_3x3_2" 1704 | } 1705 | 1706 | layer { 1707 | name: "inception_3a/pool" 1708 | type: "Pooling" 1709 | bottom: "inception_2b/output" 1710 | top: "inception_3a/pool" 1711 | pooling_param { 1712 | pool: AVE 1713 | kernel_size: 3 1714 | stride: 1 1715 | pad: 1 1716 | } 1717 | } 1718 | 1719 | layer { 1720 | name: "inception_3a/pool_proj" 1721 | type: "Convolution" 1722 | bottom: "inception_3a/pool" 1723 | top: "inception_3a/pool_proj" 1724 | param { 1725 | lr_mult: 1 1726 | decay_mult: 1 1727 | } 1728 | convolution_param { 1729 | num_output: 256 1730 | bias_term: false 1731 | kernel_size: 1 1732 | stride: 1 1733 | weight_filler { 1734 | type: "xavier" 1735 | } 1736 | } 1737 | } 1738 | 1739 | layer { 1740 | name: "inception_3a/pool_proj_bn" 1741 | type: "BatchNorm" 1742 | bottom: "inception_3a/pool_proj" 1743 | top: "inception_3a/pool_proj" 1744 | batch_norm_param { 1745 | use_global_stats: true 1746 | } 1747 | } 1748 | 1749 | layer { 1750 | name: "inception_3a/pool_proj_scale" 1751 | type: "Scale" 1752 | bottom: "inception_3a/pool_proj" 1753 | top: "inception_3a/pool_proj" 1754 | scale_param { 1755 | bias_term: true 1756 | } 1757 | } 1758 | 1759 | layer { 1760 | name: "inception_3a/relu_pool_proj" 1761 | type: "ReLU" 1762 | bottom: "inception_3a/pool_proj" 1763 | top: "inception_3a/pool_proj" 1764 | } 1765 | 1766 | layer { 1767 | name: "inception_3a/output" 1768 | type: "Concat" 1769 | bottom: "inception_3a/1x1" 1770 | bottom: "inception_3a/3x3" 1771 | bottom: "inception_3a/double_3x3_2" 1772 | bottom: "inception_3a/pool_proj" 1773 | top: "inception_3a/output" 1774 | } 1775 | 1776 | ######################### inception_3b ######################### 1777 | 1778 | layer { 1779 | name: "inception_3b/3x3_reduce" 1780 | type: "Convolution" 1781 | bottom: "inception_3a/output" 1782 | top: "inception_3b/3x3_reduce" 1783 | param { 1784 | lr_mult: 1 1785 | decay_mult: 1 1786 | } 1787 | convolution_param { 1788 | num_output: 256 1789 | bias_term: false 1790 | kernel_size: 1 1791 | stride: 1 1792 | weight_filler { 1793 | type: "xavier" 1794 
| } 1795 | } 1796 | } 1797 | 1798 | layer { 1799 | name: "inception_3b/3x3_reduce_bn" 1800 | type: "BatchNorm" 1801 | bottom: "inception_3b/3x3_reduce" 1802 | top: "inception_3b/3x3_reduce" 1803 | batch_norm_param { 1804 | use_global_stats: true 1805 | } 1806 | } 1807 | 1808 | layer { 1809 | name: "inception_3b/3x3_reduce_scale" 1810 | type: "Scale" 1811 | bottom: "inception_3b/3x3_reduce" 1812 | top: "inception_3b/3x3_reduce" 1813 | scale_param { 1814 | bias_term: true 1815 | } 1816 | } 1817 | 1818 | layer { 1819 | name: "inception_3b/relu_3x3_reduce" 1820 | type: "ReLU" 1821 | bottom: "inception_3b/3x3_reduce" 1822 | top: "inception_3b/3x3_reduce" 1823 | } 1824 | 1825 | layer { 1826 | name: "inception_3b/3x3" 1827 | type: "Convolution" 1828 | bottom: "inception_3b/3x3_reduce" 1829 | top: "inception_3b/3x3" 1830 | param { 1831 | lr_mult: 1 1832 | decay_mult: 1 1833 | } 1834 | convolution_param { 1835 | num_output: 256 1836 | bias_term: false 1837 | pad: 1 1838 | kernel_size: 3 1839 | stride: 2 1840 | weight_filler { 1841 | type: "xavier" 1842 | } 1843 | } 1844 | } 1845 | 1846 | layer { 1847 | name: "inception_3b/3x3_bn" 1848 | type: "BatchNorm" 1849 | bottom: "inception_3b/3x3" 1850 | top: "inception_3b/3x3" 1851 | batch_norm_param { 1852 | use_global_stats: true 1853 | } 1854 | } 1855 | 1856 | layer { 1857 | name: "inception_3b/3x3_scale" 1858 | type: "Scale" 1859 | bottom: "inception_3b/3x3" 1860 | top: "inception_3b/3x3" 1861 | scale_param { 1862 | bias_term: true 1863 | } 1864 | } 1865 | 1866 | layer { 1867 | name: "inception_3b/relu_3x3" 1868 | type: "ReLU" 1869 | bottom: "inception_3b/3x3" 1870 | top: "inception_3b/3x3" 1871 | } 1872 | 1873 | layer { 1874 | name: "inception_3b/double_3x3_reduce" 1875 | type: "Convolution" 1876 | bottom: "inception_3a/output" 1877 | top: "inception_3b/double_3x3_reduce" 1878 | param { 1879 | lr_mult: 1 1880 | decay_mult: 1 1881 | } 1882 | convolution_param { 1883 | num_output: 256 1884 | bias_term: false 1885 | kernel_size: 1 1886 | stride: 1 1887 | weight_filler { 1888 | type: "xavier" 1889 | } 1890 | } 1891 | } 1892 | 1893 | layer { 1894 | name: "inception_3b/double_3x3_reduce_bn" 1895 | type: "BatchNorm" 1896 | bottom: "inception_3b/double_3x3_reduce" 1897 | top: "inception_3b/double_3x3_reduce" 1898 | batch_norm_param { 1899 | use_global_stats: true 1900 | } 1901 | } 1902 | 1903 | layer { 1904 | name: "inception_3b/double_3x3_reduce_scale" 1905 | type: "Scale" 1906 | bottom: "inception_3b/double_3x3_reduce" 1907 | top: "inception_3b/double_3x3_reduce" 1908 | scale_param { 1909 | bias_term: true 1910 | } 1911 | } 1912 | 1913 | layer { 1914 | name: "inception_3b/relu_double_3x3_reduce" 1915 | type: "ReLU" 1916 | bottom: "inception_3b/double_3x3_reduce" 1917 | top: "inception_3b/double_3x3_reduce" 1918 | } 1919 | 1920 | layer { 1921 | name: "inception_3b/double_3x3_1" 1922 | type: "Convolution" 1923 | bottom: "inception_3b/double_3x3_reduce" 1924 | top: "inception_3b/double_3x3_1" 1925 | param { 1926 | lr_mult: 1 1927 | decay_mult: 1 1928 | } 1929 | convolution_param { 1930 | num_output: 256 1931 | bias_term: false 1932 | pad: 1 1933 | kernel_size: 3 1934 | stride: 1 1935 | weight_filler { 1936 | type: "xavier" 1937 | } 1938 | } 1939 | } 1940 | 1941 | layer { 1942 | name: "inception_3b/double_3x3_1_bn" 1943 | type: "BatchNorm" 1944 | bottom: "inception_3b/double_3x3_1" 1945 | top: "inception_3b/double_3x3_1" 1946 | batch_norm_param { 1947 | use_global_stats: true 1948 | } 1949 | } 1950 | 1951 | layer { 1952 | name: "inception_3b/double_3x3_1_scale" 
1953 | type: "Scale" 1954 | bottom: "inception_3b/double_3x3_1" 1955 | top: "inception_3b/double_3x3_1" 1956 | scale_param { 1957 | bias_term: true 1958 | } 1959 | } 1960 | 1961 | layer { 1962 | name: "inception_3b/relu_double_3x3_1" 1963 | type: "ReLU" 1964 | bottom: "inception_3b/double_3x3_1" 1965 | top: "inception_3b/double_3x3_1" 1966 | } 1967 | 1968 | layer { 1969 | name: "inception_3b/double_3x3_2" 1970 | type: "Convolution" 1971 | bottom: "inception_3b/double_3x3_1" 1972 | top: "inception_3b/double_3x3_2" 1973 | param { 1974 | lr_mult: 1 1975 | decay_mult: 1 1976 | } 1977 | convolution_param { 1978 | num_output: 256 1979 | bias_term: false 1980 | pad: 1 1981 | kernel_size: 3 1982 | stride: 2 1983 | weight_filler { 1984 | type: "xavier" 1985 | } 1986 | } 1987 | } 1988 | 1989 | layer { 1990 | name: "inception_3b/double_3x3_2_bn" 1991 | type: "BatchNorm" 1992 | bottom: "inception_3b/double_3x3_2" 1993 | top: "inception_3b/double_3x3_2" 1994 | batch_norm_param { 1995 | use_global_stats: true 1996 | } 1997 | } 1998 | 1999 | layer { 2000 | name: "inception_3b/double_3x3_2_scale" 2001 | type: "Scale" 2002 | bottom: "inception_3b/double_3x3_2" 2003 | top: "inception_3b/double_3x3_2" 2004 | scale_param { 2005 | bias_term: true 2006 | } 2007 | } 2008 | 2009 | layer { 2010 | name: "inception_3b/relu_double_3x3_2" 2011 | type: "ReLU" 2012 | bottom: "inception_3b/double_3x3_2" 2013 | top: "inception_3b/double_3x3_2" 2014 | } 2015 | 2016 | layer { 2017 | name: "inception_3b/pool" 2018 | type: "Pooling" 2019 | bottom: "inception_3a/output" 2020 | top: "inception_3b/pool" 2021 | pooling_param { 2022 | pool: MAX 2023 | kernel_size: 3 2024 | stride: 2 2025 | } 2026 | } 2027 | 2028 | layer { 2029 | name: "inception_3b/output" 2030 | type: "Concat" 2031 | bottom: "inception_3b/3x3" 2032 | bottom: "inception_3b/double_3x3_2" 2033 | bottom: "inception_3b/pool" 2034 | top: "inception_3b/output" 2035 | } 2036 | 2037 | ######################### global pool ######################### 2038 | 2039 | layer { 2040 | name: "global_pool" 2041 | top: "global_pool" 2042 | bottom: "inception_3b/output" 2043 | type: "Pooling" 2044 | pooling_param { 2045 | pool: AVE 2046 | kernel_h: 6 2047 | kernel_w: 6 2048 | stride: 1 2049 | } 2050 | } 2051 | 2052 | ######################### fc ######################### 2053 | 2054 | layer { 2055 | name: "fc7" 2056 | type: "InnerProduct" 2057 | bottom: "global_pool" 2058 | top: "fc7" 2059 | param { 2060 | lr_mult: 1 2061 | decay_mult: 1 2062 | } 2063 | param { 2064 | lr_mult: 2 2065 | decay_mult: 0 2066 | } 2067 | inner_product_param { 2068 | num_output: 256 2069 | weight_filler { 2070 | type: "gaussian" 2071 | std: 0.001 2072 | } 2073 | bias_filler { 2074 | type: "constant" 2075 | value: 0 2076 | } 2077 | } 2078 | } 2079 | 2080 | layer { 2081 | name: "fc7_bn" 2082 | type: "BatchNorm" 2083 | bottom: "fc7" 2084 | top: "fc7" 2085 | batch_norm_param { 2086 | use_global_stats: true 2087 | } 2088 | } 2089 | 2090 | layer { 2091 | name: "fc7_scale" 2092 | type: "Scale" 2093 | bottom: "fc7" 2094 | top: "fc7" 2095 | scale_param { 2096 | bias_term: true 2097 | } 2098 | } 2099 | 2100 | -------------------------------------------------------------------------------- /models/body/body_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/body/body_trainval.prototxt" 2 | test_iter: 1192 3 | test_interval: 1000 4 | test_initialization: false 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "multistep" 11 | 
base_lr: 0.1 12 | gamma: 0.5 13 | stepvalue: 25000 14 | stepvalue: 35000 15 | stepvalue: 40000 16 | stepvalue: 45000 17 | stepvalue: 50000 18 | stepvalue: 55000 19 | stepvalue: 60000 20 | stepvalue: 65000 21 | max_iter: 70000 22 | 23 | momentum: 0.9 24 | weight_decay: 0.0005 25 | 26 | snapshot: 10000 27 | snapshot_prefix: "external/exp/snapshots/body/body" 28 | solver_mode: GPU 29 | 30 | -------------------------------------------------------------------------------- /models/head/head_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/head/head_trainval.prototxt" 2 | test_iter: 1192 3 | test_interval: 1000 4 | test_initialization: false 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "multistep" 11 | base_lr: 0.1 12 | gamma: 0.5 13 | stepvalue: 25000 14 | stepvalue: 35000 15 | stepvalue: 40000 16 | stepvalue: 45000 17 | stepvalue: 50000 18 | stepvalue: 55000 19 | stepvalue: 60000 20 | stepvalue: 65000 21 | max_iter: 70000 22 | 23 | momentum: 0.9 24 | weight_decay: 0.0005 25 | 26 | snapshot: 10000 27 | snapshot_prefix: "external/exp/snapshots/head/head" 28 | solver_mode: GPU 29 | 30 | -------------------------------------------------------------------------------- /models/larm/larm_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/larm/larm_trainval.prototxt" 2 | test_iter: 1192 3 | test_interval: 1000 4 | test_initialization: false 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "multistep" 11 | base_lr: 0.1 12 | gamma: 0.5 13 | stepvalue: 25000 14 | stepvalue: 35000 15 | stepvalue: 40000 16 | stepvalue: 45000 17 | stepvalue: 50000 18 | stepvalue: 55000 19 | stepvalue: 60000 20 | stepvalue: 65000 21 | max_iter: 70000 22 | 23 | momentum: 0.9 24 | weight_decay: 0.0005 25 | 26 | snapshot: 10000 27 | snapshot_prefix: "external/exp/snapshots/larm/larm" 28 | solver_mode: GPU 29 | 30 | -------------------------------------------------------------------------------- /models/larm/larm_test.prototxt: -------------------------------------------------------------------------------- 1 | name: "larm" 2 | 3 | layer { 4 | name: "data/larm" 5 | type: "Python" 6 | top: "data" 7 | top: "label" 8 | top: "headbox" 9 | top: "bodybox" 10 | top: "legbox" 11 | top: "rarmbox" 12 | top: "larmbox" 13 | top: "rlegbox" 14 | top: "llegbox" 15 | python_param { 16 | module: "roi_data_layer" 17 | layer: "RoiDataLayer" 18 | param_str: "{'source': 'external/exp/datalists/${dataset}/${subset}_p.txt', 'root_folder': '', 'batch_size': 100, 'new_height': 96, 'new_width': 96, 'shuffle': False, 'mirror': False, 'mean_value': [103.939,116.779,123.68], 'region_num': 7, 'region_scale': True}" 19 | } 20 | } 21 | 22 | ######################### conv1 ######################### 23 | 24 | layer { 25 | name: "conv1" 26 | type: "Convolution" 27 | bottom: "data" 28 | top: "conv1" 29 | param { 30 | lr_mult: 0 31 | decay_mult: 0 32 | } 33 | convolution_param { 34 | num_output: 32 35 | bias_term: false 36 | pad: 1 37 | kernel_size: 3 38 | stride: 1 39 | weight_filler { 40 | type: "xavier" 41 | } 42 | } 43 | } 44 | 45 | layer { 46 | name: "conv1_bn" 47 | type: "BatchNorm" 48 | bottom: "conv1" 49 | top: "conv1" 50 | batch_norm_param { 51 | use_global_stats: true 52 | } 53 | } 54 | 55 | layer { 56 | name: "conv1_scale" 57 | type: "Scale" 58 | bottom: "conv1" 59 | top: "conv1" 60 | scale_param { 61 | bias_term: true 62 | } 63 | } 64 | 65 | layer { 66 | name: 
"relu1" 67 | type: "ReLU" 68 | bottom: "conv1" 69 | top: "conv1" 70 | } 71 | 72 | ######################### conv2 ######################### 73 | 74 | layer { 75 | name: "conv2" 76 | type: "Convolution" 77 | bottom: "conv1" 78 | top: "conv2" 79 | param { 80 | lr_mult: 0 81 | decay_mult: 0 82 | } 83 | convolution_param { 84 | num_output: 32 85 | bias_term: false 86 | pad: 1 87 | kernel_size: 3 88 | stride: 1 89 | weight_filler { 90 | type: "xavier" 91 | } 92 | } 93 | } 94 | 95 | layer { 96 | name: "conv2_bn" 97 | type: "BatchNorm" 98 | bottom: "conv2" 99 | top: "conv2" 100 | batch_norm_param { 101 | use_global_stats: true 102 | } 103 | } 104 | 105 | layer { 106 | name: "conv2_scale" 107 | type: "Scale" 108 | bottom: "conv2" 109 | top: "conv2" 110 | scale_param { 111 | bias_term: true 112 | } 113 | } 114 | 115 | layer { 116 | name: "relu2" 117 | type: "ReLU" 118 | bottom: "conv2" 119 | top: "conv2" 120 | } 121 | 122 | ######################### conv3 ######################### 123 | 124 | layer { 125 | name: "conv3" 126 | type: "Convolution" 127 | bottom: "conv2" 128 | top: "conv3" 129 | param { 130 | lr_mult: 0 131 | decay_mult: 0 132 | } 133 | convolution_param { 134 | num_output: 64 135 | bias_term: false 136 | pad: 1 137 | kernel_size: 3 138 | stride: 1 139 | weight_filler { 140 | type: "xavier" 141 | } 142 | } 143 | } 144 | 145 | layer { 146 | name: "conv3_bn" 147 | type: "BatchNorm" 148 | bottom: "conv3" 149 | top: "conv3" 150 | batch_norm_param { 151 | use_global_stats: true 152 | } 153 | } 154 | 155 | layer { 156 | name: "conv3_scale" 157 | type: "Scale" 158 | bottom: "conv3" 159 | top: "conv3" 160 | scale_param { 161 | bias_term: true 162 | } 163 | } 164 | 165 | layer { 166 | name: "relu3" 167 | type: "ReLU" 168 | bottom: "conv3" 169 | top: "conv3" 170 | } 171 | 172 | layer { 173 | name: "pool1" 174 | type: "Pooling" 175 | bottom: "conv3" 176 | top: "pool1" 177 | pooling_param { 178 | pool: MAX 179 | kernel_size: 2 180 | stride: 2 181 | } 182 | } 183 | 184 | ######################### inception 1a ################## 185 | 186 | layer { 187 | name: "inception_1a/1x1" 188 | type: "Convolution" 189 | bottom: "pool1" 190 | top: "inception_1a/1x1" 191 | param { 192 | lr_mult: 0 193 | decay_mult: 0 194 | } 195 | convolution_param { 196 | num_output: 64 197 | bias_term: false 198 | kernel_size: 1 199 | weight_filler { 200 | type: "xavier" 201 | } 202 | } 203 | } 204 | 205 | layer { 206 | name: "inception_1a/1x1_bn" 207 | type: "BatchNorm" 208 | bottom: "inception_1a/1x1" 209 | top: "inception_1a/1x1" 210 | batch_norm_param { 211 | use_global_stats: true 212 | } 213 | } 214 | 215 | layer { 216 | name: "inception_1a/1x1_scale" 217 | type: "Scale" 218 | bottom: "inception_1a/1x1" 219 | top: "inception_1a/1x1" 220 | scale_param { 221 | bias_term: true 222 | } 223 | } 224 | 225 | layer { 226 | name: "inception_1a/relu_1x1" 227 | type: "ReLU" 228 | bottom: "inception_1a/1x1" 229 | top: "inception_1a/1x1" 230 | } 231 | 232 | layer { 233 | name: "inception_1a/3x3_reduce" 234 | type: "Convolution" 235 | bottom: "pool1" 236 | top: "inception_1a/3x3_reduce" 237 | param { 238 | lr_mult: 0 239 | decay_mult: 0 240 | } 241 | convolution_param { 242 | num_output: 64 243 | bias_term: false 244 | kernel_size: 1 245 | weight_filler { 246 | type: "xavier" 247 | } 248 | } 249 | } 250 | 251 | layer { 252 | name: "inception_1a/3x3_reduce_bn" 253 | type: "BatchNorm" 254 | bottom: "inception_1a/3x3_reduce" 255 | top: "inception_1a/3x3_reduce" 256 | batch_norm_param { 257 | use_global_stats: true 258 | } 259 | } 260 | 
261 | layer { 262 | name: "inception_1a/3x3_reduce_scale" 263 | type: "Scale" 264 | bottom: "inception_1a/3x3_reduce" 265 | top: "inception_1a/3x3_reduce" 266 | scale_param { 267 | bias_term: true 268 | } 269 | } 270 | 271 | layer { 272 | name: "inception_1a/relu_3x3_reduce" 273 | type: "ReLU" 274 | bottom: "inception_1a/3x3_reduce" 275 | top: "inception_1a/3x3_reduce" 276 | } 277 | 278 | layer { 279 | name: "inception_1a/3x3" 280 | type: "Convolution" 281 | bottom: "inception_1a/3x3_reduce" 282 | top: "inception_1a/3x3" 283 | param { 284 | lr_mult: 0 285 | decay_mult: 0 286 | } 287 | convolution_param { 288 | num_output: 64 289 | bias_term: false 290 | pad: 1 291 | kernel_size: 3 292 | stride: 1 293 | weight_filler { 294 | type: "xavier" 295 | } 296 | } 297 | } 298 | 299 | layer { 300 | name: "inception_1a/3x3_bn" 301 | type: "BatchNorm" 302 | bottom: "inception_1a/3x3" 303 | top: "inception_1a/3x3" 304 | batch_norm_param { 305 | use_global_stats: true 306 | } 307 | } 308 | 309 | layer { 310 | name: "inception_1a/3x3_scale" 311 | type: "Scale" 312 | bottom: "inception_1a/3x3" 313 | top: "inception_1a/3x3" 314 | scale_param { 315 | bias_term: true 316 | } 317 | } 318 | 319 | layer { 320 | name: "inception_1a/relu_3x3" 321 | type: "ReLU" 322 | bottom: "inception_1a/3x3" 323 | top: "inception_1a/3x3" 324 | } 325 | 326 | layer { 327 | name: "inception_1a/double_3x3_reduce" 328 | type: "Convolution" 329 | bottom: "pool1" 330 | top: "inception_1a/double_3x3_reduce" 331 | param { 332 | lr_mult: 0 333 | decay_mult: 0 334 | } 335 | convolution_param { 336 | num_output: 64 337 | bias_term: false 338 | kernel_size: 1 339 | weight_filler { 340 | type: "xavier" 341 | } 342 | } 343 | } 344 | 345 | layer { 346 | name: "inception_1a/double_3x3_reduce_bn" 347 | type: "BatchNorm" 348 | bottom: "inception_1a/double_3x3_reduce" 349 | top: "inception_1a/double_3x3_reduce" 350 | batch_norm_param { 351 | use_global_stats: true 352 | } 353 | } 354 | 355 | layer { 356 | name: "inception_1a/double_3x3_reduce_scale" 357 | type: "Scale" 358 | bottom: "inception_1a/double_3x3_reduce" 359 | top: "inception_1a/double_3x3_reduce" 360 | scale_param { 361 | bias_term: true 362 | } 363 | } 364 | 365 | layer { 366 | name: "inception_1a/relu_double_3x3_reduce" 367 | type: "ReLU" 368 | bottom: "inception_1a/double_3x3_reduce" 369 | top: "inception_1a/double_3x3_reduce" 370 | } 371 | 372 | layer { 373 | name: "inception_1a/double_3x3_1" 374 | type: "Convolution" 375 | bottom: "inception_1a/double_3x3_reduce" 376 | top: "inception_1a/double_3x3_1" 377 | param { 378 | lr_mult: 0 379 | decay_mult: 0 380 | } 381 | convolution_param { 382 | num_output: 64 383 | bias_term: false 384 | pad: 1 385 | kernel_size: 3 386 | stride: 1 387 | weight_filler { 388 | type: "xavier" 389 | } 390 | } 391 | } 392 | 393 | layer { 394 | name: "inception_1a/double_3x3_1_bn" 395 | type: "BatchNorm" 396 | bottom: "inception_1a/double_3x3_1" 397 | top: "inception_1a/double_3x3_1" 398 | batch_norm_param { 399 | use_global_stats: true 400 | } 401 | } 402 | 403 | layer { 404 | name: "inception_1a/double_3x3_1_scale" 405 | type: "Scale" 406 | bottom: "inception_1a/double_3x3_1" 407 | top: "inception_1a/double_3x3_1" 408 | scale_param { 409 | bias_term: true 410 | } 411 | } 412 | 413 | layer { 414 | name: "inception_1a/relu_double_3x3_1" 415 | type: "ReLU" 416 | bottom: "inception_1a/double_3x3_1" 417 | top: "inception_1a/double_3x3_1" 418 | } 419 | 420 | layer { 421 | name: "inception_1a/double_3x3_2" 422 | type: "Convolution" 423 | bottom: 
"inception_1a/double_3x3_1" 424 | top: "inception_1a/double_3x3_2" 425 | param { 426 | lr_mult: 0 427 | decay_mult: 0 428 | } 429 | convolution_param { 430 | num_output: 64 431 | bias_term: false 432 | pad: 1 433 | kernel_size: 3 434 | stride: 1 435 | weight_filler { 436 | type: "xavier" 437 | } 438 | } 439 | } 440 | 441 | layer { 442 | name: "inception_1a/double_3x3_2_bn" 443 | type: "BatchNorm" 444 | bottom: "inception_1a/double_3x3_2" 445 | top: "inception_1a/double_3x3_2" 446 | batch_norm_param { 447 | use_global_stats: true 448 | } 449 | } 450 | 451 | layer { 452 | name: "inception_1a/double_3x3_2_scale" 453 | type: "Scale" 454 | bottom: "inception_1a/double_3x3_2" 455 | top: "inception_1a/double_3x3_2" 456 | scale_param { 457 | bias_term: true 458 | } 459 | } 460 | 461 | layer { 462 | name: "inception_1a/relu_double_3x3_2" 463 | type: "ReLU" 464 | bottom: "inception_1a/double_3x3_2" 465 | top: "inception_1a/double_3x3_2" 466 | } 467 | 468 | layer { 469 | name: "inception_1a/pool" 470 | type: "Pooling" 471 | bottom: "pool1" 472 | top: "inception_1a/pool" 473 | pooling_param { 474 | pool: AVE 475 | kernel_size: 3 476 | stride: 1 477 | pad: 1 478 | } 479 | } 480 | 481 | layer { 482 | name: "inception_1a/pool_proj" 483 | type: "Convolution" 484 | bottom: "inception_1a/pool" 485 | top: "inception_1a/pool_proj" 486 | param { 487 | lr_mult: 0 488 | decay_mult: 0 489 | } 490 | convolution_param { 491 | num_output: 64 492 | bias_term: false 493 | kernel_size: 1 494 | stride: 1 495 | weight_filler { 496 | type: "xavier" 497 | } 498 | } 499 | } 500 | 501 | layer { 502 | name: "inception_1a/pool_proj_bn" 503 | type: "BatchNorm" 504 | bottom: "inception_1a/pool_proj" 505 | top: "inception_1a/pool_proj" 506 | batch_norm_param { 507 | use_global_stats: true 508 | } 509 | } 510 | 511 | layer { 512 | name: "inception_1a/pool_proj_scale" 513 | type: "Scale" 514 | bottom: "inception_1a/pool_proj" 515 | top: "inception_1a/pool_proj" 516 | scale_param { 517 | bias_term: true 518 | } 519 | } 520 | 521 | layer { 522 | name: "inception_1a/relu_pool_proj" 523 | type: "ReLU" 524 | bottom: "inception_1a/pool_proj" 525 | top: "inception_1a/pool_proj" 526 | } 527 | 528 | layer { 529 | name: "inception_1a/output" 530 | type: "Concat" 531 | bottom: "inception_1a/1x1" 532 | bottom: "inception_1a/3x3" 533 | bottom: "inception_1a/double_3x3_2" 534 | bottom: "inception_1a/pool_proj" 535 | top: "inception_1a/output" 536 | } 537 | 538 | ######################### inception_1b ######################### 539 | 540 | layer { 541 | name: "inception_1b/3x3_reduce" 542 | type: "Convolution" 543 | bottom: "inception_1a/output" 544 | top: "inception_1b/3x3_reduce" 545 | param { 546 | lr_mult: 0 547 | decay_mult: 0 548 | } 549 | convolution_param { 550 | num_output: 64 551 | bias_term: false 552 | kernel_size: 1 553 | stride: 1 554 | weight_filler { 555 | type: "xavier" 556 | } 557 | } 558 | } 559 | 560 | layer { 561 | name: "inception_1b/3x3_reduce_bn" 562 | type: "BatchNorm" 563 | bottom: "inception_1b/3x3_reduce" 564 | top: "inception_1b/3x3_reduce" 565 | batch_norm_param { 566 | use_global_stats: true 567 | } 568 | } 569 | 570 | layer { 571 | name: "inception_1b/3x3_reduce_scale" 572 | type: "Scale" 573 | bottom: "inception_1b/3x3_reduce" 574 | top: "inception_1b/3x3_reduce" 575 | scale_param { 576 | bias_term: true 577 | } 578 | } 579 | 580 | layer { 581 | name: "inception_1b/relu_3x3_reduce" 582 | type: "ReLU" 583 | bottom: "inception_1b/3x3_reduce" 584 | top: "inception_1b/3x3_reduce" 585 | } 586 | 587 | layer { 588 | name: 
"inception_1b/3x3" 589 | type: "Convolution" 590 | bottom: "inception_1b/3x3_reduce" 591 | top: "inception_1b/3x3" 592 | param { 593 | lr_mult: 0 594 | decay_mult: 0 595 | } 596 | convolution_param { 597 | num_output: 64 598 | bias_term: false 599 | pad: 1 600 | kernel_size: 3 601 | stride: 2 602 | weight_filler { 603 | type: "xavier" 604 | } 605 | } 606 | } 607 | 608 | layer { 609 | name: "inception_1b/3x3_bn" 610 | type: "BatchNorm" 611 | bottom: "inception_1b/3x3" 612 | top: "inception_1b/3x3" 613 | batch_norm_param { 614 | use_global_stats: true 615 | } 616 | } 617 | 618 | layer { 619 | name: "inception_1b/3x3_scale" 620 | type: "Scale" 621 | bottom: "inception_1b/3x3" 622 | top: "inception_1b/3x3" 623 | scale_param { 624 | bias_term: true 625 | } 626 | } 627 | 628 | layer { 629 | name: "inception_1b/relu_3x3" 630 | type: "ReLU" 631 | bottom: "inception_1b/3x3" 632 | top: "inception_1b/3x3" 633 | } 634 | 635 | layer { 636 | name: "inception_1b/double_3x3_reduce" 637 | type: "Convolution" 638 | bottom: "inception_1a/output" 639 | top: "inception_1b/double_3x3_reduce" 640 | param { 641 | lr_mult: 0 642 | decay_mult: 0 643 | } 644 | convolution_param { 645 | num_output: 64 646 | bias_term: false 647 | pad: 1 648 | kernel_size: 3 649 | stride: 1 650 | weight_filler { 651 | type: "xavier" 652 | } 653 | } 654 | } 655 | 656 | layer { 657 | name: "inception_1b/double_3x3_reduce_bn" 658 | type: "BatchNorm" 659 | bottom: "inception_1b/double_3x3_reduce" 660 | top: "inception_1b/double_3x3_reduce" 661 | batch_norm_param { 662 | use_global_stats: true 663 | } 664 | } 665 | 666 | layer { 667 | name: "inception_1b/double_3x3_reduce_scale" 668 | type: "Scale" 669 | bottom: "inception_1b/double_3x3_reduce" 670 | top: "inception_1b/double_3x3_reduce" 671 | scale_param { 672 | bias_term: true 673 | } 674 | } 675 | 676 | layer { 677 | name: "inception_1b/relu_double_3x3_reduce" 678 | type: "ReLU" 679 | bottom: "inception_1b/double_3x3_reduce" 680 | top: "inception_1b/double_3x3_reduce" 681 | } 682 | 683 | layer { 684 | name: "inception_1b/double_3x3_1" 685 | type: "Convolution" 686 | bottom: "inception_1b/double_3x3_reduce" 687 | top: "inception_1b/double_3x3_1" 688 | param { 689 | lr_mult: 0 690 | decay_mult: 0 691 | } 692 | convolution_param { 693 | num_output: 64 694 | bias_term: false 695 | kernel_size: 1 696 | stride: 1 697 | weight_filler { 698 | type: "xavier" 699 | } 700 | } 701 | } 702 | 703 | layer { 704 | name: "inception_1b/double_3x3_1_bn" 705 | type: "BatchNorm" 706 | bottom: "inception_1b/double_3x3_1" 707 | top: "inception_1b/double_3x3_1" 708 | batch_norm_param { 709 | use_global_stats: true 710 | } 711 | } 712 | 713 | layer { 714 | name: "inception_1b/double_3x3_1_scale" 715 | type: "Scale" 716 | bottom: "inception_1b/double_3x3_1" 717 | top: "inception_1b/double_3x3_1" 718 | scale_param { 719 | bias_term: true 720 | } 721 | } 722 | 723 | layer { 724 | name: "inception_1b/relu_double_3x3_1" 725 | type: "ReLU" 726 | bottom: "inception_1b/double_3x3_1" 727 | top: "inception_1b/double_3x3_1" 728 | } 729 | 730 | layer { 731 | name: "inception_1b/double_3x3_2" 732 | type: "Convolution" 733 | bottom: "inception_1b/double_3x3_1" 734 | top: "inception_1b/double_3x3_2" 735 | param { 736 | lr_mult: 0 737 | decay_mult: 0 738 | } 739 | convolution_param { 740 | num_output: 64 741 | bias_term: false 742 | pad: 1 743 | kernel_size: 3 744 | stride: 2 745 | weight_filler { 746 | type: "xavier" 747 | } 748 | } 749 | } 750 | 751 | layer { 752 | name: "inception_1b/double_3x3_2_bn" 753 | type: "BatchNorm" 
754 | bottom: "inception_1b/double_3x3_2" 755 | top: "inception_1b/double_3x3_2" 756 | batch_norm_param { 757 | use_global_stats: true 758 | } 759 | } 760 | 761 | layer { 762 | name: "inception_1b/double_3x3_2_scale" 763 | type: "Scale" 764 | bottom: "inception_1b/double_3x3_2" 765 | top: "inception_1b/double_3x3_2" 766 | scale_param { 767 | bias_term: true 768 | } 769 | } 770 | 771 | layer { 772 | name: "inception_1b/relu_double_3x3_2" 773 | type: "ReLU" 774 | bottom: "inception_1b/double_3x3_2" 775 | top: "inception_1b/double_3x3_2" 776 | } 777 | 778 | layer { 779 | name: "inception_1b/pool" 780 | type: "Pooling" 781 | bottom: "inception_1a/output" 782 | top: "inception_1b/pool" 783 | pooling_param { 784 | pool: MAX 785 | kernel_size: 3 786 | stride: 2 787 | } 788 | } 789 | 790 | layer { 791 | name: "inception_1b/output" 792 | type: "Concat" 793 | bottom: "inception_1b/3x3" 794 | bottom: "inception_1b/double_3x3_2" 795 | bottom: "inception_1b/pool" 796 | top: "inception_1b/output" 797 | } 798 | 799 | ######################### inception_2a ######################### 800 | 801 | layer { 802 | name: "inception_2a/1x1" 803 | type: "Convolution" 804 | bottom: "inception_1b/output" 805 | top: "inception_2a/1x1" 806 | param { 807 | lr_mult: 0 808 | decay_mult: 0 809 | } 810 | convolution_param { 811 | num_output: 128 812 | bias_term: false 813 | kernel_size: 1 814 | stride: 1 815 | weight_filler { 816 | type: "xavier" 817 | } 818 | } 819 | } 820 | 821 | layer { 822 | name: "inception_2a/1x1_bn" 823 | type: "BatchNorm" 824 | bottom: "inception_2a/1x1" 825 | top: "inception_2a/1x1" 826 | batch_norm_param { 827 | use_global_stats: true 828 | } 829 | } 830 | 831 | layer { 832 | name: "inception_2a/1x1_scale" 833 | type: "Scale" 834 | bottom: "inception_2a/1x1" 835 | top: "inception_2a/1x1" 836 | scale_param { 837 | bias_term: true 838 | } 839 | } 840 | 841 | layer { 842 | name: "inception_2a/relu_1x1" 843 | type: "ReLU" 844 | bottom: "inception_2a/1x1" 845 | top: "inception_2a/1x1" 846 | } 847 | 848 | layer { 849 | name: "inception_2a/3x3_reduce" 850 | type: "Convolution" 851 | bottom: "inception_1b/output" 852 | top: "inception_2a/3x3_reduce" 853 | param { 854 | lr_mult: 0 855 | decay_mult: 0 856 | } 857 | convolution_param { 858 | num_output: 128 859 | bias_term: false 860 | kernel_size: 1 861 | stride: 1 862 | weight_filler { 863 | type: "xavier" 864 | } 865 | } 866 | } 867 | 868 | layer { 869 | name: "inception_2a/3x3_reduce_bn" 870 | type: "BatchNorm" 871 | bottom: "inception_2a/3x3_reduce" 872 | top: "inception_2a/3x3_reduce" 873 | batch_norm_param { 874 | use_global_stats: true 875 | } 876 | } 877 | 878 | layer { 879 | name: "inception_2a/3x3_reduce_scale" 880 | type: "Scale" 881 | bottom: "inception_2a/3x3_reduce" 882 | top: "inception_2a/3x3_reduce" 883 | scale_param { 884 | bias_term: true 885 | } 886 | } 887 | 888 | layer { 889 | name: "inception_2a/relu_3x3_reduce" 890 | type: "ReLU" 891 | bottom: "inception_2a/3x3_reduce" 892 | top: "inception_2a/3x3_reduce" 893 | } 894 | 895 | layer { 896 | name: "inception_2a/3x3" 897 | type: "Convolution" 898 | bottom: "inception_2a/3x3_reduce" 899 | top: "inception_2a/3x3" 900 | param { 901 | lr_mult: 0 902 | decay_mult: 0 903 | } 904 | convolution_param { 905 | num_output: 128 906 | bias_term: false 907 | pad: 1 908 | kernel_size: 3 909 | stride: 1 910 | weight_filler { 911 | type: "xavier" 912 | } 913 | } 914 | } 915 | 916 | layer { 917 | name: "inception_2a/3x3_bn" 918 | type: "BatchNorm" 919 | bottom: "inception_2a/3x3" 920 | top: 
"inception_2a/3x3" 921 | batch_norm_param { 922 | use_global_stats: true 923 | } 924 | } 925 | 926 | layer { 927 | name: "inception_2a/3x3_scale" 928 | type: "Scale" 929 | bottom: "inception_2a/3x3" 930 | top: "inception_2a/3x3" 931 | scale_param { 932 | bias_term: true 933 | } 934 | } 935 | 936 | layer { 937 | name: "inception_2a/relu_3x3" 938 | type: "ReLU" 939 | bottom: "inception_2a/3x3" 940 | top: "inception_2a/3x3" 941 | } 942 | 943 | layer { 944 | name: "inception_2a/double_3x3_reduce" 945 | type: "Convolution" 946 | bottom: "inception_1b/output" 947 | top: "inception_2a/double_3x3_reduce" 948 | param { 949 | lr_mult: 0 950 | decay_mult: 0 951 | } 952 | convolution_param { 953 | num_output: 128 954 | bias_term: false 955 | kernel_size: 1 956 | stride: 1 957 | weight_filler { 958 | type: "xavier" 959 | } 960 | } 961 | } 962 | 963 | layer { 964 | name: "inception_2a/double_3x3_reduce_bn" 965 | type: "BatchNorm" 966 | bottom: "inception_2a/double_3x3_reduce" 967 | top: "inception_2a/double_3x3_reduce" 968 | batch_norm_param { 969 | use_global_stats: true 970 | } 971 | } 972 | 973 | layer { 974 | name: "inception_2a/double_3x3_reduce_scale" 975 | type: "Scale" 976 | bottom: "inception_2a/double_3x3_reduce" 977 | top: "inception_2a/double_3x3_reduce" 978 | scale_param { 979 | bias_term: true 980 | } 981 | } 982 | 983 | layer { 984 | name: "inception_2a/relu_double_3x3_reduce" 985 | type: "ReLU" 986 | bottom: "inception_2a/double_3x3_reduce" 987 | top: "inception_2a/double_3x3_reduce" 988 | } 989 | 990 | layer { 991 | name: "inception_2a/double_3x3_1" 992 | type: "Convolution" 993 | bottom: "inception_2a/double_3x3_reduce" 994 | top: "inception_2a/double_3x3_1" 995 | param { 996 | lr_mult: 0 997 | decay_mult: 0 998 | } 999 | convolution_param { 1000 | num_output: 128 1001 | bias_term: false 1002 | pad: 1 1003 | kernel_size: 3 1004 | stride: 1 1005 | weight_filler { 1006 | type: "xavier" 1007 | } 1008 | } 1009 | } 1010 | 1011 | layer { 1012 | name: "inception_2a/double_3x3_1_bn" 1013 | type: "BatchNorm" 1014 | bottom: "inception_2a/double_3x3_1" 1015 | top: "inception_2a/double_3x3_1" 1016 | batch_norm_param { 1017 | use_global_stats: true 1018 | } 1019 | } 1020 | 1021 | layer { 1022 | name: "inception_2a/double_3x3_1_scale" 1023 | type: "Scale" 1024 | bottom: "inception_2a/double_3x3_1" 1025 | top: "inception_2a/double_3x3_1" 1026 | scale_param { 1027 | bias_term: true 1028 | } 1029 | } 1030 | 1031 | layer { 1032 | name: "inception_2a/relu_double_3x3_1" 1033 | type: "ReLU" 1034 | bottom: "inception_2a/double_3x3_1" 1035 | top: "inception_2a/double_3x3_1" 1036 | } 1037 | 1038 | layer { 1039 | name: "inception_2a/double_3x3_2" 1040 | type: "Convolution" 1041 | bottom: "inception_2a/double_3x3_1" 1042 | top: "inception_2a/double_3x3_2" 1043 | param { 1044 | lr_mult: 0 1045 | decay_mult: 0 1046 | } 1047 | convolution_param { 1048 | num_output: 128 1049 | bias_term: false 1050 | pad: 1 1051 | kernel_size: 3 1052 | stride: 1 1053 | weight_filler { 1054 | type: "xavier" 1055 | } 1056 | } 1057 | } 1058 | 1059 | layer { 1060 | name: "inception_2a/double_3x3_2_bn" 1061 | type: "BatchNorm" 1062 | bottom: "inception_2a/double_3x3_2" 1063 | top: "inception_2a/double_3x3_2" 1064 | batch_norm_param { 1065 | use_global_stats: true 1066 | } 1067 | } 1068 | 1069 | layer { 1070 | name: "inception_2a/double_3x3_2_scale" 1071 | type: "Scale" 1072 | bottom: "inception_2a/double_3x3_2" 1073 | top: "inception_2a/double_3x3_2" 1074 | scale_param { 1075 | bias_term: true 1076 | } 1077 | } 1078 | 1079 | layer { 
1080 | name: "inception_2a/relu_double_3x3_2" 1081 | type: "ReLU" 1082 | bottom: "inception_2a/double_3x3_2" 1083 | top: "inception_2a/double_3x3_2" 1084 | } 1085 | 1086 | layer { 1087 | name: "inception_2a/pool" 1088 | type: "Pooling" 1089 | bottom: "inception_1b/output" 1090 | top: "inception_2a/pool" 1091 | pooling_param { 1092 | pool: AVE 1093 | kernel_size: 3 1094 | stride: 1 1095 | pad: 1 1096 | } 1097 | } 1098 | 1099 | layer { 1100 | name: "inception_2a/pool_proj" 1101 | type: "Convolution" 1102 | bottom: "inception_2a/pool" 1103 | top: "inception_2a/pool_proj" 1104 | param { 1105 | lr_mult: 0 1106 | decay_mult: 0 1107 | } 1108 | convolution_param { 1109 | num_output: 128 1110 | bias_term: false 1111 | kernel_size: 1 1112 | stride: 1 1113 | weight_filler { 1114 | type: "xavier" 1115 | } 1116 | } 1117 | } 1118 | 1119 | layer { 1120 | name: "inception_2a/pool_proj_bn" 1121 | type: "BatchNorm" 1122 | bottom: "inception_2a/pool_proj" 1123 | top: "inception_2a/pool_proj" 1124 | batch_norm_param { 1125 | use_global_stats: true 1126 | } 1127 | } 1128 | 1129 | layer { 1130 | name: "inception_2a/pool_proj_scale" 1131 | type: "Scale" 1132 | bottom: "inception_2a/pool_proj" 1133 | top: "inception_2a/pool_proj" 1134 | scale_param { 1135 | bias_term: true 1136 | } 1137 | } 1138 | 1139 | layer { 1140 | name: "inception_2a/relu_pool_proj" 1141 | type: "ReLU" 1142 | bottom: "inception_2a/pool_proj" 1143 | top: "inception_2a/pool_proj" 1144 | } 1145 | 1146 | layer { 1147 | name: "inception_2a/output" 1148 | type: "Concat" 1149 | bottom: "inception_2a/1x1" 1150 | bottom: "inception_2a/3x3" 1151 | bottom: "inception_2a/double_3x3_2" 1152 | bottom: "inception_2a/pool_proj" 1153 | top: "inception_2a/output" 1154 | } 1155 | 1156 | ########################### inception_2b ######################### 1157 | 1158 | layer { 1159 | name: "inception_2b/3x3_reduce" 1160 | type: "Convolution" 1161 | bottom: "inception_2a/output" 1162 | top: "inception_2b/3x3_reduce" 1163 | param { 1164 | lr_mult: 0 1165 | decay_mult: 0 1166 | } 1167 | convolution_param { 1168 | num_output: 128 1169 | bias_term: false 1170 | kernel_size: 1 1171 | stride: 1 1172 | weight_filler { 1173 | type: "xavier" 1174 | } 1175 | } 1176 | } 1177 | 1178 | layer { 1179 | name: "inception_2b/3x3_reduce_bn" 1180 | type: "BatchNorm" 1181 | bottom: "inception_2b/3x3_reduce" 1182 | top: "inception_2b/3x3_reduce" 1183 | batch_norm_param { 1184 | use_global_stats: true 1185 | } 1186 | } 1187 | 1188 | layer { 1189 | name: "inception_2b/3x3_reduce_scale" 1190 | type: "Scale" 1191 | bottom: "inception_2b/3x3_reduce" 1192 | top: "inception_2b/3x3_reduce" 1193 | scale_param { 1194 | bias_term: true 1195 | } 1196 | } 1197 | 1198 | layer { 1199 | name: "inception_2b/relu_3x3_reduce" 1200 | type: "ReLU" 1201 | bottom: "inception_2b/3x3_reduce" 1202 | top: "inception_2b/3x3_reduce" 1203 | } 1204 | 1205 | layer { 1206 | name: "inception_2b/3x3" 1207 | type: "Convolution" 1208 | bottom: "inception_2b/3x3_reduce" 1209 | top: "inception_2b/3x3" 1210 | param { 1211 | lr_mult: 0 1212 | decay_mult: 0 1213 | } 1214 | convolution_param { 1215 | num_output: 128 1216 | bias_term: false 1217 | pad: 1 1218 | kernel_size: 3 1219 | stride: 2 1220 | weight_filler { 1221 | type: "xavier" 1222 | } 1223 | } 1224 | } 1225 | 1226 | layer { 1227 | name: "inception_2b/3x3_bn" 1228 | type: "BatchNorm" 1229 | bottom: "inception_2b/3x3" 1230 | top: "inception_2b/3x3" 1231 | batch_norm_param { 1232 | use_global_stats: true 1233 | } 1234 | } 1235 | 1236 | layer { 1237 | name: 
"inception_2b/3x3_scale" 1238 | type: "Scale" 1239 | bottom: "inception_2b/3x3" 1240 | top: "inception_2b/3x3" 1241 | scale_param { 1242 | bias_term: true 1243 | } 1244 | } 1245 | 1246 | layer { 1247 | name: "inception_2b/relu_3x3" 1248 | type: "ReLU" 1249 | bottom: "inception_2b/3x3" 1250 | top: "inception_2b/3x3" 1251 | } 1252 | 1253 | layer { 1254 | name: "inception_2b/double_3x3_reduce" 1255 | type: "Convolution" 1256 | bottom: "inception_2a/output" 1257 | top: "inception_2b/double_3x3_reduce" 1258 | param { 1259 | lr_mult: 0 1260 | decay_mult: 0 1261 | } 1262 | convolution_param { 1263 | num_output: 128 1264 | bias_term: false 1265 | kernel_size: 1 1266 | stride: 1 1267 | weight_filler { 1268 | type: "xavier" 1269 | } 1270 | } 1271 | } 1272 | 1273 | layer { 1274 | name: "inception_2b/double_3x3_reduce_bn" 1275 | type: "BatchNorm" 1276 | bottom: "inception_2b/double_3x3_reduce" 1277 | top: "inception_2b/double_3x3_reduce" 1278 | batch_norm_param { 1279 | use_global_stats: true 1280 | } 1281 | } 1282 | 1283 | layer { 1284 | name: "inception_2b/double_3x3_reduce_scale" 1285 | type: "Scale" 1286 | bottom: "inception_2b/double_3x3_reduce" 1287 | top: "inception_2b/double_3x3_reduce" 1288 | scale_param { 1289 | bias_term: true 1290 | } 1291 | } 1292 | 1293 | layer { 1294 | name: "inception_2b/relu_double_3x3_reduce" 1295 | type: "ReLU" 1296 | bottom: "inception_2b/double_3x3_reduce" 1297 | top: "inception_2b/double_3x3_reduce" 1298 | } 1299 | 1300 | layer { 1301 | name: "inception_2b/double_3x3_1" 1302 | type: "Convolution" 1303 | bottom: "inception_2b/double_3x3_reduce" 1304 | top: "inception_2b/double_3x3_1" 1305 | param { 1306 | lr_mult: 0 1307 | decay_mult: 0 1308 | } 1309 | convolution_param { 1310 | num_output: 128 1311 | bias_term: false 1312 | pad: 1 1313 | kernel_size: 3 1314 | stride: 1 1315 | weight_filler { 1316 | type: "xavier" 1317 | } 1318 | } 1319 | } 1320 | 1321 | layer { 1322 | name: "inception_2b/double_3x3_1_bn" 1323 | type: "BatchNorm" 1324 | bottom: "inception_2b/double_3x3_1" 1325 | top: "inception_2b/double_3x3_1" 1326 | batch_norm_param { 1327 | use_global_stats: true 1328 | } 1329 | } 1330 | 1331 | layer { 1332 | name: "inception_2b/double_3x3_1_scale" 1333 | type: "Scale" 1334 | bottom: "inception_2b/double_3x3_1" 1335 | top: "inception_2b/double_3x3_1" 1336 | scale_param { 1337 | bias_term: true 1338 | } 1339 | } 1340 | 1341 | layer { 1342 | name: "inception_2b/relu_double_3x3_1" 1343 | type: "ReLU" 1344 | bottom: "inception_2b/double_3x3_1" 1345 | top: "inception_2b/double_3x3_1" 1346 | } 1347 | 1348 | layer { 1349 | name: "inception_2b/double_3x3_2" 1350 | type: "Convolution" 1351 | bottom: "inception_2b/double_3x3_1" 1352 | top: "inception_2b/double_3x3_2" 1353 | param { 1354 | lr_mult: 0 1355 | decay_mult: 0 1356 | } 1357 | convolution_param { 1358 | num_output: 128 1359 | bias_term: false 1360 | pad: 1 1361 | kernel_size: 3 1362 | stride: 2 1363 | weight_filler { 1364 | type: "xavier" 1365 | } 1366 | } 1367 | } 1368 | 1369 | layer { 1370 | name: "inception_2b/double_3x3_2_bn" 1371 | type: "BatchNorm" 1372 | bottom: "inception_2b/double_3x3_2" 1373 | top: "inception_2b/double_3x3_2" 1374 | batch_norm_param { 1375 | use_global_stats: true 1376 | } 1377 | } 1378 | 1379 | layer { 1380 | name: "inception_2b/double_3x3_2_scale" 1381 | type: "Scale" 1382 | bottom: "inception_2b/double_3x3_2" 1383 | top: "inception_2b/double_3x3_2" 1384 | scale_param { 1385 | bias_term: true 1386 | } 1387 | } 1388 | 1389 | layer { 1390 | name: "inception_2b/relu_double_3x3_2" 1391 | 
type: "ReLU" 1392 | bottom: "inception_2b/double_3x3_2" 1393 | top: "inception_2b/double_3x3_2" 1394 | } 1395 | 1396 | layer { 1397 | name: "inception_2b/pool" 1398 | type: "Pooling" 1399 | bottom: "inception_2a/output" 1400 | top: "inception_2b/pool" 1401 | pooling_param { 1402 | pool: MAX 1403 | kernel_size: 3 1404 | stride: 2 1405 | } 1406 | } 1407 | 1408 | layer { 1409 | name: "inception_2b/output" 1410 | type: "Concat" 1411 | bottom: "inception_2b/3x3" 1412 | bottom: "inception_2b/double_3x3_2" 1413 | bottom: "inception_2b/pool" 1414 | top: "inception_2b/output" 1415 | } 1416 | 1417 | layer { 1418 | name: "inception_2b/output/larm" 1419 | bottom: "inception_2b/output" 1420 | bottom: "larmbox" 1421 | top: "inception_2b/output/larm" 1422 | type: "ROIPooling" 1423 | roi_pooling_param { 1424 | pooled_w: 12 1425 | pooled_h: 12 1426 | spatial_scale: 0.125 #(1/8) 1427 | } 1428 | } 1429 | 1430 | ######################### inception_3a ######################### 1431 | 1432 | layer { 1433 | name: "inception_3a/1x1/larm" 1434 | type: "Convolution" 1435 | bottom: "inception_2b/output/larm" 1436 | top: "inception_3a/1x1/larm" 1437 | param { 1438 | lr_mult: 1 1439 | decay_mult: 1 1440 | } 1441 | convolution_param { 1442 | num_output: 256 1443 | bias_term: false 1444 | kernel_size: 1 1445 | stride: 1 1446 | weight_filler { 1447 | type: "xavier" 1448 | } 1449 | } 1450 | } 1451 | 1452 | layer { 1453 | name: "inception_3a/1x1_bn/larm" 1454 | type: "BatchNorm" 1455 | bottom: "inception_3a/1x1/larm" 1456 | top: "inception_3a/1x1/larm" 1457 | batch_norm_param { 1458 | use_global_stats: true 1459 | } 1460 | } 1461 | 1462 | layer { 1463 | name: "inception_3a/1x1_scale/larm" 1464 | type: "Scale" 1465 | bottom: "inception_3a/1x1/larm" 1466 | top: "inception_3a/1x1/larm" 1467 | scale_param { 1468 | bias_term: true 1469 | } 1470 | } 1471 | 1472 | layer { 1473 | name: "inception_3a/relu_1x1/larm" 1474 | type: "ReLU" 1475 | bottom: "inception_3a/1x1/larm" 1476 | top: "inception_3a/1x1/larm" 1477 | } 1478 | 1479 | layer { 1480 | name: "inception_3a/3x3_reduce/larm" 1481 | type: "Convolution" 1482 | bottom: "inception_2b/output/larm" 1483 | top: "inception_3a/3x3_reduce/larm" 1484 | param { 1485 | lr_mult: 1 1486 | decay_mult: 1 1487 | } 1488 | convolution_param { 1489 | num_output: 256 1490 | bias_term: false 1491 | kernel_size: 1 1492 | stride: 1 1493 | weight_filler { 1494 | type: "xavier" 1495 | } 1496 | } 1497 | } 1498 | 1499 | layer { 1500 | name: "inception_3a/3x3_reduce_bn/larm" 1501 | type: "BatchNorm" 1502 | bottom: "inception_3a/3x3_reduce/larm" 1503 | top: "inception_3a/3x3_reduce/larm" 1504 | batch_norm_param { 1505 | use_global_stats: true 1506 | } 1507 | } 1508 | 1509 | layer { 1510 | name: "inception_3a/3x3_reduce_scale/larm" 1511 | type: "Scale" 1512 | bottom: "inception_3a/3x3_reduce/larm" 1513 | top: "inception_3a/3x3_reduce/larm" 1514 | scale_param { 1515 | bias_term: true 1516 | } 1517 | } 1518 | 1519 | layer { 1520 | name: "inception_3a/relu_3x3_reduce/larm" 1521 | type: "ReLU" 1522 | bottom: "inception_3a/3x3_reduce/larm" 1523 | top: "inception_3a/3x3_reduce/larm" 1524 | } 1525 | 1526 | layer { 1527 | name: "inception_3a/3x3/larm" 1528 | type: "Convolution" 1529 | bottom: "inception_3a/3x3_reduce/larm" 1530 | top: "inception_3a/3x3/larm" 1531 | param { 1532 | lr_mult: 1 1533 | decay_mult: 1 1534 | } 1535 | convolution_param { 1536 | num_output: 256 1537 | bias_term: false 1538 | pad: 1 1539 | kernel_size: 3 1540 | stride: 1 1541 | weight_filler { 1542 | type: "xavier" 1543 | } 1544 | } 1545 | 
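# From inception_3a onward every layer carries the /larm suffix: these form
# the left-arm branch, which runs on the crop taken from inception_2b/output
# by the ROIPooling layer above. Its spatial_scale of 0.125 (1/8) corresponds
# to an overall 8x downsampling in the trunk, so with the 96x96 inputs used by
# the data layers in this repo the full feature map lines up with the 12x12
# pooled grid. Unlike the frozen trunk, the branch layers are trained
# (lr_mult: 1, decay_mult: 1).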
} 1546 | 1547 | layer { 1548 | name: "inception_3a/3x3_bn/larm" 1549 | type: "BatchNorm" 1550 | bottom: "inception_3a/3x3/larm" 1551 | top: "inception_3a/3x3/larm" 1552 | batch_norm_param { 1553 | use_global_stats: true 1554 | } 1555 | } 1556 | 1557 | layer { 1558 | name: "inception_3a/3x3_scale/larm" 1559 | type: "Scale" 1560 | bottom: "inception_3a/3x3/larm" 1561 | top: "inception_3a/3x3/larm" 1562 | scale_param { 1563 | bias_term: true 1564 | } 1565 | } 1566 | 1567 | layer { 1568 | name: "inception_3a/relu_3x3/larm" 1569 | type: "ReLU" 1570 | bottom: "inception_3a/3x3/larm" 1571 | top: "inception_3a/3x3/larm" 1572 | } 1573 | 1574 | layer { 1575 | name: "inception_3a/double_3x3_reduce/larm" 1576 | type: "Convolution" 1577 | bottom: "inception_2b/output/larm" 1578 | top: "inception_3a/double_3x3_reduce/larm" 1579 | param { 1580 | lr_mult: 1 1581 | decay_mult: 1 1582 | } 1583 | convolution_param { 1584 | num_output: 256 1585 | bias_term: false 1586 | kernel_size: 1 1587 | stride: 1 1588 | weight_filler { 1589 | type: "xavier" 1590 | } 1591 | } 1592 | } 1593 | 1594 | layer { 1595 | name: "inception_3a/double_3x3_reduce_bn/larm" 1596 | type: "BatchNorm" 1597 | bottom: "inception_3a/double_3x3_reduce/larm" 1598 | top: "inception_3a/double_3x3_reduce/larm" 1599 | batch_norm_param { 1600 | use_global_stats: true 1601 | } 1602 | } 1603 | 1604 | layer { 1605 | name: "inception_3a/double_3x3_reduce_scale/larm" 1606 | type: "Scale" 1607 | bottom: "inception_3a/double_3x3_reduce/larm" 1608 | top: "inception_3a/double_3x3_reduce/larm" 1609 | scale_param { 1610 | bias_term: true 1611 | } 1612 | } 1613 | 1614 | layer { 1615 | name: "inception_3a/relu_double_3x3_reduce/larm" 1616 | type: "ReLU" 1617 | bottom: "inception_3a/double_3x3_reduce/larm" 1618 | top: "inception_3a/double_3x3_reduce/larm" 1619 | } 1620 | 1621 | layer { 1622 | name: "inception_3a/double_3x3_1/larm" 1623 | type: "Convolution" 1624 | bottom: "inception_3a/double_3x3_reduce/larm" 1625 | top: "inception_3a/double_3x3_1/larm" 1626 | param { 1627 | lr_mult: 1 1628 | decay_mult: 1 1629 | } 1630 | convolution_param { 1631 | num_output: 256 1632 | bias_term: false 1633 | pad: 1 1634 | kernel_size: 3 1635 | stride: 1 1636 | weight_filler { 1637 | type: "xavier" 1638 | } 1639 | } 1640 | } 1641 | 1642 | layer { 1643 | name: "inception_3a/double_3x3_1_bn/larm" 1644 | type: "BatchNorm" 1645 | bottom: "inception_3a/double_3x3_1/larm" 1646 | top: "inception_3a/double_3x3_1/larm" 1647 | batch_norm_param { 1648 | use_global_stats: true 1649 | } 1650 | } 1651 | 1652 | layer { 1653 | name: "inception_3a/double_3x3_1_scale/larm" 1654 | type: "Scale" 1655 | bottom: "inception_3a/double_3x3_1/larm" 1656 | top: "inception_3a/double_3x3_1/larm" 1657 | scale_param { 1658 | bias_term: true 1659 | } 1660 | } 1661 | 1662 | layer { 1663 | name: "inception_3a/relu_double_3x3_1/larm" 1664 | type: "ReLU" 1665 | bottom: "inception_3a/double_3x3_1/larm" 1666 | top: "inception_3a/double_3x3_1/larm" 1667 | } 1668 | 1669 | layer { 1670 | name: "inception_3a/double_3x3_2/larm" 1671 | type: "Convolution" 1672 | bottom: "inception_3a/double_3x3_1/larm" 1673 | top: "inception_3a/double_3x3_2/larm" 1674 | param { 1675 | lr_mult: 1 1676 | decay_mult: 1 1677 | } 1678 | convolution_param { 1679 | num_output: 256 1680 | bias_term: false 1681 | pad: 1 1682 | kernel_size: 3 1683 | stride: 1 1684 | weight_filler { 1685 | type: "xavier" 1686 | } 1687 | } 1688 | } 1689 | 1690 | layer { 1691 | name: "inception_3a/double_3x3_2_bn/larm" 1692 | type: "BatchNorm" 1693 | bottom: 
"inception_3a/double_3x3_2/larm" 1694 | top: "inception_3a/double_3x3_2/larm" 1695 | batch_norm_param { 1696 | use_global_stats: true 1697 | } 1698 | } 1699 | 1700 | layer { 1701 | name: "inception_3a/double_3x3_2_scale/larm" 1702 | type: "Scale" 1703 | bottom: "inception_3a/double_3x3_2/larm" 1704 | top: "inception_3a/double_3x3_2/larm" 1705 | scale_param { 1706 | bias_term: true 1707 | } 1708 | } 1709 | 1710 | layer { 1711 | name: "inception_3a/relu_double_3x3_2/larm" 1712 | type: "ReLU" 1713 | bottom: "inception_3a/double_3x3_2/larm" 1714 | top: "inception_3a/double_3x3_2/larm" 1715 | } 1716 | 1717 | layer { 1718 | name: "inception_3a/pool/larm" 1719 | type: "Pooling" 1720 | bottom: "inception_2b/output/larm" 1721 | top: "inception_3a/pool/larm" 1722 | pooling_param { 1723 | pool: AVE 1724 | kernel_size: 3 1725 | stride: 1 1726 | pad: 1 1727 | } 1728 | } 1729 | 1730 | layer { 1731 | name: "inception_3a/pool_proj/larm" 1732 | type: "Convolution" 1733 | bottom: "inception_3a/pool/larm" 1734 | top: "inception_3a/pool_proj/larm" 1735 | param { 1736 | lr_mult: 1 1737 | decay_mult: 1 1738 | } 1739 | convolution_param { 1740 | num_output: 256 1741 | bias_term: false 1742 | kernel_size: 1 1743 | stride: 1 1744 | weight_filler { 1745 | type: "xavier" 1746 | } 1747 | } 1748 | } 1749 | 1750 | layer { 1751 | name: "inception_3a/pool_proj_bn/larm" 1752 | type: "BatchNorm" 1753 | bottom: "inception_3a/pool_proj/larm" 1754 | top: "inception_3a/pool_proj/larm" 1755 | batch_norm_param { 1756 | use_global_stats: true 1757 | } 1758 | } 1759 | 1760 | layer { 1761 | name: "inception_3a/pool_proj_scale/larm" 1762 | type: "Scale" 1763 | bottom: "inception_3a/pool_proj/larm" 1764 | top: "inception_3a/pool_proj/larm" 1765 | scale_param { 1766 | bias_term: true 1767 | } 1768 | } 1769 | 1770 | layer { 1771 | name: "inception_3a/relu_pool_proj/larm" 1772 | type: "ReLU" 1773 | bottom: "inception_3a/pool_proj/larm" 1774 | top: "inception_3a/pool_proj/larm" 1775 | } 1776 | 1777 | layer { 1778 | name: "inception_3a/output/larm" 1779 | type: "Concat" 1780 | bottom: "inception_3a/1x1/larm" 1781 | bottom: "inception_3a/3x3/larm" 1782 | bottom: "inception_3a/double_3x3_2/larm" 1783 | bottom: "inception_3a/pool_proj/larm" 1784 | top: "inception_3a/output/larm" 1785 | } 1786 | 1787 | ######################### inception_3b ######################### 1788 | 1789 | layer { 1790 | name: "inception_3b/3x3_reduce/larm" 1791 | type: "Convolution" 1792 | bottom: "inception_3a/output/larm" 1793 | top: "inception_3b/3x3_reduce/larm" 1794 | param { 1795 | lr_mult: 1 1796 | decay_mult: 1 1797 | } 1798 | convolution_param { 1799 | num_output: 256 1800 | bias_term: false 1801 | kernel_size: 1 1802 | stride: 1 1803 | weight_filler { 1804 | type: "xavier" 1805 | } 1806 | } 1807 | } 1808 | 1809 | layer { 1810 | name: "inception_3b/3x3_reduce_bn/larm" 1811 | type: "BatchNorm" 1812 | bottom: "inception_3b/3x3_reduce/larm" 1813 | top: "inception_3b/3x3_reduce/larm" 1814 | batch_norm_param { 1815 | use_global_stats: true 1816 | } 1817 | } 1818 | 1819 | layer { 1820 | name: "inception_3b/3x3_reduce_scale/larm" 1821 | type: "Scale" 1822 | bottom: "inception_3b/3x3_reduce/larm" 1823 | top: "inception_3b/3x3_reduce/larm" 1824 | scale_param { 1825 | bias_term: true 1826 | } 1827 | } 1828 | 1829 | layer { 1830 | name: "inception_3b/relu_3x3_reduce/larm" 1831 | type: "ReLU" 1832 | bottom: "inception_3b/3x3_reduce/larm" 1833 | top: "inception_3b/3x3_reduce/larm" 1834 | } 1835 | 1836 | layer { 1837 | name: "inception_3b/3x3/larm" 1838 | type: 
"Convolution" 1839 | bottom: "inception_3b/3x3_reduce/larm" 1840 | top: "inception_3b/3x3/larm" 1841 | param { 1842 | lr_mult: 1 1843 | decay_mult: 1 1844 | } 1845 | convolution_param { 1846 | num_output: 256 1847 | bias_term: false 1848 | pad: 1 1849 | kernel_size: 3 1850 | stride: 2 1851 | weight_filler { 1852 | type: "xavier" 1853 | } 1854 | } 1855 | } 1856 | 1857 | layer { 1858 | name: "inception_3b/3x3_bn/larm" 1859 | type: "BatchNorm" 1860 | bottom: "inception_3b/3x3/larm" 1861 | top: "inception_3b/3x3/larm" 1862 | batch_norm_param { 1863 | use_global_stats: true 1864 | } 1865 | } 1866 | 1867 | layer { 1868 | name: "inception_3b/3x3_scale/larm" 1869 | type: "Scale" 1870 | bottom: "inception_3b/3x3/larm" 1871 | top: "inception_3b/3x3/larm" 1872 | scale_param { 1873 | bias_term: true 1874 | } 1875 | } 1876 | 1877 | layer { 1878 | name: "inception_3b/relu_3x3/larm" 1879 | type: "ReLU" 1880 | bottom: "inception_3b/3x3/larm" 1881 | top: "inception_3b/3x3/larm" 1882 | } 1883 | 1884 | layer { 1885 | name: "inception_3b/double_3x3_reduce/larm" 1886 | type: "Convolution" 1887 | bottom: "inception_3a/output/larm" 1888 | top: "inception_3b/double_3x3_reduce/larm" 1889 | param { 1890 | lr_mult: 1 1891 | decay_mult: 1 1892 | } 1893 | convolution_param { 1894 | num_output: 256 1895 | bias_term: false 1896 | kernel_size: 1 1897 | stride: 1 1898 | weight_filler { 1899 | type: "xavier" 1900 | } 1901 | } 1902 | } 1903 | 1904 | layer { 1905 | name: "inception_3b/double_3x3_reduce_bn/larm" 1906 | type: "BatchNorm" 1907 | bottom: "inception_3b/double_3x3_reduce/larm" 1908 | top: "inception_3b/double_3x3_reduce/larm" 1909 | batch_norm_param { 1910 | use_global_stats: true 1911 | } 1912 | } 1913 | 1914 | layer { 1915 | name: "inception_3b/double_3x3_reduce_scale/larm" 1916 | type: "Scale" 1917 | bottom: "inception_3b/double_3x3_reduce/larm" 1918 | top: "inception_3b/double_3x3_reduce/larm" 1919 | scale_param { 1920 | bias_term: true 1921 | } 1922 | } 1923 | 1924 | layer { 1925 | name: "inception_3b/relu_double_3x3_reduce/larm" 1926 | type: "ReLU" 1927 | bottom: "inception_3b/double_3x3_reduce/larm" 1928 | top: "inception_3b/double_3x3_reduce/larm" 1929 | } 1930 | 1931 | layer { 1932 | name: "inception_3b/double_3x3_1/larm" 1933 | type: "Convolution" 1934 | bottom: "inception_3b/double_3x3_reduce/larm" 1935 | top: "inception_3b/double_3x3_1/larm" 1936 | param { 1937 | lr_mult: 1 1938 | decay_mult: 1 1939 | } 1940 | convolution_param { 1941 | num_output: 256 1942 | bias_term: false 1943 | pad: 1 1944 | kernel_size: 3 1945 | stride: 1 1946 | weight_filler { 1947 | type: "xavier" 1948 | } 1949 | } 1950 | } 1951 | 1952 | layer { 1953 | name: "inception_3b/double_3x3_1_bn/larm" 1954 | type: "BatchNorm" 1955 | bottom: "inception_3b/double_3x3_1/larm" 1956 | top: "inception_3b/double_3x3_1/larm" 1957 | batch_norm_param { 1958 | use_global_stats: true 1959 | } 1960 | } 1961 | 1962 | layer { 1963 | name: "inception_3b/double_3x3_1_scale/larm" 1964 | type: "Scale" 1965 | bottom: "inception_3b/double_3x3_1/larm" 1966 | top: "inception_3b/double_3x3_1/larm" 1967 | scale_param { 1968 | bias_term: true 1969 | } 1970 | } 1971 | 1972 | layer { 1973 | name: "inception_3b/relu_double_3x3_1/larm" 1974 | type: "ReLU" 1975 | bottom: "inception_3b/double_3x3_1/larm" 1976 | top: "inception_3b/double_3x3_1/larm" 1977 | } 1978 | 1979 | layer { 1980 | name: "inception_3b/double_3x3_2/larm" 1981 | type: "Convolution" 1982 | bottom: "inception_3b/double_3x3_1/larm" 1983 | top: "inception_3b/double_3x3_2/larm" 1984 | param { 1985 | 
lr_mult: 1 1986 | decay_mult: 1 1987 | } 1988 | convolution_param { 1989 | num_output: 256 1990 | bias_term: false 1991 | pad: 1 1992 | kernel_size: 3 1993 | stride: 2 1994 | weight_filler { 1995 | type: "xavier" 1996 | } 1997 | } 1998 | } 1999 | 2000 | layer { 2001 | name: "inception_3b/double_3x3_2_bn/larm" 2002 | type: "BatchNorm" 2003 | bottom: "inception_3b/double_3x3_2/larm" 2004 | top: "inception_3b/double_3x3_2/larm" 2005 | batch_norm_param { 2006 | use_global_stats: true 2007 | } 2008 | } 2009 | 2010 | layer { 2011 | name: "inception_3b/double_3x3_2_scale/larm" 2012 | type: "Scale" 2013 | bottom: "inception_3b/double_3x3_2/larm" 2014 | top: "inception_3b/double_3x3_2/larm" 2015 | scale_param { 2016 | bias_term: true 2017 | } 2018 | } 2019 | 2020 | layer { 2021 | name: "inception_3b/relu_double_3x3_2/larm" 2022 | type: "ReLU" 2023 | bottom: "inception_3b/double_3x3_2/larm" 2024 | top: "inception_3b/double_3x3_2/larm" 2025 | } 2026 | 2027 | layer { 2028 | name: "inception_3b/pool/larm" 2029 | type: "Pooling" 2030 | bottom: "inception_3a/output/larm" 2031 | top: "inception_3b/pool/larm" 2032 | pooling_param { 2033 | pool: MAX 2034 | kernel_size: 3 2035 | stride: 2 2036 | } 2037 | } 2038 | 2039 | layer { 2040 | name: "inception_3b/output/larm" 2041 | type: "Concat" 2042 | bottom: "inception_3b/3x3/larm" 2043 | bottom: "inception_3b/double_3x3_2/larm" 2044 | bottom: "inception_3b/pool/larm" 2045 | top: "inception_3b/output/larm" 2046 | } 2047 | 2048 | ######################### global pool ######################### 2049 | 2050 | layer { 2051 | name: "global_pool/larm" 2052 | top: "global_pool/larm" 2053 | bottom: "inception_3b/output/larm" 2054 | type: "Pooling" 2055 | pooling_param { 2056 | pool: AVE 2057 | kernel_h: 6 2058 | kernel_w: 6 2059 | stride: 1 2060 | } 2061 | } 2062 | 2063 | ######################### fc ######################### 2064 | 2065 | layer { 2066 | name: "fc7/larm" 2067 | type: "InnerProduct" 2068 | bottom: "global_pool/larm" 2069 | top: "fc7/larm" 2070 | param { 2071 | lr_mult: 1 2072 | decay_mult: 1 2073 | } 2074 | param { 2075 | lr_mult: 2 2076 | decay_mult: 0 2077 | } 2078 | inner_product_param { 2079 | num_output: 256 2080 | weight_filler { 2081 | type: "gaussian" 2082 | std: 0.001 2083 | } 2084 | bias_filler { 2085 | type: "constant" 2086 | value: 0 2087 | } 2088 | } 2089 | } 2090 | 2091 | layer { 2092 | name: "fc7_bn/larm" 2093 | type: "BatchNorm" 2094 | bottom: "fc7/larm" 2095 | top: "fc7/larm" 2096 | batch_norm_param { 2097 | use_global_stats: true 2098 | } 2099 | } 2100 | 2101 | layer { 2102 | name: "fc7_scale/larm" 2103 | type: "Scale" 2104 | bottom: "fc7/larm" 2105 | top: "fc7/larm" 2106 | scale_param { 2107 | bias_term: true 2108 | } 2109 | } 2110 | 2111 | layer { 2112 | name: "silence" 2113 | type: "Silence" 2114 | bottom: "headbox" 2115 | bottom: "bodybox" 2116 | bottom: "legbox" 2117 | bottom: "rarmbox" 2118 | bottom: "rlegbox" 2119 | bottom: "llegbox" 2120 | } 2121 | 2122 | -------------------------------------------------------------------------------- /models/leg/leg_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/leg/leg_trainval.prototxt" 2 | test_iter: 1192 3 | test_interval: 1000 4 | test_initialization: false 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "multistep" 11 | base_lr: 0.1 12 | gamma: 0.5 13 | stepvalue: 25000 14 | stepvalue: 35000 15 | stepvalue: 40000 16 | stepvalue: 45000 17 | stepvalue: 50000 18 | stepvalue: 55000 19 | stepvalue: 
60000 20 | stepvalue: 65000 21 | max_iter: 70000 22 | 23 | momentum: 0.9 24 | weight_decay: 0.0005 25 | 26 | snapshot: 10000 27 | snapshot_prefix: "external/exp/snapshots/leg/leg" 28 | solver_mode: GPU 29 | 30 | -------------------------------------------------------------------------------- /models/lleg/lleg_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/lleg/lleg_trainval.prototxt" 2 | test_iter: 1192 3 | test_interval: 1000 4 | test_initialization: false 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "multistep" 11 | base_lr: 0.1 12 | gamma: 0.5 13 | stepvalue: 25000 14 | stepvalue: 35000 15 | stepvalue: 40000 16 | stepvalue: 45000 17 | stepvalue: 50000 18 | stepvalue: 55000 19 | stepvalue: 60000 20 | stepvalue: 65000 21 | max_iter: 70000 22 | 23 | momentum: 0.9 24 | weight_decay: 0.0005 25 | 26 | snapshot: 10000 27 | snapshot_prefix: "external/exp/snapshots/lleg/lleg" 28 | solver_mode: GPU 29 | 30 | -------------------------------------------------------------------------------- /models/new/new_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/new/new_trainval.prototxt" 2 | 3 | display: 1 4 | iter_size: 1 5 | 6 | lr_policy: "multistep" 7 | base_lr: 0.1 8 | gamma: 0.5 9 | max_iter: 1 10 | 11 | momentum: 0.9 12 | weight_decay: 0.0005 13 | 14 | snapshot: 10000 15 | snapshot_prefix: "external/exp/snapshots/new/new" 16 | solver_mode: GPU 17 | 18 | -------------------------------------------------------------------------------- /models/new/new_trainval.prototxt: -------------------------------------------------------------------------------- 1 | name: "new" 2 | 3 | layer { 4 | name: "data" 5 | type: "Python" 6 | top: "data" 7 | top: "label" 8 | top: "headbox" 9 | top: "bodybox" 10 | top: "legbox" 11 | top: "rarmbox" 12 | top: "larmbox" 13 | top: "rlegbox" 14 | top: "llegbox" 15 | python_param { 16 | module: "roi_data_layer" 17 | layer: "RoiDataLayer" 18 | param_str: "{'source': 'external/exp/datalists/jstl_10/test.txt', 'root_folder': '', 'batch_size': 1, 'new_height': 96, 'new_width': 96, 'shuffle': False, 'mirror': False, 'mean_value': [103.939,116.779,123.68], 'region_num': 7, 'region_scale': True}" 19 | } 20 | } 21 | 22 | layer { 23 | name: "silence" 24 | type: "Silence" 25 | bottom: "data" 26 | bottom: "label" 27 | } 28 | 29 | -------------------------------------------------------------------------------- /models/rarm/rarm_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/rarm/rarm_trainval.prototxt" 2 | test_iter: 1192 3 | test_interval: 1000 4 | test_initialization: false 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "multistep" 11 | base_lr: 0.1 12 | gamma: 0.5 13 | stepvalue: 25000 14 | stepvalue: 35000 15 | stepvalue: 40000 16 | stepvalue: 45000 17 | stepvalue: 50000 18 | stepvalue: 55000 19 | stepvalue: 60000 20 | stepvalue: 65000 21 | max_iter: 70000 22 | 23 | momentum: 0.9 24 | weight_decay: 0.0005 25 | 26 | snapshot: 10000 27 | snapshot_prefix: "external/exp/snapshots/rarm/rarm" 28 | solver_mode: GPU 29 | 30 | -------------------------------------------------------------------------------- /models/rleg/rleg_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/rleg/rleg_trainval.prototxt" 2 | test_iter: 1192 3 | test_interval: 1000 4 | test_initialization: 
false 5 | 6 | display: 20 7 | average_loss: 20 8 | iter_size: 2 9 | 10 | lr_policy: "multistep" 11 | base_lr: 0.1 12 | gamma: 0.5 13 | stepvalue: 25000 14 | stepvalue: 35000 15 | stepvalue: 40000 16 | stepvalue: 45000 17 | stepvalue: 50000 18 | stepvalue: 55000 19 | stepvalue: 60000 20 | stepvalue: 65000 21 | max_iter: 70000 22 | 23 | momentum: 0.9 24 | weight_decay: 0.0005 25 | 26 | snapshot: 10000 27 | snapshot_prefix: "external/exp/snapshots/rleg/rleg" 28 | solver_mode: GPU 29 | 30 | -------------------------------------------------------------------------------- /models/spindlenet/spindlenet_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "models/spindlenet/spindlenet_trainval.prototxt" 2 | 3 | display: 20 4 | average_loss: 20 5 | iter_size: 2 6 | 7 | lr_policy: "multistep" 8 | base_lr: 0.005 9 | gamma: 0.5 10 | stepvalue: 30000 11 | stepvalue: 40000 12 | max_iter: 50000 13 | 14 | momentum: 0.9 15 | weight_decay: 0.0005 16 | 17 | snapshot: 10000 18 | snapshot_prefix: "external/exp/snapshots/spindlenet/spindlenet" 19 | solver_mode: GPU 20 | 21 | -------------------------------------------------------------------------------- /scripts/format_rawdata.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd $(dirname ${BASH_SOURCE[0]})/../ 4 | 5 | RAW=external/raw_data 6 | EXP=external/exp 7 | 8 | echo "Formatting CUHK03 ..." 9 | if [ ! -d "$RAW/cuhk03_release" ]; then 10 | unzip -q -d $RAW/ $RAW/cuhk03_release.zip 11 | fi 12 | # Save the matfile in the v7 format for faster loading 13 | cd $RAW/cuhk03_release 14 | matlab -nodisplay -nojvm -nosplash -r "load('cuhk-03.mat'); save('cuhk-03.mat', 'detected', 'labeled', 'testsets', '-v7'); exit;" 15 | cd - 16 | python data/format_cuhk03.py $RAW/cuhk03_release $EXP/datasets/cuhk03 17 | 18 | echo "Formatting CUHK01 ..." 19 | if [ ! -d "$RAW/cuhk01" ]; then 20 | unzip -q -d $RAW/cuhk01/ $RAW/CUHK01.zip 21 | fi 22 | python data/format_cuhk01.py $RAW/cuhk01 $EXP/datasets/cuhk01 23 | 24 | echo "Formatting PRID ..." 25 | if [ ! -d "$RAW/prid" ]; then 26 | unzip -q -d $RAW/prid/ $RAW/prid_2011.zip 27 | fi 28 | python data/format_prid.py $RAW/prid $EXP/datasets/prid 29 | 30 | echo "Formatting VIPeR ..." 31 | if [ ! -d "$RAW/VIPeR" ]; then 32 | unzip -q -d $RAW/ $RAW/VIPeR.v1.0.zip 33 | fi 34 | python data/format_viper.py $RAW/VIPeR $EXP/datasets/viper 35 | 36 | echo "Formatting 3DPeS ..." 37 | if [ ! -d "$RAW/3DPeS" ]; then 38 | unzip -q -d $RAW/ $RAW/3DPeS_ReId_Snap.zip 39 | fi 40 | python data/format_3dpes.py $RAW/3DPeS $EXP/datasets/3dpes 41 | 42 | echo "Formatting i-LIDS ..." 43 | if [ ! -d "$RAW/i-LIDS" ]; then 44 | tar -xf $RAW/i-LIDS.tar.gz -C $RAW/ 45 | fi 46 | python data/format_ilids.py $RAW/i-LIDS $EXP/datasets/ilids 47 | 48 | echo "Formatting Shinpuhkan ..." 49 | if [ ! -d "$RAW/Shinpuhkan2014dataset" ]; then 50 | unzip -q -d $RAW/ $RAW/Shinpuhkan2014dataset.zip 51 | fi 52 | python data/format_shinpuhkan.py $RAW/Shinpuhkan2014dataset $EXP/datasets/shinpuhkan 53 | 54 | echo "Formatting CUHK02 ..." 55 | if [ ! -d "$RAW/cuhk02" ]; then 56 | tar -xf $RAW/cuhk02.tar.gz -C $RAW/ 57 | fi 58 | python data/format_cuhk02.py $RAW/cuhk02 $EXP/datasets/cuhk02 59 | 60 | echo "Formatting PSDB ..." 61 | if [ !
-d "$RAW/psdb" ]; then 62 | tar -xf $RAW/psdb.tar -C $RAW/ 63 | fi 64 | # Save the matfile in the v7 format for faster loading 65 | cd $RAW/psdb 66 | matlab -nodisplay -nojvm -nosplash -r "load('person.mat'); save('person.mat', 'person', '-v7'); exit;" 67 | cd - 68 | python data/format_psdb.py $RAW/psdb $EXP/datasets/psdb 69 | 70 | echo "Formatting Market-1501 ..." 71 | if [ ! -d "$RAW/Market-1501-v15.09.15" ]; then 72 | unzip -q -d $RAW/ $RAW/Market-1501-v15.09.15.zip 73 | fi 74 | python data/format_market1501.py $RAW/Market-1501-v15.09.15 $EXP/datasets/market1501 75 | 76 | echo "Extracting SenseReID ..." 77 | if [ ! -d "$RAW/SenseReID" ]; then 78 | unzip -q -d $RAW/ $RAW/SenseReID.zip 79 | fi 80 | 81 | -------------------------------------------------------------------------------- /scripts/gen_proposal_datalist.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Change to the project root directory. Assume this file is at scripts/. 3 | cd $(dirname ${BASH_SOURCE[0]})/../ 4 | 5 | EXP_DIR=external/exp 6 | DATALISTS_DIR=${EXP_DIR}/datalists 7 | 8 | echo "-----jstl_10 train-----" 9 | python RPN/inference.py $DATALISTS_DIR/jstl_10/train.txt $DATALISTS_DIR/jstl_10/train_p.txt jstl_10 10 | echo "-----jstl_10 val-----" 11 | python RPN/inference.py $DATALISTS_DIR/jstl_10/val.txt $DATALISTS_DIR/jstl_10/val_p.txt jstl_10 12 | 13 | for dataset in cuhk03 cuhk01 prid viper 3dpes ilids market1501 sensereid; do 14 | echo "-----${dataset} test_probe-----" 15 | python RPN/inference.py $DATALISTS_DIR/${dataset}/test_probe.txt $DATALISTS_DIR/${dataset}/test_probe_p.txt ${dataset} 16 | echo "-----${dataset} test_gallery-----" 17 | python RPN/inference.py $DATALISTS_DIR/${dataset}/test_gallery.txt $DATALISTS_DIR/${dataset}/test_gallery_p.txt ${dataset} 18 | done 19 | 20 | -------------------------------------------------------------------------------- /scripts/make_datalists.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd $(dirname ${BASH_SOURCE[0]})/../ 4 | 5 | EXP=external/exp 6 | 7 | for d in 3dpes cuhk01 cuhk02 cuhk03 ilids market1501 prid psdb shinpuhkan viper sensereid; do 8 | echo "Making $d datalists..." 9 | python tools/make_lists.py $EXP/datasets/$d $EXP/datalists/$d 10 | done 11 | 12 | -------------------------------------------------------------------------------- /scripts/merge_datalists.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd $(dirname ${BASH_SOURCE[0]})/../ 4 | 5 | EXP=external/exp 6 | DATALISTS=$EXP/datalists 7 | 8 | python tools/merge_lists.py \ 9 | $DATALISTS/jstl_10 \ 10 | --datalist-dirs $DATALISTS/3dpes $DATALISTS/cuhk01 $DATALISTS/cuhk02 \ 11 | $DATALISTS/cuhk03 $DATALISTS/ilids $DATALISTS/market1501 \ 12 | $DATALISTS/prid $DATALISTS/psdb $DATALISTS/shinpuhkan $DATALISTS/viper 13 | 14 | python tools/check_jstltrainlist.py $DATALISTS/jstl_10 15 | 16 | -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Change to the project root directory. Assume this file is at scripts/.
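# Pipeline: for every dataset, extract fc7/spindle features for the probe and
# gallery lists with Caffe's extract_features tool (written to LMDB), convert
# the LMDBs to .npy arrays, then score each probe/gallery pair with
# tools/evaluation.py (CMC).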
3 | cd $(dirname ${BASH_SOURCE[0]})/../ 4 | 5 | # Some constants 6 | CAFFE_DIR=external/caffe 7 | 8 | EXP_DIR=external/exp 9 | DATALISTS_DIR=${EXP_DIR}/datalists 10 | RESULTS_DIR=${EXP_DIR}/results 11 | SNAPSHOTS_DIR=${EXP_DIR}/snapshots 12 | 13 | MODELS_DIR=models 14 | 15 | extract_features() { 16 | local model_name=$1 17 | local dataset=$2 18 | local trained_model=$3 19 | local blob=fc7/spindle 20 | 21 | local result_dir=${RESULTS_DIR}/${model_name}/${dataset}_${blob} 22 | rm -rf ${result_dir} 23 | mkdir -p ${result_dir} 24 | 25 | # Extract test probe, and test gallery features. 26 | for subset in test_probe test_gallery; do 27 | echo "Extracting ${subset} set" 28 | local num_samples=$(wc -l ${DATALISTS_DIR}/${dataset}/${subset}.txt | awk '{print $1}') 29 | local num_iters=$(((num_samples + 99) / 100)) 30 | local tmp_model=${MODELS_DIR}/${model_name}/${model_name}_tmp.prototxt 31 | sed -e "s/\${dataset}/${dataset}/g; s/\${subset}/${subset}/g" \ 32 | ${MODELS_DIR}/${model_name}/${model_name}_test.prototxt > ${tmp_model} 33 | ${CAFFE_DIR}/build/tools/extract_features \ 34 | ${trained_model} ${tmp_model} ${blob},label \ 35 | ${result_dir}/${subset}_features_lmdb,${result_dir}/${subset}_labels_lmdb \ 36 | ${num_iters} lmdb GPU 0 37 | python tools/convert_lmdb_to_numpy.py \ 38 | ${result_dir}/${subset}_features_lmdb ${result_dir}/${subset}_features.npy \ 39 | --truncate ${num_samples} 40 | python tools/convert_lmdb_to_numpy.py \ 41 | ${result_dir}/${subset}_labels_lmdb ${result_dir}/${subset}_labels.npy \ 42 | --truncate ${num_samples} 43 | rm ${tmp_model} 44 | done 45 | } 46 | 47 | model_name=spindlenet 48 | 49 | trained_model=${SNAPSHOTS_DIR}/${model_name}/${model_name}_iter_50000.caffemodel 50 | 51 | # Extract features on all datasets 52 | for dataset in cuhk03 cuhk01 prid viper 3dpes ilids market1501 sensereid; do 53 | extract_features ${model_name} ${dataset} ${trained_model} 54 | done 55 | 56 | # Evaluate performance 57 | for dataset in cuhk03 cuhk01 prid viper 3dpes ilids market1501 sensereid; do 58 | echo ${dataset} #> ${result_dir}/result.log 59 | blob=fc7/spindle 60 | result_dir=${RESULTS_DIR}/${model_name}/${dataset}_${blob} 61 | python tools/evaluation.py ${result_dir} #> ${result_dir}/result.log 62 | done 63 | 64 | -------------------------------------------------------------------------------- /scripts/train_base.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Change to the project root directory. Assume this file is at scripts/. 3 | cd $(dirname ${BASH_SOURCE[0]})/../ 4 | 5 | # Some constants 6 | CAFFE_DIR=external/caffe 7 | 8 | EXP_DIR=external/exp 9 | SNAPSHOTS_DIR=${EXP_DIR}/snapshots 10 | MODELS_DIR=models 11 | LOGS_DIR=logs 12 | 13 | 14 | model_name=base 15 | 16 | mkdir -p ${LOGS_DIR}/${model_name} 17 | mkdir -p ${SNAPSHOTS_DIR}/${model_name} 18 | 19 | solver=${MODELS_DIR}/${model_name}/${model_name}_solver.prototxt 20 | log=${LOGS_DIR}/${model_name}/ 21 | 22 | GLOG_log_dir=${log} ${CAFFE_DIR}/build/tools/caffe train --solver=${solver} --gpu=0 23 | 24 | -------------------------------------------------------------------------------- /scripts/train_body.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Change to the project root directory. Assume this file is at scripts/. 
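# Fine-tune the body-region branch from the jointly trained base snapshot.
# The --gpu id below is hard-coded; the part-branch scripts appear intended to
# run in parallel on different GPUs, so adjust the id for your machine.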
3 | cd $(dirname ${BASH_SOURCE[0]})/../ 4 | 5 | # Some constants 6 | CAFFE_DIR=external/caffe 7 | 8 | EXP_DIR=external/exp 9 | SNAPSHOTS_DIR=${EXP_DIR}/snapshots 10 | MODELS_DIR=models 11 | LOGS_DIR=logs 12 | 13 | 14 | model_name=body 15 | 16 | mkdir -p ${LOGS_DIR}/${model_name} 17 | mkdir -p ${SNAPSHOTS_DIR}/${model_name} 18 | 19 | solver=${MODELS_DIR}/${model_name}/${model_name}_solver.prototxt 20 | log=${LOGS_DIR}/${model_name}/ 21 | 22 | pretrained_model=${SNAPSHOTS_DIR}/base/base_iter_70000.caffemodel 23 | 24 | GLOG_log_dir=${log} ${CAFFE_DIR}/build/tools/caffe train --solver=${solver} --gpu=2 \ 25 | --weights=${pretrained_model} 26 | 27 | -------------------------------------------------------------------------------- /scripts/train_head.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Change to the project root directory. Assume this file is at scripts/. 3 | cd $(dirname ${BASH_SOURCE[0]})/../ 4 | 5 | # Some constants 6 | CAFFE_DIR=external/caffe 7 | 8 | EXP_DIR=external/exp 9 | SNAPSHOTS_DIR=${EXP_DIR}/snapshots 10 | MODELS_DIR=models 11 | LOGS_DIR=logs 12 | 13 | 14 | model_name=head 15 | 16 | mkdir -p ${LOGS_DIR}/${model_name} 17 | mkdir -p ${SNAPSHOTS_DIR}/${model_name} 18 | 19 | solver=${MODELS_DIR}/${model_name}/${model_name}_solver.prototxt 20 | log=${LOGS_DIR}/${model_name}/ 21 | 22 | pretrained_model=${SNAPSHOTS_DIR}/base/base_iter_70000.caffemodel 23 | 24 | GLOG_log_dir=${log} ${CAFFE_DIR}/build/tools/caffe train --solver=${solver} --gpu=1 \ 25 | --weights=${pretrained_model} 26 | 27 | -------------------------------------------------------------------------------- /scripts/train_larm.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Change to the project root directory. Assume this file is at scripts/. 3 | cd $(dirname ${BASH_SOURCE[0]})/../ 4 | 5 | # Some constants 6 | CAFFE_DIR=external/caffe 7 | 8 | EXP_DIR=external/exp 9 | SNAPSHOTS_DIR=${EXP_DIR}/snapshots 10 | MODELS_DIR=models 11 | LOGS_DIR=logs 12 | 13 | 14 | model_name=larm 15 | 16 | mkdir -p ${LOGS_DIR}/${model_name} 17 | mkdir -p ${SNAPSHOTS_DIR}/${model_name} 18 | 19 | solver=${MODELS_DIR}/${model_name}/${model_name}_solver.prototxt 20 | log=${LOGS_DIR}/${model_name}/ 21 | 22 | pretrained_model=${SNAPSHOTS_DIR}/base/base_iter_70000.caffemodel 23 | 24 | GLOG_log_dir=${log} ${CAFFE_DIR}/build/tools/caffe train --solver=${solver} --gpu=1 \ 25 | --weights=${pretrained_model} 26 | 27 | -------------------------------------------------------------------------------- /scripts/train_leg.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Change to the project root directory. Assume this file is at scripts/. 
3 | cd $(dirname ${BASH_SOURCE[0]})/../ 4 | 5 | # Some constants 6 | CAFFE_DIR=external/caffe 7 | 8 | EXP_DIR=external/exp 9 | SNAPSHOTS_DIR=${EXP_DIR}/snapshots 10 | MODELS_DIR=models 11 | LOGS_DIR=logs 12 | 13 | 14 | model_name=leg 15 | 16 | mkdir -p ${LOGS_DIR}/${model_name} 17 | mkdir -p ${SNAPSHOTS_DIR}/${model_name} 18 | 19 | solver=${MODELS_DIR}/${model_name}/${model_name}_solver.prototxt 20 | log=${LOGS_DIR}/${model_name}/ 21 | 22 | pretrained_model=${SNAPSHOTS_DIR}/base/base_iter_70000.caffemodel 23 | 24 | GLOG_log_dir=${log} ${CAFFE_DIR}/build/tools/caffe train --solver=${solver} --gpu=3 \ 25 | --weights=${pretrained_model} 26 | 27 | -------------------------------------------------------------------------------- /scripts/train_lleg.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Change to the project root directory. Assume this file is at scripts/. 3 | cd $(dirname ${BASH_SOURCE[0]})/../ 4 | 5 | # Some constants 6 | CAFFE_DIR=external/caffe 7 | 8 | EXP_DIR=external/exp 9 | SNAPSHOTS_DIR=${EXP_DIR}/snapshots 10 | MODELS_DIR=models 11 | LOGS_DIR=logs 12 | 13 | 14 | model_name=lleg 15 | 16 | mkdir -p ${LOGS_DIR}/${model_name} 17 | mkdir -p ${SNAPSHOTS_DIR}/${model_name} 18 | 19 | solver=${MODELS_DIR}/${model_name}/${model_name}_solver.prototxt 20 | log=${LOGS_DIR}/${model_name}/ 21 | 22 | pretrained_model=${SNAPSHOTS_DIR}/base/base_iter_70000.caffemodel 23 | 24 | GLOG_log_dir=${log} ${CAFFE_DIR}/build/tools/caffe train --solver=${solver} --gpu=3 \ 25 | --weights=${pretrained_model} 26 | 27 | -------------------------------------------------------------------------------- /scripts/train_rarm.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Change to the project root directory. Assume this file is at scripts/. 3 | cd $(dirname ${BASH_SOURCE[0]})/../ 4 | 5 | # Some constants 6 | CAFFE_DIR=external/caffe 7 | 8 | EXP_DIR=external/exp 9 | SNAPSHOTS_DIR=${EXP_DIR}/snapshots 10 | MODELS_DIR=models 11 | LOGS_DIR=logs 12 | 13 | 14 | model_name=rarm 15 | 16 | mkdir -p ${LOGS_DIR}/${model_name} 17 | mkdir -p ${SNAPSHOTS_DIR}/${model_name} 18 | 19 | solver=${MODELS_DIR}/${model_name}/${model_name}_solver.prototxt 20 | log=${LOGS_DIR}/${model_name}/ 21 | 22 | pretrained_model=${SNAPSHOTS_DIR}/base/base_iter_70000.caffemodel 23 | 24 | GLOG_log_dir=${log} ${CAFFE_DIR}/build/tools/caffe train --solver=${solver} --gpu=0 \ 25 | --weights=${pretrained_model} 26 | 27 | -------------------------------------------------------------------------------- /scripts/train_rleg.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Change to the project root directory. Assume this file is at scripts/. 
3 | cd $(dirname ${BASH_SOURCE[0]})/../ 4 | 5 | # Some constants 6 | CAFFE_DIR=external/caffe 7 | 8 | EXP_DIR=external/exp 9 | SNAPSHOTS_DIR=${EXP_DIR}/snapshots 10 | MODELS_DIR=models 11 | LOGS_DIR=logs 12 | 13 | 14 | model_name=rleg 15 | 16 | mkdir -p ${LOGS_DIR}/${model_name} 17 | mkdir -p ${SNAPSHOTS_DIR}/${model_name} 18 | 19 | solver=${MODELS_DIR}/${model_name}/${model_name}_solver.prototxt 20 | log=${LOGS_DIR}/${model_name}/ 21 | 22 | pretrained_model=${SNAPSHOTS_DIR}/base/base_iter_70000.caffemodel 23 | 24 | GLOG_log_dir=${log} ${CAFFE_DIR}/build/tools/caffe train --solver=${solver} --gpu=2 \ 25 | --weights=${pretrained_model} 26 | 27 | -------------------------------------------------------------------------------- /scripts/train_spindlenet.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Change to the project root directory. Assume this file is at scripts/. 3 | cd $(dirname ${BASH_SOURCE[0]})/../ 4 | 5 | # Some constants 6 | CAFFE_DIR=external/caffe 7 | 8 | EXP_DIR=external/exp 9 | SNAPSHOTS_DIR=${EXP_DIR}/snapshots 10 | MODELS_DIR=models 11 | LOGS_DIR=logs 12 | 13 | 14 | model_name=spindlenet 15 | 16 | mkdir -p ${LOGS_DIR}/${model_name} 17 | mkdir -p ${SNAPSHOTS_DIR}/${model_name} 18 | 19 | solver=${MODELS_DIR}/${model_name}/${model_name}_solver.prototxt 20 | log=${LOGS_DIR}/${model_name}/ 21 | 22 | pretrained_model1=${SNAPSHOTS_DIR}/head/head_iter_70000.caffemodel 23 | pretrained_model2=${SNAPSHOTS_DIR}/body/body_iter_70000.caffemodel 24 | pretrained_model3=${SNAPSHOTS_DIR}/leg/leg_iter_70000.caffemodel 25 | pretrained_model4=${SNAPSHOTS_DIR}/rarm/rarm_iter_70000.caffemodel 26 | pretrained_model5=${SNAPSHOTS_DIR}/larm/larm_iter_70000.caffemodel 27 | pretrained_model6=${SNAPSHOTS_DIR}/rleg/rleg_iter_70000.caffemodel 28 | pretrained_model7=${SNAPSHOTS_DIR}/lleg/lleg_iter_70000.caffemodel 29 | pretrained_model8=${SNAPSHOTS_DIR}/base/base_iter_70000.caffemodel 30 | 31 | GLOG_log_dir=${log} ${CAFFE_DIR}/build/tools/caffe train --solver=${solver} --gpu=0 \ 32 | --weights=${pretrained_model1},${pretrained_model2},${pretrained_model3},${pretrained_model4},${pretrained_model5},${pretrained_model6},${pretrained_model7},${pretrained_model8} 33 | 34 | -------------------------------------------------------------------------------- /tools/check_jstltrainlist.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from argparse import ArgumentParser 3 | from collections import defaultdict 4 | 5 | from utils import * 6 | 7 | 8 | def main(args): 9 | files, labels = read_kv(osp.join(args.jstl_dir, 'train.txt')) 10 | 11 | with open(osp.join(args.jstl_dir, 'sum_id.txt'), 'r') as f: 12 | sum_id = int(f.readline()) 13 | 14 | dic = np.asarray([False for i in xrange(sum_id)]) 15 | pdict = defaultdict(str) 16 | for i, label in enumerate(labels): 17 | dic[int(label)] = True 18 | pid = files[i].split('/')[-1].split('_')[0] 19 | assert pdict[label] == pid or pdict[label] == '' 20 | pdict[label] = pid 21 | 22 | if dic.all(): 23 | print "The train.txt covers every identity label." 24 | else: 25 | print "The train.txt is missing some identity labels."
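# The loop above checks two invariants of the merged train list: every one of
# the sum_id identity labels (0-based, as assigned by tools/merge_lists.py)
# occurs at least once, and a label is never shared by images whose filename
# prefix names a different person id.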
26 | 27 | 28 | if __name__ == '__main__': 29 | parser = ArgumentParser( 30 | description="Check a jstl_x train list for identity label coverage") 31 | parser.add_argument( 32 | 'jstl_dir', 33 | help="Root directory of the jstl containing train.txt and sum_id.txt") 34 | args = parser.parse_args() 35 | main(args) 36 | 37 | -------------------------------------------------------------------------------- /tools/convert_lmdb_to_numpy.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import lmdb 3 | import numpy as np 4 | from argparse import ArgumentParser 5 | 6 | from utils import * 7 | 8 | if 'external/caffe/python' not in sys.path: 9 | sys.path.insert(0, 'external/caffe/python') 10 | from caffe.proto.caffe_pb2 import Datum 11 | 12 | 13 | def main(args): 14 | datum = Datum() 15 | data = [] 16 | env = lmdb.open(args.input_lmdb) 17 | with env.begin() as txn: 18 | cursor = txn.cursor() 19 | for i, (key, value) in enumerate(cursor): 20 | if i >= args.truncate: 21 | break 22 | datum.ParseFromString(value) 23 | data.append(datum.float_data) 24 | data = np.squeeze(np.asarray(data)) 25 | np.save(args.output_npy, data) 26 | 27 | 28 | if __name__ == '__main__': 29 | parser = ArgumentParser() 30 | parser.add_argument('input_lmdb') 31 | parser.add_argument('output_npy') 32 | parser.add_argument( 33 | '--truncate', type=int, default=np.inf, 34 | help="Stop after converting this many items from the database. " 35 | "All items are converted if not specified.") 36 | args = parser.parse_args() 37 | main(args) 38 | 39 | -------------------------------------------------------------------------------- /tools/evaluation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from argparse import ArgumentParser 3 | from sklearn.metrics.pairwise import pairwise_distances 4 | 5 | from utils import * 6 | 7 | 8 | def _cmc_core(D, G, P, k): 9 | order = np.argsort(D, axis=0) 10 | res = np.zeros((k, D.shape[1])) 11 | for i in xrange(k): 12 | for j in xrange(D.shape[1]): 13 | if G[order[i][j]] == P[j]: 14 | res[i][j] += 1 15 | return (res.sum(axis=1) * 1.0 / D.shape[1]).cumsum() 16 | 17 | 18 | def _get_test_data(result_dir): 19 | PF = np.load(osp.join(result_dir, 'test_probe_features.npy')) 20 | PL = np.load(osp.join(result_dir, 'test_probe_labels.npy')) 21 | GF = np.load(osp.join(result_dir, 'test_gallery_features.npy')) 22 | GL = np.load(osp.join(result_dir, 'test_gallery_labels.npy')) 23 | # Reassign the labels to make them sequentially numbered from zero 24 | unique_labels = np.unique(np.r_[PL, GL]) 25 | labels_map = {l: i for i, l in enumerate(unique_labels)} 26 | PL = np.asarray([labels_map[l] for l in PL]) 27 | GL = np.asarray([labels_map[l] for l in GL]) 28 | return PF, PL, GF, GL 29 | 30 | 31 | def main(args): 32 | PF, PL, GF, GL = _get_test_data(args.result_dir) 33 | D = pairwise_distances(GF, PF, metric=args.method, n_jobs=-2) 34 | 35 | gallery_labels_set = np.unique(GL) 36 | 37 | for label in PL: 38 | if label not in gallery_labels_set: 39 | print 'Probe id not found in the gallery id set.'
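    # Single-gallery-shot CMC protocol: for each of Times trials, keep one
    # randomly chosen gallery image per identity, rank that reduced gallery by
    # distance for every probe, and accumulate the top-k matching rates; the
    # reported curve is the mean over all trials.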
40 | 41 | Times = 100 42 | k = 20 43 | 44 | res = np.zeros(k) 45 | 46 | gallery_labels_map = [[] for i in xrange(gallery_labels_set.size)] 47 | for i, g in enumerate(GL): 48 | gallery_labels_map[g].append(i) 49 | 50 | for __ in xrange(Times): 51 | # Randomly select one gallery sample per identity 52 | newD = np.zeros((gallery_labels_set.size, PL.size)) 53 | for i, g in enumerate(gallery_labels_set): 54 | j = np.random.choice(gallery_labels_map[g]) 55 | newD[i, :] = D[j, :] 56 | # Compute CMC 57 | res += _cmc_core(newD, gallery_labels_set, PL, k) 58 | res /= Times 59 | 60 | for topk in [1, 5, 10, 20]: 61 | print "{:8}{:8.1%}".format('top-' + str(topk), res[topk - 1]) 62 | 63 | 64 | if __name__ == '__main__': 65 | parser = ArgumentParser( 66 | description="Evaluate re-id performance with the CMC metric") 67 | parser.add_argument( 68 | 'result_dir', 69 | help="Result directory containing the extracted features and labels.") 70 | parser.add_argument( 71 | '--method', 72 | choices=['euclidean', 'cosine'], 73 | default='cosine') 74 | args = parser.parse_args() 75 | main(args) 76 | 77 | -------------------------------------------------------------------------------- /tools/make_lists.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from argparse import ArgumentParser 3 | import os 4 | import os.path as osp 5 | 6 | from utils import * 7 | 8 | 9 | def _get_list(images): 10 | ret = [] 11 | for img in images: 12 | label = int(osp.basename(img)[:5]) 13 | ret.append((img, label)) 14 | return np.asarray(ret) 15 | 16 | 17 | def _save(file_label_list, file_path): 18 | content = ['{} {}'.format(x, y) for x, y in file_label_list] 19 | write_list(content, file_path) 20 | 21 | 22 | def main(args): 23 | test_probe, test_gallery = [], [] 24 | mkdir_if_missing(args.output_dir) 25 | if args.dataset_dir.split('/')[-1] != "sensereid": 26 | meta = read_json(osp.join(args.dataset_dir, 'meta.json')) 27 | split = read_json(osp.join(args.dataset_dir, 'split.json')) 28 | identities = np.asarray(meta['identities']) 29 | # Make train / val. 30 | # To ensure each identity has at least one training image, 31 | # we first randomly choose one image per id for the train set. 32 | trainval = identities[split['trainval']] 33 | train = [] 34 | val = [] 35 | res = [] 36 | for person in trainval: 37 | vec = [] 38 | for views in person: 39 | for img in views: 40 | vec.append(img) 41 | np.random.shuffle(vec) 42 | train.append(osp.join(args.dataset_dir, vec[0])) 43 | for img in vec[1:]: 44 | res.append(img) 45 | num_val = int((len(train) + len(res)) * args.val_ratio) 46 | np.random.shuffle(res) 47 | for img in res[:num_val]: 48 | val.append(osp.join(args.dataset_dir, img)) 49 | for img in res[num_val:]: 50 | train.append(osp.join(args.dataset_dir, img)) 51 | train = _get_list(train) 52 | val = _get_list(val) 53 | # Make test probe / gallery. Probe identities should be a subset of 54 | # gallery's. First half views are probe, others are gallery.
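        # The assertion below guarantees that the probe identities are a
        # subset of the gallery identities, so every probe has at least one
        # potential match in the gallery.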
55 | assert len(set(split['test_probe']) - set(split['test_gallery'])) == 0 56 | for person in identities[split['test_probe']]: 57 | for views in person[:len(person) // 2]: 58 | for img in views: 59 | test_probe.append(osp.join(args.dataset_dir, img)) 60 | for views in person[len(person) // 2:]: 61 | for img in views: 62 | test_gallery.append(osp.join(args.dataset_dir, img)) 63 | only_in_gallery = list(set(split['test_gallery']) - set(split['test_probe'])) 64 | for person in identities[only_in_gallery]: 65 | for views in person: 66 | for img in views: 67 | test_gallery.append(osp.join(args.dataset_dir, img)) 68 | 69 | if args.dataset_dir.split('/')[-1] == "market1501": 70 | market_dataset_dir = "external/raw_data/Market-1501-v15.09.15" 71 | for root, dirs, files in os.walk(osp.join(market_dataset_dir, 'bounding_box_test')): 72 | for image in files: 73 | if image.split('.')[-1] == 'jpg': 74 | if image[0] == '-': 75 | continue 76 | 77 | name = osp.join(market_dataset_dir, 'bounding_box_test', image) 78 | label = int(image[0:4]) 79 | test_gallery.append((name, label)) 80 | 81 | for root, dirs, files in os.walk(osp.join(market_dataset_dir, 'query')): 82 | for image in files: 83 | if image.split('.')[-1] == 'jpg': 84 | if image[0] == '-': 85 | continue 86 | 87 | name = osp.join(market_dataset_dir, 'query', image) 88 | label = int(image[0:4]) 89 | test_probe.append((name, label)) 90 | test_probe = np.asarray(test_probe) 91 | test_gallery = np.asarray(test_gallery) 92 | else: 93 | test_probe = _get_list(test_probe) 94 | test_gallery = _get_list(test_gallery) 95 | 96 | _save(train, osp.join(args.output_dir, 'train.txt')) 97 | _save(val, osp.join(args.output_dir, 'val.txt')) 98 | else: 99 | sensereid_dataset_dir = "external/raw_data/SenseReID" 100 | for root, dirs, files in os.walk(osp.join(sensereid_dataset_dir, 'test_gallery')): 101 | for image in files: 102 | if image.split('.')[-1] == 'jpg': 103 | name = osp.join(sensereid_dataset_dir, 'test_gallery', image) 104 | label = int(image[0:5]) 105 | test_gallery.append((name, label)) 106 | 107 | for root, dirs, files in os.walk(osp.join(sensereid_dataset_dir, 'test_probe')): 108 | for image in files: 109 | if image.split('.')[-1] == 'jpg': 110 | name = osp.join(sensereid_dataset_dir, 'test_probe', image) 111 | label = int(image[0:5]) 112 | test_probe.append((name, label)) 113 | 114 | test_probe = np.asarray(test_probe) 115 | test_gallery = np.asarray(test_gallery) 116 | 117 | if test_gallery.shape[0] != 0: 118 | _save(test_probe, osp.join(args.output_dir, 'test_probe.txt')) 119 | _save(test_gallery, osp.join(args.output_dir, 'test_gallery.txt')) 120 | 121 | 122 | if __name__ == '__main__': 123 | parser = ArgumentParser( 124 | description="Create lists of image file and label") 125 | parser.add_argument( 126 | 'dataset_dir', 127 | help="Directory of a formatted dataset") 128 | parser.add_argument( 129 | 'output_dir', 130 | help="Output directory for the lists") 131 | parser.add_argument( 132 | '--val-ratio', type=float, default=0.2, 133 | help="Ratio between validation and trainval data. 
Default 0.2.") 134 | args = parser.parse_args() 135 | main(args) 136 | 137 | -------------------------------------------------------------------------------- /tools/merge_lists.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from argparse import ArgumentParser 3 | 4 | from utils import * 5 | 6 | 7 | def main(args): 8 | id_offset = 0 9 | merged_train_kv = {} 10 | merged_val_kv = {} 11 | for datalist_dir in args.datalist_dirs: 12 | train_files, train_labels = read_kv(osp.join(datalist_dir, 'train.txt')) 13 | val_files, val_labels = read_kv(osp.join(datalist_dir, 'val.txt')) 14 | unique_ids = set(map(int, train_labels + val_labels)) 15 | id_mapping = {idx: i + id_offset for i, idx in enumerate(unique_ids)} 16 | for k, v in zip(train_files, train_labels): 17 | merged_train_kv[k] = id_mapping[int(v)] 18 | for k, v in zip(val_files, val_labels): 19 | merged_val_kv[k] = id_mapping[int(v)] 20 | id_offset += len(id_mapping) 21 | mkdir_if_missing(osp.join(args.output_dir)) 22 | write_kv(merged_train_kv.keys(), map(str, merged_train_kv.values()), 23 | osp.join(args.output_dir, 'train.txt')) 24 | write_kv(merged_val_kv.keys(), map(str, merged_val_kv.values()), 25 | osp.join(args.output_dir, 'val.txt')) 26 | print "Max ID:", id_offset 27 | with open(osp.join(args.output_dir, 'sum_id.txt'), 'w') as f: 28 | f.write(str(id_offset) + '\n') 29 | 30 | 31 | if __name__ == '__main__': 32 | parser = ArgumentParser( 33 | description="Merge multiple lists of train / val image file and " 34 | "label into a single-task one") 35 | parser.add_argument( 36 | '--datalist-dirs', type=str, nargs='+', 37 | help="Datalist directories containing train.txt and val.txt.") 38 | parser.add_argument( 39 | 'output_dir', 40 | help="Output directories for the lists") 41 | args = parser.parse_args() 42 | assert args.datalist_dirs != None 43 | main(args) 44 | 45 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .core import * 2 | -------------------------------------------------------------------------------- /utils/core.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import json 4 | import codecs 5 | 6 | 7 | def mkdir_if_missing(d): 8 | if not osp.isdir(d): 9 | os.makedirs(d) 10 | 11 | 12 | def read_list(file_path, coding=None): 13 | if coding is None: 14 | with open(file_path, 'r') as f: 15 | arr = [line.strip() for line in f.readlines()] 16 | else: 17 | with codecs.open(file_path, 'r', coding) as f: 18 | arr = [line.strip() for line in f.readlines()] 19 | return arr 20 | 21 | 22 | def write_list(arr, file_path, coding=None): 23 | arr = ['{}'.format(item) for item in arr] 24 | if coding is None: 25 | with open(file_path, 'w') as f: 26 | for item in arr: 27 | f.write(item + '\n') 28 | else: 29 | with codecs.open(file_path, 'w', coding) as f: 30 | for item in arr: 31 | f.write(item + u'\n') 32 | 33 | 34 | def read_kv(file_path, coding=None): 35 | arr = read_list(file_path, coding) 36 | if len(arr) == 0: 37 | return [], [] 38 | return zip(*map(str.split, arr)) 39 | 40 | 41 | def write_kv(k, v, file_path, coding=None): 42 | arr = zip(k, v) 43 | arr = [' '.join(item) for item in arr] 44 | write_list(arr, file_path, coding) 45 | 46 | 47 | def read_json(file_path): 48 | with open(file_path, 'r') as f: 49 | obj = json.load(f) 50 | return obj 51 | 52 | 53 | 
def write_json(obj, file_path):
    with open(file_path, 'w') as f:
        json.dump(obj, f, indent=2, separators=(',', ': '))
--------------------------------------------------------------------------------
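
For reference, the single-shot CMC protocol used by tools/evaluation.py can be exercised end to end on synthetic data. The sketch below re-implements the sampling-and-ranking loop in a self-contained form; `single_shot_cmc` and the toy `D` / `PL` / `GL` arrays are illustrative names invented for this example, and the ranking step stands in for the repository's `_cmc_core`.

import numpy as np


def single_shot_cmc(D, PL, GL, k=20, times=100):
    """Average CMC over `times` random single-shot galleries.

    D: (num_gallery, num_probe) distance matrix; PL: probe labels;
    GL: gallery labels. Probe identities must appear in the gallery.
    """
    gallery_labels_set = np.unique(GL)
    label_to_indices = {g: np.where(GL == g)[0] for g in gallery_labels_set}
    res = np.zeros(k)
    for _ in range(times):
        # Keep exactly one randomly chosen gallery image per identity
        rows = [np.random.choice(label_to_indices[g]) for g in gallery_labels_set]
        newD = D[rows, :]
        # Rank identities per probe by ascending distance (stand-in for _cmc_core)
        ranked = gallery_labels_set[np.argsort(newD, axis=0)]
        first_hit = (ranked == PL[np.newaxis, :]).argmax(axis=0)
        for h in first_hit:
            if h < k:
                res[h:] += 1.0 / PL.size
    return res / times


# Toy data: 5 identities with 2 gallery images each, one probe per identity
GL = np.repeat(np.arange(5), 2)
PL = np.arange(5)
D = np.random.rand(GL.size, PL.size)
cmc = single_shot_cmc(D, PL, GL, k=5)
print('top-1 {:.1%}  top-5 {:.1%}'.format(cmc[0], cmc[4]))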
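
The id_offset bookkeeping in tools/merge_lists.py is easiest to follow on a concrete case. Below is a minimal sketch with two made-up datasets (names and labels invented for illustration); the real script iterates an unsorted set, so the within-dataset order is arbitrary, and sorting here only makes the example deterministic.

# Each dataset's labels are compacted to a contiguous block and shifted by
# the number of identities already assigned, yielding one global ID space.
datasets = {
    'viper': ['3', '3', '7'],    # labels as read from a train.txt
    'cuhk01': ['0', '2', '2'],
}

id_offset = 0
merged = {}
for name in sorted(datasets):
    labels = datasets[name]
    unique_ids = sorted(set(map(int, labels)))
    id_mapping = {idx: i + id_offset for i, idx in enumerate(unique_ids)}
    merged[name] = [id_mapping[int(v)] for v in labels]
    id_offset += len(id_mapping)

print(merged)     # e.g. {'cuhk01': [0, 1, 1], 'viper': [2, 2, 3]}
print(id_offset)  # 4, the value merge_lists.py writes to sum_id.txt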
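
Finally, a quick round trip through the key/value helpers in utils/core.py; the file path is a throwaway example.

from utils import read_kv, write_kv

# Write "a.jpg 0" / "b.jpg 1", one pair per line, then read them back
write_kv(['a.jpg', 'b.jpg'], ['0', '1'], '/tmp/demo_list.txt')
files, labels = read_kv('/tmp/demo_list.txt')
assert list(files) == ['a.jpg', 'b.jpg']
assert list(labels) == ['0', '1']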