├── .gitignore
├── README.md
├── afw_gtmiss.mat
├── predict
├── model
│ ├── ResNet_3b_s16_f2r
│ │ ├── solver_test.prototxt
│ │ └── test.prototxt
│ ├── ResNet_3b_s16_fm2fm_pool2_deep
│ │ ├── solver_test.prototxt
│ │ └── test.prototxt
│ └── res_pool2
│ │ ├── solver_test.prototxt
│ │ └── test.prototxt
├── output
│ ├── ResNet_3b_s16
│ │ ├── param.mat
│ │ └── tot_wometa_1epoch
│ └── ResNet_3b_s16_fm2fm_pool2_deep
│ │ └── 65w
├── result1.jpg
├── result2.jpg
├── result3.jpg
├── result4.jpg
├── scale_fddb.mat
├── script_baseline_rpn.m
├── script_featmap_2_result.m
├── script_featmap_transfer.m
├── script_gen_featmap.m
├── script_start.m
├── testimg1.jpg
├── testimg2.jpg
├── testimg3.jpg
├── testimg4.jpg
└── utils
│ ├── configure.m
│ ├── convert_caffe2img.m
│ ├── convert_caffe2mat.m
│ ├── convert_ignore.m
│ ├── convert_img2caffe.m
│ ├── convert_input_to_struct.m
│ ├── convert_mat2caffe.m
│ ├── convert_output_to_struct.m
│ ├── convert_pts.m
│ ├── convert_rect.m
│ ├── cpu_cores.m
│ ├── detect_all.m
│ ├── detect_all_by_featmap.m
│ ├── detect_img.m
│ ├── detect_large_img.m
│ ├── draw_missed.m
│ ├── dump_error.m
│ ├── fetch_output.m
│ ├── gen_featmap.m
│ ├── gen_trans_featmap.m
│ ├── get_net_attr.m
│ ├── get_overlap_1toN.m
│ ├── get_overlap_MtoN.m
│ ├── get_rect_from_pts.m
│ ├── get_subset_of_struct.m
│ ├── new_parpool.m
│ ├── nms.m
│ ├── nms_gpu_mex.cu
│ ├── nms_mex.cpp
│ ├── nms_mex.mexw64
│ ├── nms_multiclass.m
│ ├── nms_parsed.m
│ ├── nms_pts.m
│ ├── nvmex.m
│ ├── plot_detected.m
│ ├── plot_parsed.m
│ ├── plot_result.m
│ ├── prepare_data_for_input.m
│ ├── prepare_data_for_test.m
│ ├── randomid.m
│ ├── scale_img_to_target.m
│ ├── script_analyze_dataset.m
│ ├── script_diagnosis.m
│ └── script_from_parsed_to_pts.m
└── result.jpg
/.gitignore:
--------------------------------------------------------------------------------
1 | train/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Recurrent Scale Approximation (RSA) for Object Detection
2 |
3 | 
4 |
5 | Codebase for *Recurrent Scale Approximation for Object Detection in CNN* published at **ICCV 2017**, [[arXiv]](https://arxiv.org/abs/1707.09531). Here we offer the training and test code for two modules in the paper, `scale-forecast network` and `recurrent scale approximation (RSA)`. Models for face detection trained on some open datasets are also provided.
6 |
7 | **Note: This project is still underway. Please stay tuned for more features soon!**
8 |
9 | ## Codebase at a Glance
10 |
11 | `train/`: Training code for modules `scale-forecast network` and `RSA`
12 |
13 | `predict/`: Test code for the whole detection pipeline
14 |
15 | `afw_gtmiss.mat`: Revised face data annotation mentioned in Section 4.1 in the paper.
16 |
17 |
18 | ## Grab and Go (Demo)
19 |
20 | **Caffe models** for face detection trained on popular datasets.
21 |
22 | + Base RPN model: `predict/output/ResNet_3b_s16/tot_wometa_1epoch`, trained on `Widerface` (fg/bg), `COCO` (bg only) and `ImageNet Det` (bg only)
23 | + RSA model: `predict/output/ResNet_3b_s16_fm2fm_pool2_deep/65w`, trained on `Widerface`, `COCO`, and `ImageNet Det`
24 |
25 | Steps to run the **test** code:
26 |
27 | 1. Compile [CaffeMex_v2](https://github.com/sciencefans/CaffeMex_v2/) with matlab interface
28 |
29 | 2. Add `CaffeMex_v2/matlab/` to matlab search path
30 |
31 | 3. See tips in `predict/script_start.m` and run it!
32 |
33 | 4. After processing for a few minutes, the detection and alignment results will be shown in an image window. Please click the image window to view all results. If you leave line 8 in `script_start.m` set to its default value `false`, you should observe results similar to those shown above.
34 |
35 | ## Train Your Own Model
36 |
37 | Still in progress, this part will be released later.
38 |
39 |
40 | ## FAQ
41 | We will list common issues of this project here as they arise. Stay tuned! :)
42 |
43 |
44 | ## Citation
45 | Please kindly cite our work if it helps your research:
46 |
47 | @inproceedings{liu_2017_rsa,
48 | Author = {Yu Liu and Hongyang Li and Junjie Yan and Fangyin Wei and Xiaogang Wang and Xiaoou Tang},
49 | Title = {Recurrent Scale Approximation for Object Detection in CNN},
50 | Booktitle = {IEEE International Conference on Computer Vision},
51 | Year = {2017}
52 | }
53 |
54 | ## Acknowledgment
55 | We appreciate the contribution of the following researchers:
56 |
57 | [Dong Chen](https://www.microsoft.com/en-us/research/people/doch/) @Microsoft Research, who inspired some of the basic ideas while Yu Liu worked as an intern at MSR.
58 |
59 | Jiongchao Jin @Beihang University, who provided some of the baseline results.
60 |
--------------------------------------------------------------------------------
/afw_gtmiss.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuyuisanai/RSA-for-object-detection/626ad81172b260ecf8257a80731e5236fe41cb63/afw_gtmiss.mat
--------------------------------------------------------------------------------
/predict/model/ResNet_3b_s16_f2r/solver_test.prototxt:
--------------------------------------------------------------------------------
1 | net: "model/ResNet_3b_s16_f2r/test.prototxt"
2 | base_lr: 0.0009
3 | lr_policy: "step"
4 | gamma: 0.9
5 | stepsize: 20000
6 | display: 20
7 | max_iter: 1000000
8 | momentum: 0.9
9 | weight_decay: 0.0001
10 | # We disable standard caffe solver snapshotting and implement our own snapshot
11 | snapshot: 0
12 | #debug_info: true
13 |
14 |
--------------------------------------------------------------------------------
/predict/model/ResNet_3b_s16_f2r/test.prototxt:
--------------------------------------------------------------------------------
1 | name: "ResNet_3b_s16"
2 |
3 | input: "res2b"
4 | input_dim: 1
5 | input_dim: 256
6 | input_dim: 16
7 | input_dim: 16
8 | mem_param {
9 | optimize_test: true
10 | }
11 |
12 | layer {
13 | bottom: "res2b"
14 | top: "res2b"
15 | name: "res2b_relu"
16 | type: "ReLU"
17 | }
18 |
19 | layer {
20 | bottom: "res2b"
21 | top: "res2c_branch2a"
22 | name: "res2c_branch2a"
23 | type: "Convolution"
24 | convolution_param {
25 | num_output: 64
26 | kernel_size: 1
27 | pad: 0
28 | stride: 1
29 | bias_term: false
30 | }
31 | param {
32 | lr_mult: 1.0
33 | }
34 | }
35 |
36 | layer {
37 | bottom: "res2c_branch2a"
38 | top: "res2c_branch2a"
39 | name: "bn2c_branch2a"
40 | type: "BatchNorm"
41 | batch_norm_param {
42 | use_global_stats: true
43 | }
44 | param {
45 | lr_mult: 0.0
46 | decay_mult: 0.0
47 | }
48 | param {
49 | lr_mult: 0.0
50 | decay_mult: 0.0
51 | }
52 | param {
53 | lr_mult: 0.0
54 | decay_mult: 0.0
55 | }
56 | }
57 |
58 | layer {
59 | bottom: "res2c_branch2a"
60 | top: "res2c_branch2a"
61 | name: "scale2c_branch2a"
62 | type: "Scale"
63 | scale_param {
64 | bias_term: true
65 | }
66 | param {
67 | lr_mult: 0.0
68 | decay_mult: 0.0
69 | }
70 | param {
71 | lr_mult: 0.0
72 | decay_mult: 0.0
73 | }
74 | }
75 |
76 | layer {
77 | top: "res2c_branch2a"
78 | bottom: "res2c_branch2a"
79 | name: "res2c_branch2a_relu"
80 | type: "ReLU"
81 | }
82 |
83 | layer {
84 | bottom: "res2c_branch2a"
85 | top: "res2c_branch2b"
86 | name: "res2c_branch2b"
87 | type: "Convolution"
88 | convolution_param {
89 | num_output: 64
90 | kernel_size: 3
91 | pad: 1
92 | stride: 1
93 | bias_term: false
94 | }
95 | param {
96 | lr_mult: 1.0
97 | }
98 | }
99 |
100 | layer {
101 | bottom: "res2c_branch2b"
102 | top: "res2c_branch2b"
103 | name: "bn2c_branch2b"
104 | type: "BatchNorm"
105 | batch_norm_param {
106 | use_global_stats: true
107 | }
108 | param {
109 | lr_mult: 0.0
110 | decay_mult: 0.0
111 | }
112 | param {
113 | lr_mult: 0.0
114 | decay_mult: 0.0
115 | }
116 | param {
117 | lr_mult: 0.0
118 | decay_mult: 0.0
119 | }
120 | }
121 |
122 | layer {
123 | bottom: "res2c_branch2b"
124 | top: "res2c_branch2b"
125 | name: "scale2c_branch2b"
126 | type: "Scale"
127 | scale_param {
128 | bias_term: true
129 | }
130 | param {
131 | lr_mult: 0.0
132 | decay_mult: 0.0
133 | }
134 | param {
135 | lr_mult: 0.0
136 | decay_mult: 0.0
137 | }
138 | }
139 |
140 | layer {
141 | top: "res2c_branch2b"
142 | bottom: "res2c_branch2b"
143 | name: "res2c_branch2b_relu"
144 | type: "ReLU"
145 | }
146 |
147 | layer {
148 | bottom: "res2c_branch2b"
149 | top: "res2c_branch2c"
150 | name: "res2c_branch2c"
151 | type: "Convolution"
152 | convolution_param {
153 | num_output: 256
154 | kernel_size: 1
155 | pad: 0
156 | stride: 1
157 | bias_term: false
158 | }
159 | param {
160 | lr_mult: 1.0
161 | }
162 | }
163 |
164 | layer {
165 | bottom: "res2c_branch2c"
166 | top: "res2c_branch2c"
167 | name: "bn2c_branch2c"
168 | type: "BatchNorm"
169 | batch_norm_param {
170 | use_global_stats: true
171 | }
172 | param {
173 | lr_mult: 0.0
174 | decay_mult: 0.0
175 | }
176 | param {
177 | lr_mult: 0.0
178 | decay_mult: 0.0
179 | }
180 | param {
181 | lr_mult: 0.0
182 | decay_mult: 0.0
183 | }
184 | }
185 |
186 | layer {
187 | bottom: "res2c_branch2c"
188 | top: "res2c_branch2c"
189 | name: "scale2c_branch2c"
190 | type: "Scale"
191 | scale_param {
192 | bias_term: true
193 | }
194 | param {
195 | lr_mult: 0.0
196 | decay_mult: 0.0
197 | }
198 | param {
199 | lr_mult: 0.0
200 | decay_mult: 0.0
201 | }
202 | }
203 |
204 | layer {
205 | bottom: "res2b"
206 | bottom: "res2c_branch2c"
207 | top: "res2c"
208 | name: "res2c"
209 | type: "Eltwise"
210 | }
211 |
212 | layer {
213 | bottom: "res2c"
214 | top: "res2c"
215 | name: "res2c_relu"
216 | type: "ReLU"
217 | }
218 |
219 | layer {
220 | bottom: "res2c"
221 | top: "res3a_branch1"
222 | name: "res3a_branch1"
223 | type: "Convolution"
224 | convolution_param {
225 | num_output: 512
226 | kernel_size: 1
227 | pad: 0
228 | stride: 2
229 | bias_term: false
230 | }
231 | param {
232 | lr_mult: 1.0
233 | }
234 | }
235 |
236 | layer {
237 | bottom: "res3a_branch1"
238 | top: "res3a_branch1"
239 | name: "bn3a_branch1"
240 | type: "BatchNorm"
241 | batch_norm_param {
242 | use_global_stats: true
243 | }
244 | param {
245 | lr_mult: 0.0
246 | decay_mult: 0.0
247 | }
248 | param {
249 | lr_mult: 0.0
250 | decay_mult: 0.0
251 | }
252 | param {
253 | lr_mult: 0.0
254 | decay_mult: 0.0
255 | }
256 | }
257 |
258 | layer {
259 | bottom: "res3a_branch1"
260 | top: "res3a_branch1"
261 | name: "scale3a_branch1"
262 | type: "Scale"
263 | scale_param {
264 | bias_term: true
265 | }
266 | param {
267 | lr_mult: 0.0
268 | decay_mult: 0.0
269 | }
270 | param {
271 | lr_mult: 0.0
272 | decay_mult: 0.0
273 | }
274 | }
275 |
276 | layer {
277 | bottom: "res2c"
278 | top: "res3a_branch2a"
279 | name: "res3a_branch2a"
280 | type: "Convolution"
281 | convolution_param {
282 | num_output: 128
283 | kernel_size: 1
284 | pad: 0
285 | stride: 2
286 | bias_term: false
287 | }
288 | param {
289 | lr_mult: 1.0
290 | }
291 | }
292 |
293 | layer {
294 | bottom: "res3a_branch2a"
295 | top: "res3a_branch2a"
296 | name: "bn3a_branch2a"
297 | type: "BatchNorm"
298 | batch_norm_param {
299 | use_global_stats: true
300 | }
301 | param {
302 | lr_mult: 0.0
303 | decay_mult: 0.0
304 | }
305 | param {
306 | lr_mult: 0.0
307 | decay_mult: 0.0
308 | }
309 | param {
310 | lr_mult: 0.0
311 | decay_mult: 0.0
312 | }
313 | }
314 |
315 | layer {
316 | bottom: "res3a_branch2a"
317 | top: "res3a_branch2a"
318 | name: "scale3a_branch2a"
319 | type: "Scale"
320 | scale_param {
321 | bias_term: true
322 | }
323 | param {
324 | lr_mult: 0.0
325 | decay_mult: 0.0
326 | }
327 | param {
328 | lr_mult: 0.0
329 | decay_mult: 0.0
330 | }
331 | }
332 |
333 | layer {
334 | top: "res3a_branch2a"
335 | bottom: "res3a_branch2a"
336 | name: "res3a_branch2a_relu"
337 | type: "ReLU"
338 | }
339 |
340 | layer {
341 | bottom: "res3a_branch2a"
342 | top: "res3a_branch2b"
343 | name: "res3a_branch2b"
344 | type: "Convolution"
345 | convolution_param {
346 | num_output: 128
347 | kernel_size: 3
348 | pad: 1
349 | stride: 1
350 | bias_term: false
351 | }
352 | param {
353 | lr_mult: 1.0
354 | }
355 | }
356 |
357 | layer {
358 | bottom: "res3a_branch2b"
359 | top: "res3a_branch2b"
360 | name: "bn3a_branch2b"
361 | type: "BatchNorm"
362 | batch_norm_param {
363 | use_global_stats: true
364 | }
365 | param {
366 | lr_mult: 0.0
367 | decay_mult: 0.0
368 | }
369 | param {
370 | lr_mult: 0.0
371 | decay_mult: 0.0
372 | }
373 | param {
374 | lr_mult: 0.0
375 | decay_mult: 0.0
376 | }
377 | }
378 |
379 | layer {
380 | bottom: "res3a_branch2b"
381 | top: "res3a_branch2b"
382 | name: "scale3a_branch2b"
383 | type: "Scale"
384 | scale_param {
385 | bias_term: true
386 | }
387 | param {
388 | lr_mult: 0.0
389 | decay_mult: 0.0
390 | }
391 | param {
392 | lr_mult: 0.0
393 | decay_mult: 0.0
394 | }
395 | }
396 |
397 | layer {
398 | top: "res3a_branch2b"
399 | bottom: "res3a_branch2b"
400 | name: "res3a_branch2b_relu"
401 | type: "ReLU"
402 | }
403 |
404 | layer {
405 | bottom: "res3a_branch2b"
406 | top: "res3a_branch2c"
407 | name: "res3a_branch2c"
408 | type: "Convolution"
409 | convolution_param {
410 | num_output: 512
411 | kernel_size: 1
412 | pad: 0
413 | stride: 1
414 | bias_term: false
415 | }
416 | param {
417 | lr_mult: 1.0
418 | }
419 | }
420 |
421 | layer {
422 | bottom: "res3a_branch2c"
423 | top: "res3a_branch2c"
424 | name: "bn3a_branch2c"
425 | type: "BatchNorm"
426 | batch_norm_param {
427 | use_global_stats: true
428 | }
429 | param {
430 | lr_mult: 0.0
431 | decay_mult: 0.0
432 | }
433 | param {
434 | lr_mult: 0.0
435 | decay_mult: 0.0
436 | }
437 | param {
438 | lr_mult: 0.0
439 | decay_mult: 0.0
440 | }
441 | }
442 |
443 | layer {
444 | bottom: "res3a_branch2c"
445 | top: "res3a_branch2c"
446 | name: "scale3a_branch2c"
447 | type: "Scale"
448 | scale_param {
449 | bias_term: true
450 | }
451 | param {
452 | lr_mult: 0.0
453 | decay_mult: 0.0
454 | }
455 | param {
456 | lr_mult: 0.0
457 | decay_mult: 0.0
458 | }
459 | }
460 |
461 | layer {
462 | bottom: "res3a_branch1"
463 | bottom: "res3a_branch2c"
464 | top: "res3a"
465 | name: "res3a"
466 | type: "Eltwise"
467 | }
468 |
469 | layer {
470 | bottom: "res3a"
471 | top: "res3a"
472 | name: "res3a_relu"
473 | type: "ReLU"
474 | }
475 |
476 | layer {
477 | bottom: "res3a"
478 | top: "res3b1_branch2a"
479 | name: "res3b1_branch2a"
480 | type: "Convolution"
481 | convolution_param {
482 | num_output: 128
483 | kernel_size: 1
484 | pad: 0
485 | stride: 1
486 | bias_term: false
487 | }
488 | param {
489 | lr_mult: 1.0
490 | }
491 | }
492 |
493 | layer {
494 | bottom: "res3b1_branch2a"
495 | top: "res3b1_branch2a"
496 | name: "bn3b1_branch2a"
497 | type: "BatchNorm"
498 | batch_norm_param {
499 | use_global_stats: true
500 | }
501 | param {
502 | lr_mult: 0.0
503 | decay_mult: 0.0
504 | }
505 | param {
506 | lr_mult: 0.0
507 | decay_mult: 0.0
508 | }
509 | param {
510 | lr_mult: 0.0
511 | decay_mult: 0.0
512 | }
513 | }
514 |
515 | layer {
516 | bottom: "res3b1_branch2a"
517 | top: "res3b1_branch2a"
518 | name: "scale3b1_branch2a"
519 | type: "Scale"
520 | scale_param {
521 | bias_term: true
522 | }
523 | param {
524 | lr_mult: 0.0
525 | decay_mult: 0.0
526 | }
527 | param {
528 | lr_mult: 0.0
529 | decay_mult: 0.0
530 | }
531 | }
532 |
533 | layer {
534 | top: "res3b1_branch2a"
535 | bottom: "res3b1_branch2a"
536 | name: "res3b1_branch2a_relu"
537 | type: "ReLU"
538 | }
539 |
540 | layer {
541 | bottom: "res3b1_branch2a"
542 | top: "res3b1_branch2b"
543 | name: "res3b1_branch2b"
544 | type: "Convolution"
545 | convolution_param {
546 | num_output: 128
547 | kernel_size: 3
548 | pad: 1
549 | stride: 1
550 | bias_term: false
551 | }
552 | param {
553 | lr_mult: 1.0
554 | }
555 | }
556 |
557 | layer {
558 | bottom: "res3b1_branch2b"
559 | top: "res3b1_branch2b"
560 | name: "bn3b1_branch2b"
561 | type: "BatchNorm"
562 | batch_norm_param {
563 | use_global_stats: true
564 | }
565 | param {
566 | lr_mult: 0.0
567 | decay_mult: 0.0
568 | }
569 | param {
570 | lr_mult: 0.0
571 | decay_mult: 0.0
572 | }
573 | param {
574 | lr_mult: 0.0
575 | decay_mult: 0.0
576 | }
577 | }
578 |
579 | layer {
580 | bottom: "res3b1_branch2b"
581 | top: "res3b1_branch2b"
582 | name: "scale3b1_branch2b"
583 | type: "Scale"
584 | scale_param {
585 | bias_term: true
586 | }
587 | param {
588 | lr_mult: 0.0
589 | decay_mult: 0.0
590 | }
591 | param {
592 | lr_mult: 0.0
593 | decay_mult: 0.0
594 | }
595 | }
596 |
597 | layer {
598 | top: "res3b1_branch2b"
599 | bottom: "res3b1_branch2b"
600 | name: "res3b1_branch2b_relu"
601 | type: "ReLU"
602 | }
603 |
604 | layer {
605 | bottom: "res3b1_branch2b"
606 | top: "res3b1_branch2c"
607 | name: "res3b1_branch2c"
608 | type: "Convolution"
609 | convolution_param {
610 | num_output: 512
611 | kernel_size: 1
612 | pad: 0
613 | stride: 1
614 | bias_term: false
615 | }
616 | param {
617 | lr_mult: 1.0
618 | }
619 | }
620 |
621 | layer {
622 | bottom: "res3b1_branch2c"
623 | top: "res3b1_branch2c"
624 | name: "bn3b1_branch2c"
625 | type: "BatchNorm"
626 | batch_norm_param {
627 | use_global_stats: true
628 | }
629 | param {
630 | lr_mult: 0.0
631 | decay_mult: 0.0
632 | }
633 | param {
634 | lr_mult: 0.0
635 | decay_mult: 0.0
636 | }
637 | param {
638 | lr_mult: 0.0
639 | decay_mult: 0.0
640 | }
641 | }
642 |
643 | layer {
644 | bottom: "res3b1_branch2c"
645 | top: "res3b1_branch2c"
646 | name: "scale3b1_branch2c"
647 | type: "Scale"
648 | scale_param {
649 | bias_term: true
650 | }
651 | param {
652 | lr_mult: 0.0
653 | decay_mult: 0.0
654 | }
655 | param {
656 | lr_mult: 0.0
657 | decay_mult: 0.0
658 | }
659 | }
660 |
661 | layer {
662 | bottom: "res3a"
663 | bottom: "res3b1_branch2c"
664 | top: "res3b1"
665 | name: "res3b1"
666 | type: "Eltwise"
667 | }
668 |
669 | layer {
670 | bottom: "res3b1"
671 | top: "res3b1"
672 | name: "res3b1_relu"
673 | type: "ReLU"
674 | }
675 |
676 | layer {
677 | bottom: "res3b1"
678 | top: "res3b2_branch2a"
679 | name: "res3b2_branch2a"
680 | type: "Convolution"
681 | convolution_param {
682 | num_output: 128
683 | kernel_size: 1
684 | pad: 0
685 | stride: 1
686 | bias_term: false
687 | }
688 | param {
689 | lr_mult: 1.0
690 | }
691 | }
692 |
693 | layer {
694 | bottom: "res3b2_branch2a"
695 | top: "res3b2_branch2a"
696 | name: "bn3b2_branch2a"
697 | type: "BatchNorm"
698 | batch_norm_param {
699 | use_global_stats: true
700 | }
701 | param {
702 | lr_mult: 0.0
703 | decay_mult: 0.0
704 | }
705 | param {
706 | lr_mult: 0.0
707 | decay_mult: 0.0
708 | }
709 | param {
710 | lr_mult: 0.0
711 | decay_mult: 0.0
712 | }
713 | }
714 |
715 | layer {
716 | bottom: "res3b2_branch2a"
717 | top: "res3b2_branch2a"
718 | name: "scale3b2_branch2a"
719 | type: "Scale"
720 | scale_param {
721 | bias_term: true
722 | }
723 | param {
724 | lr_mult: 0.0
725 | decay_mult: 0.0
726 | }
727 | param {
728 | lr_mult: 0.0
729 | decay_mult: 0.0
730 | }
731 | }
732 |
733 | layer {
734 | top: "res3b2_branch2a"
735 | bottom: "res3b2_branch2a"
736 | name: "res3b2_branch2a_relu"
737 | type: "ReLU"
738 | }
739 |
740 | layer {
741 | bottom: "res3b2_branch2a"
742 | top: "res3b2_branch2b"
743 | name: "res3b2_branch2b"
744 | type: "Convolution"
745 | convolution_param {
746 | num_output: 128
747 | kernel_size: 3
748 | pad: 1
749 | stride: 1
750 | bias_term: false
751 | }
752 | param {
753 | lr_mult: 1.0
754 | }
755 | }
756 |
757 | layer {
758 | bottom: "res3b2_branch2b"
759 | top: "res3b2_branch2b"
760 | name: "bn3b2_branch2b"
761 | type: "BatchNorm"
762 | batch_norm_param {
763 | use_global_stats: true
764 | }
765 | param {
766 | lr_mult: 0.0
767 | decay_mult: 0.0
768 | }
769 | param {
770 | lr_mult: 0.0
771 | decay_mult: 0.0
772 | }
773 | param {
774 | lr_mult: 0.0
775 | decay_mult: 0.0
776 | }
777 | }
778 |
779 | layer {
780 | bottom: "res3b2_branch2b"
781 | top: "res3b2_branch2b"
782 | name: "scale3b2_branch2b"
783 | type: "Scale"
784 | scale_param {
785 | bias_term: true
786 | }
787 | param {
788 | lr_mult: 0.0
789 | decay_mult: 0.0
790 | }
791 | param {
792 | lr_mult: 0.0
793 | decay_mult: 0.0
794 | }
795 | }
796 |
797 | layer {
798 | top: "res3b2_branch2b"
799 | bottom: "res3b2_branch2b"
800 | name: "res3b2_branch2b_relu"
801 | type: "ReLU"
802 | }
803 |
804 | layer {
805 | bottom: "res3b2_branch2b"
806 | top: "res3b2_branch2c"
807 | name: "res3b2_branch2c"
808 | type: "Convolution"
809 | convolution_param {
810 | num_output: 512
811 | kernel_size: 1
812 | pad: 0
813 | stride: 1
814 | bias_term: false
815 | }
816 | param {
817 | lr_mult: 1.0
818 | }
819 | }
820 |
821 | layer {
822 | bottom: "res3b2_branch2c"
823 | top: "res3b2_branch2c"
824 | name: "bn3b2_branch2c"
825 | type: "BatchNorm"
826 | batch_norm_param {
827 | use_global_stats: true
828 | }
829 | param {
830 | lr_mult: 0.0
831 | decay_mult: 0.0
832 | }
833 | param {
834 | lr_mult: 0.0
835 | decay_mult: 0.0
836 | }
837 | param {
838 | lr_mult: 0.0
839 | decay_mult: 0.0
840 | }
841 | }
842 |
843 | layer {
844 | bottom: "res3b2_branch2c"
845 | top: "res3b2_branch2c"
846 | name: "scale3b2_branch2c"
847 | type: "Scale"
848 | scale_param {
849 | bias_term: true
850 | }
851 | param {
852 | lr_mult: 0.0
853 | decay_mult: 0.0
854 | }
855 | param {
856 | lr_mult: 0.0
857 | decay_mult: 0.0
858 | }
859 | }
860 |
861 | layer {
862 | bottom: "res3b1"
863 | bottom: "res3b2_branch2c"
864 | top: "res3b2"
865 | name: "res3b2"
866 | type: "Eltwise"
867 | }
868 |
869 | layer {
870 | bottom: "res3b2"
871 | top: "res3b2"
872 | name: "res3b2_relu"
873 | type: "ReLU"
874 | }
875 |
876 | layer {
877 | bottom: "res3b2"
878 | top: "res3b3_branch2a"
879 | name: "res3b3_branch2a"
880 | type: "Convolution"
881 | convolution_param {
882 | num_output: 128
883 | kernel_size: 1
884 | pad: 0
885 | stride: 1
886 | bias_term: false
887 | }
888 | param {
889 | lr_mult: 1.0
890 | }
891 | }
892 |
893 | layer {
894 | bottom: "res3b3_branch2a"
895 | top: "res3b3_branch2a"
896 | name: "bn3b3_branch2a"
897 | type: "BatchNorm"
898 | batch_norm_param {
899 | use_global_stats: true
900 | }
901 | param {
902 | lr_mult: 0.0
903 | decay_mult: 0.0
904 | }
905 | param {
906 | lr_mult: 0.0
907 | decay_mult: 0.0
908 | }
909 | param {
910 | lr_mult: 0.0
911 | decay_mult: 0.0
912 | }
913 | }
914 |
915 | layer {
916 | bottom: "res3b3_branch2a"
917 | top: "res3b3_branch2a"
918 | name: "scale3b3_branch2a"
919 | type: "Scale"
920 | scale_param {
921 | bias_term: true
922 | }
923 | param {
924 | lr_mult: 0.0
925 | decay_mult: 0.0
926 | }
927 | param {
928 | lr_mult: 0.0
929 | decay_mult: 0.0
930 | }
931 | }
932 |
933 | layer {
934 | top: "res3b3_branch2a"
935 | bottom: "res3b3_branch2a"
936 | name: "res3b3_branch2a_relu"
937 | type: "ReLU"
938 | }
939 |
940 | layer {
941 | bottom: "res3b3_branch2a"
942 | top: "res3b3_branch2b"
943 | name: "res3b3_branch2b"
944 | type: "Convolution"
945 | convolution_param {
946 | num_output: 128
947 | kernel_size: 3
948 | pad: 1
949 | stride: 1
950 | bias_term: false
951 | }
952 | param {
953 | lr_mult: 1.0
954 | }
955 | }
956 |
957 | layer {
958 | bottom: "res3b3_branch2b"
959 | top: "res3b3_branch2b"
960 | name: "bn3b3_branch2b"
961 | type: "BatchNorm"
962 | batch_norm_param {
963 | use_global_stats: true
964 | }
965 | param {
966 | lr_mult: 0.0
967 | decay_mult: 0.0
968 | }
969 | param {
970 | lr_mult: 0.0
971 | decay_mult: 0.0
972 | }
973 | param {
974 | lr_mult: 0.0
975 | decay_mult: 0.0
976 | }
977 | }
978 |
979 | layer {
980 | bottom: "res3b3_branch2b"
981 | top: "res3b3_branch2b"
982 | name: "scale3b3_branch2b"
983 | type: "Scale"
984 | scale_param {
985 | bias_term: true
986 | }
987 | param {
988 | lr_mult: 0.0
989 | decay_mult: 0.0
990 | }
991 | param {
992 | lr_mult: 0.0
993 | decay_mult: 0.0
994 | }
995 | }
996 |
997 | layer {
998 | top: "res3b3_branch2b"
999 | bottom: "res3b3_branch2b"
1000 | name: "res3b3_branch2b_relu"
1001 | type: "ReLU"
1002 | }
1003 |
1004 | layer {
1005 | bottom: "res3b3_branch2b"
1006 | top: "res3b3_branch2c"
1007 | name: "res3b3_branch2c"
1008 | type: "Convolution"
1009 | convolution_param {
1010 | num_output: 512
1011 | kernel_size: 1
1012 | pad: 0
1013 | stride: 1
1014 | bias_term: false
1015 | }
1016 | param {
1017 | lr_mult: 1.0
1018 | }
1019 | }
1020 |
1021 | layer {
1022 | bottom: "res3b3_branch2c"
1023 | top: "res3b3_branch2c"
1024 | name: "bn3b3_branch2c"
1025 | type: "BatchNorm"
1026 | batch_norm_param {
1027 | use_global_stats: true
1028 | }
1029 | param {
1030 | lr_mult: 0.0
1031 | decay_mult: 0.0
1032 | }
1033 | param {
1034 | lr_mult: 0.0
1035 | decay_mult: 0.0
1036 | }
1037 | param {
1038 | lr_mult: 0.0
1039 | decay_mult: 0.0
1040 | }
1041 | }
1042 |
1043 | layer {
1044 | bottom: "res3b3_branch2c"
1045 | top: "res3b3_branch2c"
1046 | name: "scale3b3_branch2c"
1047 | type: "Scale"
1048 | scale_param {
1049 | bias_term: true
1050 | }
1051 | param {
1052 | lr_mult: 0.0
1053 | decay_mult: 0.0
1054 | }
1055 | param {
1056 | lr_mult: 0.0
1057 | decay_mult: 0.0
1058 | }
1059 | }
1060 |
1061 | layer {
1062 | bottom: "res3b2"
1063 | bottom: "res3b3_branch2c"
1064 | top: "res3b3"
1065 | name: "res3b3"
1066 | type: "Eltwise"
1067 | }
1068 |
1069 | layer {
1070 | bottom: "res3b3"
1071 | top: "res3b3"
1072 | name: "res3b3_relu"
1073 | type: "ReLU"
1074 | }
1075 | #------------------- feature layer --------------
1076 |
1077 | layer {
1078 | bottom: "res3b3"
1079 | top: "conv_new"
1080 | name: "conv_new"
1081 | type: "Convolution"
1082 | convolution_param {
1083 | num_output: 256
1084 | kernel_size: 1
1085 | pad: 0
1086 | stride: 1
1087 | weight_filler {
1088 | type: "gaussian"
1089 | std: 0.01
1090 | }
1091 | bias_filler {
1092 | type: "constant"
1093 | value: 0
1094 | }
1095 | }
1096 | param {
1097 | lr_mult: 1.0
1098 | }
1099 | param {
1100 | lr_mult: 2.0
1101 | }
1102 | }
1103 |
1104 | layer {
1105 | bottom: "conv_new"
1106 | top: "conv_new"
1107 | name: "conv_new_relu"
1108 | type: "ReLU"
1109 | }
1110 |
1111 | #------------------- predict layer --------------
1112 |
1113 | layer {
1114 | bottom: "conv_new"
1115 | top: "rpn_cls"
1116 | name: "rpn_cls"
1117 | type: "Convolution"
1118 | convolution_param {
1119 | num_output: 1
1120 | kernel_size: 1
1121 | pad: 0
1122 | weight_filler {
1123 | type: "gaussian"
1124 | std: 0.01
1125 | }
1126 | bias_filler {
1127 | type: "constant"
1128 | value: 0
1129 | }
1130 | }
1131 | param {
1132 | lr_mult: 1.0
1133 | }
1134 | param {
1135 | lr_mult: 2.0
1136 | }
1137 | }
1138 |
1139 | layer {
1140 | bottom: "conv_new"
1141 | top: "rpn_reg"
1142 | name: "rpn_reg"
1143 | type: "Convolution"
1144 | convolution_param {
1145 | num_output: 10
1146 | kernel_size: 1
1147 | pad: 0
1148 | weight_filler {
1149 | type: "gaussian"
1150 | std: 0.01
1151 | }
1152 | bias_filler {
1153 | type: "constant"
1154 | value: 0
1155 | }
1156 | }
1157 | param {
1158 | lr_mult: 1.0
1159 | }
1160 | param {
1161 | lr_mult: 2.0
1162 | }
1163 | }
--------------------------------------------------------------------------------
/predict/model/ResNet_3b_s16_fm2fm_pool2_deep/solver_test.prototxt:
--------------------------------------------------------------------------------
1 | net: "model/ResNet_3b_s16_fm2fm_pool2_deep/test.prototxt"
2 | base_lr: 0.0009
3 | lr_policy: "step"
4 | gamma: 0.9
5 | stepsize: 20000
6 | display: 20
7 | max_iter: 1000000
8 | momentum: 0.9
9 | weight_decay: 0.0001
10 | # We disable standard caffe solver snapshotting and implement our own snapshot
11 | snapshot: 0
12 | #debug_info: true
13 |
14 |
--------------------------------------------------------------------------------
/predict/model/ResNet_3b_s16_fm2fm_pool2_deep/test.prototxt:
--------------------------------------------------------------------------------
1 | name: "ResNet-50"
2 |
3 | input: "data"
4 | input_dim: 1
5 | input_dim: 256
6 | input_dim: 16
7 | input_dim: 16
8 |
9 | mem_param {
10 | optimize_test: true
11 | }
12 | #----------------------transfer gate-------------
13 | layer {
14 | bottom: "data"
15 | top: "res2b_trans_1"
16 | name: "conv_transfer_1"
17 | type: "Convolution"
18 | convolution_param {
19 | num_output: 256
20 | kernel_size: 3
21 | pad: 1
22 | stride: 1
23 | bias_term: true
24 | weight_filler {
25 | type: "xavier"
26 | }
27 | bias_filler {
28 | type: "constant"
29 | value: 0
30 | }
31 | }
32 | param {
33 | lr_mult: 1
34 | decay_mult: 1
35 | }
36 | param {
37 | lr_mult: 1
38 | decay_mult: 0
39 | }
40 | }
41 |
42 | layer {
43 | bottom: "res2b_trans_1"
44 | top: "res2b_trans_1"
45 | name: "res2b_trans_1_relu"
46 | type: "ReLU"
47 | }
48 |
49 | layer {
50 | bottom: "res2b_trans_1"
51 | top: "res2b_trans_2"
52 | name: "conv_transfer_2"
53 | type: "Convolution"
54 | convolution_param {
55 | num_output: 256
56 | kernel_size: 3
57 | pad: 1
58 | stride: 2
59 | bias_term: true
60 | weight_filler {
61 | type: "xavier"
62 | }
63 | bias_filler {
64 | type: "constant"
65 | value: 0
66 | }
67 | }
68 | param {
69 | lr_mult: 1
70 | decay_mult: 1
71 | }
72 | param {
73 | lr_mult: 1
74 | decay_mult: 0
75 | }
76 | }
77 |
78 | layer {
79 | bottom: "res2b_trans_2"
80 | top: "res2b_trans_2"
81 | name: "res2b_trans_2_relu"
82 | type: "ReLU"
83 | }
84 |
85 | layer {
86 | bottom: "res2b_trans_2"
87 | top: "res2b_trans_3"
88 | name: "conv_transfer_3"
89 | type: "Convolution"
90 | convolution_param {
91 | num_output: 256
92 | kernel_size: 3
93 | pad: 1
94 | stride: 1
95 | bias_term: true
96 | weight_filler {
97 | type: "xavier"
98 | }
99 | bias_filler {
100 | type: "constant"
101 | value: 0
102 | }
103 | }
104 | param {
105 | lr_mult: 1
106 | decay_mult: 1
107 | }
108 | param {
109 | lr_mult: 1
110 | decay_mult: 0
111 | }
112 | }
113 |
114 | layer {
115 | bottom: "res2b_trans_3"
116 | top: "res2b_trans_3"
117 | name: "res2b_trans_3_relu"
118 | type: "ReLU"
119 | }
120 |
121 | layer {
122 | bottom: "res2b_trans_3"
123 | top: "res2b_trans_4"
124 | name: "conv_transfer_4"
125 | type: "Convolution"
126 | convolution_param {
127 | num_output: 256
128 | kernel_size: 3
129 | pad: 1
130 | stride: 1
131 | bias_term: true
132 | weight_filler {
133 | type: "xavier"
134 | }
135 | bias_filler {
136 | type: "constant"
137 | value: 0
138 | }
139 | }
140 | param {
141 | lr_mult: 1
142 | decay_mult: 1
143 | }
144 | param {
145 | lr_mult: 1
146 | decay_mult: 0
147 | }
148 | }
149 |
150 |
151 | layer {
152 | bottom: "res2b_trans_4"
153 | top: "res2b_trans_4"
154 | name: "res2b_trans_4_relu"
155 | type: "ReLU"
156 | }
157 |
158 | layer {
159 | bottom: "res2b_trans_4"
160 | top: "res2b_trans_5"
161 | name: "conv_transfer_5"
162 | type: "Convolution"
163 | convolution_param {
164 | num_output: 256
165 | kernel_size: 1
166 | pad: 0
167 | stride: 1
168 | bias_term: true
169 | weight_filler {
170 | type: "xavier"
171 | }
172 | bias_filler {
173 | type: "constant"
174 | value: 0
175 | }
176 | }
177 | param {
178 | lr_mult: 1
179 | decay_mult: 1
180 | }
181 | param {
182 | lr_mult: 1
183 | decay_mult: 0
184 | }
185 | }
186 |
--------------------------------------------------------------------------------
/predict/model/res_pool2/solver_test.prototxt:
--------------------------------------------------------------------------------
1 | net: "model/res_pool2/test.prototxt"
2 | base_lr: 0.08
3 | lr_policy: "step"
4 | gamma: 0.8
5 | stepsize: 50000
6 | display: 100
7 | max_iter: 2000000
8 | momentum: 0.9
9 | weight_decay: 0.0001
10 | # We disable standard caffe solver snapshotting and implement our own snapshot
11 | snapshot: 0
12 | #debug_info: true
13 |
14 |
--------------------------------------------------------------------------------
/predict/model/res_pool2/test.prototxt:
--------------------------------------------------------------------------------
1 | name: "ResNet-50"
2 |
3 | input: "data"
4 | input_dim: 1
5 | input_dim: 3
6 | input_dim: 16
7 | input_dim: 16
8 |
9 |
10 | mem_param {
11 | optimize_test: true
12 | }
13 |
14 | # ------------------------ conv1 -----------------------------
15 | layer {
16 | bottom: "data"
17 | top: "conv1"
18 | name: "conv1"
19 | type: "Convolution"
20 | convolution_param {
21 | num_output: 64
22 | kernel_size: 7
23 | pad: 3
24 | stride: 2
25 | bias_term: false
26 | }
27 | param {
28 | lr_mult: 0
29 | decay_mult: 0
30 | }
31 | }
32 |
33 | layer {
34 | bottom: "conv1"
35 | top: "conv1"
36 | name: "bn_conv1"
37 | type: "BatchNorm"
38 | batch_norm_param {
39 | use_global_stats: true
40 | }
41 | param {
42 | lr_mult: 0
43 | decay_mult: 0
44 | }
45 | param {
46 | lr_mult: 0
47 | decay_mult: 0
48 | }
49 | param {
50 | lr_mult: 0
51 | decay_mult: 0
52 | }
53 | }
54 |
55 | layer {
56 | bottom: "conv1"
57 | top: "conv1"
58 | name: "scale_conv1"
59 | type: "Scale"
60 | scale_param {
61 | bias_term: true
62 | }
63 | param {
64 | lr_mult: 0
65 | decay_mult: 0
66 | }
67 | param {
68 | lr_mult: 0
69 | decay_mult: 0
70 | }
71 | }
72 |
73 | layer {
74 | top: "conv1"
75 | bottom: "conv1"
76 | name: "conv1_relu"
77 | type: "ReLU"
78 | }
79 |
80 | layer {
81 | bottom: "conv1"
82 | top: "pool1"
83 | name: "pool1"
84 | type: "Pooling"
85 | pooling_param {
86 | kernel_size: 2
87 | stride: 2
88 | pool: MAX
89 | }
90 | }
91 |
92 | layer {
93 | bottom: "pool1"
94 | top: "res2a_branch1"
95 | name: "res2a_branch1"
96 | type: "Convolution"
97 | convolution_param {
98 | num_output: 256
99 | kernel_size: 1
100 | pad: 0
101 | stride: 1
102 | bias_term: false
103 | }
104 | param {
105 | lr_mult: 0
106 | decay_mult: 0
107 | }
108 | }
109 |
110 | layer {
111 | bottom: "res2a_branch1"
112 | top: "res2a_branch1"
113 | name: "bn2a_branch1"
114 | type: "BatchNorm"
115 | batch_norm_param {
116 | use_global_stats: true
117 | }
118 | param {
119 | lr_mult: 0
120 | decay_mult: 0
121 | }
122 | param {
123 | lr_mult: 0
124 | decay_mult: 0
125 | }
126 | param {
127 | lr_mult: 0
128 | decay_mult: 0
129 | }
130 | }
131 |
132 | layer {
133 | bottom: "res2a_branch1"
134 | top: "res2a_branch1"
135 | name: "scale2a_branch1"
136 | type: "Scale"
137 | scale_param {
138 | bias_term: true
139 | }
140 | param {
141 | lr_mult: 0
142 | decay_mult: 0
143 | }
144 | param {
145 | lr_mult: 0
146 | decay_mult: 0
147 | }
148 | }
149 |
150 | layer {
151 | bottom: "pool1"
152 | top: "res2a_branch2a"
153 | name: "res2a_branch2a"
154 | type: "Convolution"
155 | convolution_param {
156 | num_output: 64
157 | kernel_size: 1
158 | pad: 0
159 | stride: 1
160 | bias_term: false
161 | }
162 | param {
163 | lr_mult: 0
164 | decay_mult: 0
165 | }
166 | }
167 |
168 | layer {
169 | bottom: "res2a_branch2a"
170 | top: "res2a_branch2a"
171 | name: "bn2a_branch2a"
172 | type: "BatchNorm"
173 | batch_norm_param {
174 | use_global_stats: true
175 | }
176 | param {
177 | lr_mult: 0
178 | decay_mult: 0
179 | }
180 | param {
181 | lr_mult: 0
182 | decay_mult: 0
183 | }
184 | param {
185 | lr_mult: 0
186 | decay_mult: 0
187 | }
188 | }
189 |
190 | layer {
191 | bottom: "res2a_branch2a"
192 | top: "res2a_branch2a"
193 | name: "scale2a_branch2a"
194 | type: "Scale"
195 | scale_param {
196 | bias_term: true
197 | }
198 | param {
199 | lr_mult: 0
200 | decay_mult: 0
201 | }
202 | param {
203 | lr_mult: 0
204 | decay_mult: 0
205 | }
206 | }
207 |
208 | layer {
209 | top: "res2a_branch2a"
210 | bottom: "res2a_branch2a"
211 | name: "res2a_branch2a_relu"
212 | type: "ReLU"
213 | }
214 |
215 | layer {
216 | bottom: "res2a_branch2a"
217 | top: "res2a_branch2b"
218 | name: "res2a_branch2b"
219 | type: "Convolution"
220 | convolution_param {
221 | num_output: 64
222 | kernel_size: 3
223 | pad: 1
224 | stride: 1
225 | bias_term: false
226 | }
227 | param {
228 | lr_mult: 0
229 | decay_mult: 0
230 | }
231 | }
232 |
233 | layer {
234 | bottom: "res2a_branch2b"
235 | top: "res2a_branch2b"
236 | name: "bn2a_branch2b"
237 | type: "BatchNorm"
238 | batch_norm_param {
239 | use_global_stats: true
240 | }
241 | param {
242 | lr_mult: 0
243 | decay_mult: 0
244 | }
245 | param {
246 | lr_mult: 0
247 | decay_mult: 0
248 | }
249 | param {
250 | lr_mult: 0
251 | decay_mult: 0
252 | }
253 | }
254 |
255 | layer {
256 | bottom: "res2a_branch2b"
257 | top: "res2a_branch2b"
258 | name: "scale2a_branch2b"
259 | type: "Scale"
260 | scale_param {
261 | bias_term: true
262 | }
263 | param {
264 | lr_mult: 0
265 | decay_mult: 0
266 | }
267 | param {
268 | lr_mult: 0
269 | decay_mult: 0
270 | }
271 | }
272 |
273 | layer {
274 | top: "res2a_branch2b"
275 | bottom: "res2a_branch2b"
276 | name: "res2a_branch2b_relu"
277 | type: "ReLU"
278 | }
279 |
280 | layer {
281 | bottom: "res2a_branch2b"
282 | top: "res2a_branch2c"
283 | name: "res2a_branch2c"
284 | type: "Convolution"
285 | convolution_param {
286 | num_output: 256
287 | kernel_size: 1
288 | pad: 0
289 | stride: 1
290 | bias_term: false
291 | }
292 | param {
293 | lr_mult: 0
294 | decay_mult: 0
295 | }
296 | }
297 |
298 | layer {
299 | bottom: "res2a_branch2c"
300 | top: "res2a_branch2c"
301 | name: "bn2a_branch2c"
302 | type: "BatchNorm"
303 | batch_norm_param {
304 | use_global_stats: true
305 | }
306 | param {
307 | lr_mult: 0
308 | decay_mult: 0
309 | }
310 | param {
311 | lr_mult: 0
312 | decay_mult: 0
313 | }
314 | param {
315 | lr_mult: 0
316 | decay_mult: 0
317 | }
318 | }
319 |
320 | layer {
321 | bottom: "res2a_branch2c"
322 | top: "res2a_branch2c"
323 | name: "scale2a_branch2c"
324 | type: "Scale"
325 | scale_param {
326 | bias_term: true
327 | }
328 | param {
329 | lr_mult: 0
330 | decay_mult: 0
331 | }
332 | param {
333 | lr_mult: 0
334 | decay_mult: 0
335 | }
336 | }
337 |
338 | layer {
339 | bottom: "res2a_branch1"
340 | bottom: "res2a_branch2c"
341 | top: "res2a"
342 | name: "res2a"
343 | type: "Eltwise"
344 | }
345 |
346 | layer {
347 | bottom: "res2a"
348 | top: "res2a"
349 | name: "res2a_relu"
350 | type: "ReLU"
351 | }
352 |
353 | layer {
354 | bottom: "res2a"
355 | top: "pool2"
356 | name: "pool2"
357 | type: "Pooling"
358 | pooling_param {
359 | kernel_size: 2
360 | stride: 2
361 | pool: MAX
362 | }
363 | }
364 |
365 |
366 | layer {
367 | bottom: "pool2"
368 | top: "res2b_branch2a"
369 | name: "res2b_branch2a"
370 | type: "Convolution"
371 | convolution_param {
372 | num_output: 64
373 | kernel_size: 1
374 | pad: 0
375 | stride: 1
376 | bias_term: false
377 | }
378 | param {
379 | lr_mult: 0
380 | decay_mult: 0
381 | }
382 | }
383 |
384 | layer {
385 | bottom: "res2b_branch2a"
386 | top: "res2b_branch2a"
387 | name: "bn2b_branch2a"
388 | type: "BatchNorm"
389 | batch_norm_param {
390 | use_global_stats: true
391 | }
392 | param {
393 | lr_mult: 0.0
394 | decay_mult: 0.0
395 | }
396 | param {
397 | lr_mult: 0.0
398 | decay_mult: 0.0
399 | }
400 | param {
401 | lr_mult: 0.0
402 | decay_mult: 0.0
403 | }
404 | }
405 |
406 | layer {
407 | bottom: "res2b_branch2a"
408 | top: "res2b_branch2a"
409 | name: "scale2b_branch2a"
410 | type: "Scale"
411 | scale_param {
412 | bias_term: true
413 | }
414 | param {
415 | lr_mult: 0.0
416 | decay_mult: 0.0
417 | }
418 | param {
419 | lr_mult: 0.0
420 | decay_mult: 0.0
421 | }
422 | }
423 |
424 | layer {
425 | top: "res2b_branch2a"
426 | bottom: "res2b_branch2a"
427 | name: "res2b_branch2a_relu"
428 | type: "ReLU"
429 | }
430 |
431 | layer {
432 | bottom: "res2b_branch2a"
433 | top: "res2b_branch2b"
434 | name: "res2b_branch2b"
435 | type: "Convolution"
436 | convolution_param {
437 | num_output: 64
438 | kernel_size: 3
439 | pad: 1
440 | stride: 1
441 | bias_term: false
442 | }
443 | param {
444 | lr_mult: 0
445 | decay_mult: 0
446 | }
447 | }
448 |
449 | layer {
450 | bottom: "res2b_branch2b"
451 | top: "res2b_branch2b"
452 | name: "bn2b_branch2b"
453 | type: "BatchNorm"
454 | batch_norm_param {
455 | use_global_stats: true
456 | }
457 | param {
458 | lr_mult: 0.0
459 | decay_mult: 0.0
460 | }
461 | param {
462 | lr_mult: 0.0
463 | decay_mult: 0.0
464 | }
465 | param {
466 | lr_mult: 0.0
467 | decay_mult: 0.0
468 | }
469 | }
470 |
471 | layer {
472 | bottom: "res2b_branch2b"
473 | top: "res2b_branch2b"
474 | name: "scale2b_branch2b"
475 | type: "Scale"
476 | scale_param {
477 | bias_term: true
478 | }
479 | param {
480 | lr_mult: 0.0
481 | decay_mult: 0.0
482 | }
483 | param {
484 | lr_mult: 0.0
485 | decay_mult: 0.0
486 | }
487 | }
488 |
489 | layer {
490 | top: "res2b_branch2b"
491 | bottom: "res2b_branch2b"
492 | name: "res2b_branch2b_relu"
493 | type: "ReLU"
494 | }
495 |
496 | layer {
497 | bottom: "res2b_branch2b"
498 | top: "res2b_branch2c"
499 | name: "res2b_branch2c"
500 | type: "Convolution"
501 | convolution_param {
502 | num_output: 256
503 | kernel_size: 1
504 | pad: 0
505 | stride: 1
506 | bias_term: false
507 | }
508 | param {
509 | lr_mult: 0
510 | decay_mult: 0
511 | }
512 | }
513 |
514 | layer {
515 | bottom: "res2b_branch2c"
516 | top: "res2b_branch2c"
517 | name: "bn2b_branch2c"
518 | type: "BatchNorm"
519 | batch_norm_param {
520 | use_global_stats: true
521 | }
522 | param {
523 | lr_mult: 0.0
524 | decay_mult: 0.0
525 | }
526 | param {
527 | lr_mult: 0.0
528 | decay_mult: 0.0
529 | }
530 | param {
531 | lr_mult: 0.0
532 | decay_mult: 0.0
533 | }
534 | }
535 |
536 | layer {
537 | bottom: "res2b_branch2c"
538 | top: "res2b_branch2c"
539 | name: "scale2b_branch2c"
540 | type: "Scale"
541 | scale_param {
542 | bias_term: true
543 | }
544 | param {
545 | lr_mult: 0.0
546 | decay_mult: 0.0
547 | }
548 | param {
549 | lr_mult: 0.0
550 | decay_mult: 0.0
551 | }
552 | }
553 |
554 | layer {
555 | bottom: "pool2"
556 | bottom: "res2b_branch2c"
557 | top: "res2b"
558 | name: "res2b"
559 | type: "Eltwise"
560 | }
561 |
--------------------------------------------------------------------------------
/predict/output/ResNet_3b_s16/param.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuyuisanai/RSA-for-object-detection/626ad81172b260ecf8257a80731e5236fe41cb63/predict/output/ResNet_3b_s16/param.mat
--------------------------------------------------------------------------------
/predict/output/ResNet_3b_s16/tot_wometa_1epoch:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuyuisanai/RSA-for-object-detection/626ad81172b260ecf8257a80731e5236fe41cb63/predict/output/ResNet_3b_s16/tot_wometa_1epoch
--------------------------------------------------------------------------------
/predict/output/ResNet_3b_s16_fm2fm_pool2_deep/65w:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuyuisanai/RSA-for-object-detection/626ad81172b260ecf8257a80731e5236fe41cb63/predict/output/ResNet_3b_s16_fm2fm_pool2_deep/65w
--------------------------------------------------------------------------------
/predict/result1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuyuisanai/RSA-for-object-detection/626ad81172b260ecf8257a80731e5236fe41cb63/predict/result1.jpg
--------------------------------------------------------------------------------
/predict/result2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuyuisanai/RSA-for-object-detection/626ad81172b260ecf8257a80731e5236fe41cb63/predict/result2.jpg
--------------------------------------------------------------------------------
/predict/result3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuyuisanai/RSA-for-object-detection/626ad81172b260ecf8257a80731e5236fe41cb63/predict/result3.jpg
--------------------------------------------------------------------------------
/predict/result4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuyuisanai/RSA-for-object-detection/626ad81172b260ecf8257a80731e5236fe41cb63/predict/result4.jpg
--------------------------------------------------------------------------------
/predict/scale_fddb.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuyuisanai/RSA-for-object-detection/626ad81172b260ecf8257a80731e5236fe41cb63/predict/scale_fddb.mat
--------------------------------------------------------------------------------
/predict/script_baseline_rpn.m:
--------------------------------------------------------------------------------
1 | addpath('utils');
2 | clear parsed;
3 | %% configure your own
4 | root = ''; % lfw
5 | %% load param
6 | timestamp = strrep(mat2str(clock), ' ', '');
7 | load(fullfile(pwd, 'output', 'ResNet_3b_s16', 'param.mat'));
8 | % diary(fullfile(param.output_dir, ['test-' fullfile(timestamp) '.txt']));
9 | param.init_model = fullfile(pwd, 'output/ResNet_3b_s16/tot_wometa_1epoch'); %fullfile(pwd, 'output/ResNet-101L/1000000');
10 | param.solverfile = 'solver_test.prototxt';
11 | param.net_def_dir = fullfile(pwd, 'model', 'ResNet_3b_s16');
12 | param.max_img =300;
13 | %% load list
14 | list_test = list; % `list' must already exist in the workspace (N-by-1 cell of image paths)
15 | param.dataset_root = '';
16 | if ~exist('work_num', 'var')
17 |     work_num = 1;
18 | end
19 | if ~exist('work_id', 'var')
20 |     work_id = 1;
21 | end
22 | param.gpu_id = 0:3;
23 | l = length(list_test)/work_num; % shard the list across work_num workers
24 | thisid = floor(l * (work_id-1)+1):ceil(l*work_id);
25 | thisid(thisid>length(list_test))=[];
26 | list = list_test(thisid);
27 | fprintf('worker: %d / %d\n', work_id, work_num);
28 | %% active mex
29 | 
30 |
31 | %% init caffe solver
32 | caffe.reset_all;
33 | caffe_solver = caffe.get_solver(fullfile(param.net_def_dir, param.solverfile), param.gpu_id);
34 | if ~isempty(param.init_model)
35 |     assert(exist(param.init_model)==2, 'Cannot find caffemodel.');
36 |     caffe_solver.use_caffemodel(param.init_model);
37 | end
38 | num = length(list);
39 | test_num = ceil(num/length(param.gpu_id)); % one image per GPU per iteration
40 | tot_time = tic();
41 | for i = 1 : test_num
42 |     drawnow;
43 |     if mod(i, 100) < 1 % progress report every 100 iterations
44 |         fprintf('Testing %d/%d...', i, test_num);
45 |         toc(tot_time);
46 |     end
47 |     now_id = mod((i-1)*length(param.gpu_id):i*length(param.gpu_id)-1, num) + 1; % round-robin image ids for this batch
48 |     list_t = cellfun(@(x) fullfile(root, x), list(now_id), 'uni', 0);
49 |     parsed_all = detect_all( list_t, param, caffe_solver, [0], 5 );
50 |     for ii = 1 : length(param.gpu_id)
51 |         score = parsed_all(ii).cls_score;
52 |         point = parsed_all(ii).point;
53 |         box = parsed_all(ii).box;
54 |         %box = [box, score];
55 |         if true % traditional nms
56 |             id = nms(box, 0.3);
57 |             if length(id) > 2000 % cap detections per image
58 |                 id = id(1:2000);
59 |                 % FIX: removed `points_new = points_new(1:2000);' -- points_new is undefined in this branch (it is only produced by nms_pts in the else branch), so the statement errored whenever >2000 boxes survived NMS; points are already truncated via point(id,:) below.
60 |             end
61 |             parsed_all(ii).cls_score = score(id);
62 |             parsed_all(ii).box = box(id,:);
63 |             parsed_all(ii).point = point(id,:);
64 |         else % points nms
65 |             [ id, points_new ] = nms_pts( box, point, score, 0.3, 0 );
66 |             if length(id) > 2000
67 |                 id = id(1:2000);
68 |                 points_new = points_new(1:2000,:);
69 |             end
70 |             parsed_all(ii).cls_score = score(id);
71 |             parsed_all(ii).box = box(id,:);
72 |             parsed_all(ii).point = points_new;
73 |         end
74 |     end
75 |     parsed(now_id) = parsed_all;
76 | end
77 | % save('-v7.3', ['worker' num2str(work_id) 'roi.mat'], 'roi', 'list');
78 | plot_parsed(parsed, 5.81, root, list)
--------------------------------------------------------------------------------
/predict/script_featmap_2_result.m:
--------------------------------------------------------------------------------
1 | %% load param
2 | param.net_def_dir = 'model/ResNet_3b_s16_f2r'; % featmap-to-result head network
3 | param.init_model = fullfile(pwd, 'output/ResNet_3b_s16/tot_wometa_1epoch'); %fullfile(pwd, 'output/ResNet-101L/1000000');
4 | param.solverfile = 'solver_test.prototxt';
5 | param.gpu_id = 0;
6 | mins = 3; % minimum scale index handed to detect_all_by_featmap below
7 | maxs = 14; % set but not referenced in this script
8 | %% init caffe solver
9 | caffe.reset_all;
10 | caffe_solver = caffe.get_solver(fullfile(param.net_def_dir, param.solverfile), param.gpu_id);
11 | if ~isempty(param.init_model)
12 |     assert(exist(param.init_model)==2, 'Cannot find caffemodel.');
13 |     caffe_solver.use_caffemodel(param.init_model);
14 | end
15 | caffe_solver.set_phase('test');
16 | num = length(featmap_trans); % featmap_trans comes from script_featmap_transfer (workspace)
17 | cons = 0; % counters below are initialized but not updated in this script
18 | hit = 0;
19 | sums = 0;
20 | missed = cell(num, 1);
21 | recall = 0;
22 | for i = 1 : num
23 |     ti = tic();
24 |     drawnow;
25 |     fprintf('Step 3 rpn: %d/%d...', i, num);
26 |     parsed(i) = detect_all_by_featmap( featmap_trans{i}, unique(min(scale{i}, 5)), param, caffe_solver, mins); % scales clamped to <=5
27 |     toc(ti);
28 | end
29 | fprintf('Modify scale...')
30 | for i = 1 : num
31 |     s = max(size(imread(fullfile(param.test_root, list{i}))))/param.max_img; % map detections back to original image coords -- assumes longest side was scaled to param.max_img (TODO confirm against gen_featmap)
32 |     parsed(i).box(:,1:4) = parsed(i).box(:,1:4) * s;
33 |     parsed(i).point = parsed(i).point * s;
34 | end
35 | fprintf('Done\n');
36 |
--------------------------------------------------------------------------------
/predict/script_featmap_transfer.m:
--------------------------------------------------------------------------------
1 | %% modify param
2 | param.net_def_dir = 'model/ResNet_3b_s16_fm2fm_pool2_deep'; % RSA featmap-to-featmap network
3 | param.gpu_id = 0;
4 | param.init_model = fullfile(pwd, 'output/ResNet_3b_s16_fm2fm_pool2_deep/65w');
5 | 
6 | %% init caffe solver
7 | caffe.reset_all;
8 | caffe_solver = caffe.get_solver(fullfile(param.net_def_dir, param.solverfile), param.gpu_id);
9 | if ~isempty(param.init_model)
10 |     assert(exist(param.init_model)==2, 'Cannot find caffemodel.');
11 |     caffe_solver.use_caffemodel(param.init_model);
12 | end
13 | caffe_solver.set_phase('test');
14 | num = length(list);
15 | clear featmap_trans
16 | for i = 1 : num
17 |     ti = tic();
18 |     drawnow;
19 |     fprintf('Step 2 rsa: %d/%d...', i, num);
20 |     scale{i} = unique(min(scale{i},5)); % clamp scale indices to at most 5 and dedupe
21 |     if numel(find(scale{i}<=5)) > 1 % all entries are <=5 after the clamp, so this tests "more than one scale"
22 |         featmap_t = featmap{i};
23 |         scale_t = scale{i};
24 |         featmap_trans{i} = gen_trans_featmap( featmap_t, caffe_solver, scale_t ); % derive featmaps for the remaining scales
25 |     else
26 |         featmap_trans{i} = featmap(i); % NOTE(review): featmap(i) is a 1x1 cell -- presumably intentional so featmap_trans{i} stays a cell of per-scale maps like the branch above; confirm against detect_all_by_featmap
27 |     end
28 |     toc(ti);
29 | end
30 | 
31 | % GOTO Step3: featmap 2 result -- script_featmap_2_result.m
32 | 
--------------------------------------------------------------------------------
/predict/script_gen_featmap.m:
--------------------------------------------------------------------------------
1 | 
2 | clear parsed scale_t featmap;
3 | addpath(fullfile(pwd, 'utils'));
4 | %% load param (please do not modify)
5 | param.net_def_dir = fullfile(pwd, 'model/res_pool2'); % trunk FCN up to pool2
6 | param.init_model = fullfile(pwd, 'output/ResNet_3b_s16/tot_wometa_1epoch');
7 | param.solverfile = 'solver_test.prototxt';
8 | 
9 | %% load list
10 | 
11 | %% active mex, no need if the path has been set
12 | if false
13 |     addpath(pwd);
14 |     t = pwd;
15 |     cd(fullfile( '+caffe', 'private'));
16 |     caffe.init_log(fullfile(param.caffe_log_dir, 'caffe'));
17 |     cd(t);
18 | end
19 | clear t parsed;
20 | 
21 | %% init caffe solver
22 | caffe.reset_all;
23 | caffe_solver = caffe.get_solver(fullfile(param.net_def_dir, param.solverfile), param.gpu_id);
24 | if ~isempty(param.init_model)
25 |     assert(exist(param.init_model)==2, 'Cannot find caffemodel.');
26 |     caffe_solver.use_caffemodel(param.init_model);
27 | end
28 | caffe_solver.set_phase('test');
29 | num = length(list);
30 | if ~exist('scale', 'var') % default: evaluate every image at scales 1..5
31 |     scale = cellfun(@(x) 1:5, list, 'uni', 0);
32 | end
33 | test_num = ceil(num/length(param.gpu_id)); % one image per GPU per iteration
34 | for i = 1 : test_num
35 |     ti = tic();
36 |     drawnow;
37 |     fprintf('Step 1 fcn: %d/%d...', i, test_num);
38 |     now_id = mod((i-1)*length(param.gpu_id):i*length(param.gpu_id)-1, num) + 1; % round-robin image ids for this batch
39 |     list_t = list(now_id);
40 |     for j = 1 : length(now_id)
41 |         scale_t(j) = min(max(scale{now_id(j)})-5, 0); % nonpositive offset from the largest scale -- NOTE(review): exact semantics defined in gen_featmap
42 |     end
43 |     featmap(now_id) = gen_featmap( list_t, caffe_solver, param, scale_t );
44 |     toc(ti);
45 | end
46 | % GOTO Step2: featmap 2 featmap -- script_featmap_transfer.m
47 | 
--------------------------------------------------------------------------------
/predict/script_start.m:
--------------------------------------------------------------------------------
1 | %% Step0: compile CaffeMex_v2 and link matlab/+caffe to ./+caffe
2 | %% Step1: load your list in matlab workspace which should be an N-by-1 cell
3 | %% Step2: modify your configuration in "customize configuration"
4 | %% Step3: cd utils and mex nms_mex.cpp
5 | %% Step4: run this script
6 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
7 | 
8 | if false % Set to true if you have FDDB dataset
9 |     % Here we use some imgs in FDDB for an example
10 |     list = textread('G:/temp/face/detection/FDDB/list.txt', '%s');
11 |     list = list(1:64);
12 |     load scale_fddb;
13 |     scale = fddb_cls;
14 | else
15 |     % Test imgs in full scales before we provide code for SFN.
16 |     list = {'testimg1.jpg', 'testimg2.jpg', 'testimg3.jpg', 'testimg4.jpg'};
17 |     clear scale % without per-image scales, script_gen_featmap defaults to 1:5
18 | end
19 | 
20 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
21 | %% customize configuration
22 | load(fullfile(pwd, 'output', 'ResNet_3b_s16', 'param.mat'));
23 | param.test_root = ''; % Prefix of your list.;
24 | param.caffe_log_dir = fullfile(pwd, 'log/');
25 | param.max_img =2048; % images rescaled against this bound; also used to undo scaling in script_featmap_2_result
26 | param.target_ratio = -4:0; % Pyramid layers, best leave it as default (-4:0)
27 | param.det_thresh = 5; % detection score threshold
28 | param.plot_thresh = 7; % plotting score threshold
29 | param.gpu_id = 0:1; % For multiple gpus, just set it as an array. E.g. 0:7.
30 | 
31 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
32 | %% run rpn+rsa
33 | caffe.init_log('log/');
34 | script_gen_featmap; % GPU runtime: 5ms per pic on Titan Xp @2048px
35 | script_featmap_transfer; % GPU runtime: 0.3ms per pic on Titan Xp
36 | script_featmap_2_result; % GPU runtime: 3.2ms per pic on Titan Xp
37 | % Detection result (bbox+landmarks) will be stored in variable `parsed'
38 | 
39 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
40 | %% post processing (NMS, set threshold, and show the result)
41 | threshold = 8;
42 | nms_ratio = 0.6;
43 | p = nms_parsed(parsed, threshold, nms_ratio);
44 | plot_parsed(p, threshold, '', list)
45 | 
--------------------------------------------------------------------------------
/predict/testimg1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuyuisanai/RSA-for-object-detection/626ad81172b260ecf8257a80731e5236fe41cb63/predict/testimg1.jpg
--------------------------------------------------------------------------------
/predict/testimg2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuyuisanai/RSA-for-object-detection/626ad81172b260ecf8257a80731e5236fe41cb63/predict/testimg2.jpg
--------------------------------------------------------------------------------
/predict/testimg3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuyuisanai/RSA-for-object-detection/626ad81172b260ecf8257a80731e5236fe41cb63/predict/testimg3.jpg
--------------------------------------------------------------------------------
/predict/testimg4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/liuyuisanai/RSA-for-object-detection/626ad81172b260ecf8257a80731e5236fe41cb63/predict/testimg4.jpg
--------------------------------------------------------------------------------
/predict/utils/configure.m:
--------------------------------------------------------------------------------
1 | %% load data annotation
2 | param.anno_pos = {'annotation/my_pos', 'annotation/renren_pos', 'annotation/newrenren_pos', 'annotation/imdb1_pos.mat', 'annotation/meta_pos.mat'};
3 | param.anno_neg = {'annotation/my_neg', 'annotation/renren_neg', 'annotation/newrenren_neg'};
4 | param.dataset_root = 'G:\temp\face\detection';
5 | 
6 | 
7 | %% load net
8 | param.model_name = 'ResNet_3b_s16_fm2fm_feat_deep';
9 | param.solverfile = 'solver_train.prototxt';
10 | param.netnolossfile = 'net_noloss.prototxt';
11 | assert(~isempty(param.netnolossfile));
12 | param.net_def_dir = fullfile(pwd, 'model', 'rpn_prototxts', param.model_name);
13 | param.output_dir = fullfile(pwd, 'output', param.model_name);
14 | 
15 | %% configure training setting
16 | param.gpu_id = 0:3;
17 | param.pos_gpu = 0:3; % GPUs handling positive samples
18 | param.neg_gpu = 0; % GPU handling negative samples
19 | param.batch_size_per_gpu = 1;
20 | % param.anchor_is_stride_center = true; % always true
21 | param.speedup = 1;
22 | param.max_img = 1200; % image-size bounds (pixels)
23 | param.max_pos_img = 1200;
24 | param.min_img = 32;
25 | param.max_target = 1024; % target-size bounds (pixels)
26 | param.min_target = 32;
27 | param.val_num = 128;
28 | param.fast_valid = 1;
29 | param.test_interval = 10000; % iteration intervals
30 | param.snapshot_interval = 10000;
31 | param.display_interval = 500;
32 | param.max_rand_offset = 5;
33 | param.reg_points_diff = true;
34 | param.anchor_scale = 1; % single anchor scale per location
35 | param.multishape_anchor = false;
36 | param.pos_overlap_ratio = 0.7; % overlap threshold for positives -- presumably IoU; confirm in sampler
37 | param.neg_overlap_ratio = 0.5; % overlap threshold for negatives
38 | param.gray_augment_ratio = 0.4; % NOTE(review): likely grayscale-augmentation fraction -- confirm in data layer
39 | param.validation_ratio = 0.01;
40 | param.ignore_min_roi = 5;
41 | 
42 | %% configure init setting
43 | %param.init_model = 'model/pre_trained_models/ResNet-101L/ResNet-101-model.caffemodel';
44 | param.init_model = 'weight_share_feat_new_68w';
45 | param.out_dir = fullfile(pwd, 'output', param.model_name);
46 | param.caffe_log_dir = fullfile(param.out_dir, 'caffe_log/');
--------------------------------------------------------------------------------
/predict/utils/convert_caffe2img.m:
--------------------------------------------------------------------------------
1 | function out = convert_caffe2img( out )
2 | % Caffe blob -> displayable image: reverse the channel dim (BGR<->RGB) and swap the first two dims; result is single.
3 | assert(length(size(out)) <= 4, 'Only support at most 4-D data for convert.');
4 | out = single(permute(flipdim(out, 3), [2 1 3 4]));
5 | end
6 | 
--------------------------------------------------------------------------------
/predict/utils/convert_caffe2mat.m:
--------------------------------------------------------------------------------
1 | function out = convert_caffe2mat( out )
2 | % Caffe blob -> matlab layout: swap the first two (width/height) dims and cast to single.
3 | assert(length(size(out)) <= 4, 'Only support at most 4-D data for convert.');
4 | swapped = permute(out, [2 1 3 4]);
5 | out = single(swapped);
6 | end
7 | 
8 | 
--------------------------------------------------------------------------------
/predict/utils/convert_ignore.m:
--------------------------------------------------------------------------------
1 | function rect = convert_ignore( reccell )
2 | % Pack a cell array of ignore-region structs (fields x, y, w, h) into an int16 N-by-4 matrix of rounded rects.
3 | rect = zeros(0, 4, 'int16');
4 | for k = 1 : length(reccell)
5 |     r = reccell{k};
6 |     rect(k, :) = round([r.x, r.y, r.w, r.h]);
7 | end
8 | end
9 | 
--------------------------------------------------------------------------------
/predict/utils/convert_img2caffe.m:
--------------------------------------------------------------------------------
1 | function out = convert_img2caffe( out )
2 | % Image -> caffe blob layout: reverse the channel dim (RGB<->BGR) and swap the first two dims; result is single.
3 | assert(length(size(out)) <= 4, 'Only support at most 4-D data for convert.');
4 | out = single(permute(flipdim(out, 3), [2 1 3 4]));
5 | end
6 | 
--------------------------------------------------------------------------------
/predict/utils/convert_input_to_struct.m:
--------------------------------------------------------------------------------
1 | function parsed = convert_input_to_struct(input_data, param, thresh_cls) % Decode per-GPU caffe input blobs {img, cls, box, pts, _, pts_valid} into detection structs
2 | for gpu_id = 1:length(input_data)
3 |     cls = convert_caffe2mat(input_data{gpu_id}{2});
4 |     box = convert_caffe2mat(input_data{gpu_id}{3});
5 |     pts = convert_caffe2mat(input_data{gpu_id}{4});
6 |     pts_valid = convert_caffe2mat(input_data{gpu_id}{6});
7 |     fmwidth = size(cls, 2);
8 |     fmheight = size(cls, 1);
9 |     % reshape to [h w anchor]
10 |     cls = reshape(cls, fmheight, fmwidth, param.anchor_scale, 1);
11 |     box = reshape(box, fmheight, fmwidth, 4, param.anchor_scale);
12 |     pts = reshape(pts, fmheight, fmwidth, length(param.anchor_point), param.anchor_scale);
13 |     pts_valid = reshape(pts_valid, fmheight, fmwidth, length(param.anchor_point)/2 , param.anchor_scale);
14 |     for scale = 1 : param.anchor_scale
15 |         anchor_box_len = [param.anchor_box(scale,3) - param.anchor_box(scale,1), param.anchor_box(scale,4) - param.anchor_box(scale,2)]; % anchor [w h] for this scale
16 |         [validy, validx] = find(cls(:,:,scale) >= thresh_cls); % feature-map cells whose score passes the threshold
17 |         score_t{scale} = [];
18 |         target_box_t{scale} = [];
19 |         target_pts_t{scale} = [];
20 |         pts_score_t{scale} = [];
21 |         for valid_id = 1 : length(validy)
22 |             score_t{scale}(valid_id) = cls(validy(valid_id),validx(valid_id),scale);
23 |             anchor_center_now = [(validx(valid_id)-1)*param.stride (validy(valid_id)-1)*param.stride] + param.anchor_center; % cell -> image coords
24 |             % for box
25 |             box_delta = reshape(box(validy(valid_id), validx(valid_id), :, scale), 1, []); % [dx dy dw dh] relative to anchor
26 |             target_box_center = anchor_center_now + box_delta(1:2) .* anchor_box_len;
27 |             target_box_length = anchor_box_len .* box_delta(3:4);
28 |             target_box = [ 1, 0, -0.5, 0;
29 |                 0, 1, 0, -0.5;
30 |                 1, 0, 0.5, 0;
31 |                 0, 1, 0, 0.5 ] * [target_box_center, target_box_length]'; % [cx cy w h] -> [x1 y1 x2 y2]
32 |             target_box_t{scale}(valid_id,:) = target_box';
33 |             % for point
34 |             pts_score = reshape(pts_valid(validy(valid_id), validx(valid_id), :, scale) , 1, []);
35 |             pts_delta = reshape(pts(validy(valid_id), validx(valid_id), :, scale), 1, []);
36 |             anchor_point_now = (param.anchor_point) * max(anchor_box_len) + reshape(repmat(anchor_center_now, [length(param.anchor_point)/2 1])', 1, []); % anchor landmarks in image coords
37 |             target_pts = pts_delta * max(anchor_box_len) + anchor_point_now; % offsets are normalized by the anchor's longer side
38 |             target_pts_t{scale}(valid_id,:) = target_pts;
39 |             pts_score_t{scale}(valid_id,:) = pts_score;
40 |         end
41 |     end
42 |     parsed(gpu_id).img = convert_caffe2img(input_data{gpu_id}{1}+127)/255; % undo mean-shift, rescale to [0,1] for display
43 |     parsed(gpu_id).active = cls;
44 |     parsed(gpu_id).cls_score = cell2mat(score_t)';
45 |     parsed(gpu_id).box = cell2mat(target_box_t');
46 |     parsed(gpu_id).point = cell2mat(target_pts_t');
47 |     parsed(gpu_id).point_score = cell2mat(pts_score_t');
48 | end
49 | end
--------------------------------------------------------------------------------
/predict/utils/convert_mat2caffe.m:
--------------------------------------------------------------------------------
1 | function out = convert_mat2caffe( out )
2 | % Matlab array -> caffe blob layout: swap the first two dims and cast to single.
3 | assert(length(size(out)) <= 4, 'Only support at most 4-D data for convert.');
4 | swapped = permute(out, [2 1 3 4]);
5 | out = single(swapped);
6 | end
7 | 
8 | 
--------------------------------------------------------------------------------
/predict/utils/convert_output_to_struct.m:
--------------------------------------------------------------------------------
1 | function parsed = convert_output_to_struct( param, handle, issolver, thresh_cls) % Decode net outputs (rpn_cls/rpn_reg blobs) into detection structs, one per GPU net
2 | 
3 | factor = 1;
4 | if param.multishape_anchor % three anchor shapes per scale when enabled
5 |     factor = 3;
6 | end
7 | if nargin < 4
8 |     thresh_cls = 0; % default score threshold
9 | end
10 | if issolver
11 |     handle = handle.nets; % one net per GPU
12 | else
13 |     handle = {handle};
14 | end
15 | parsed(length(handle)) = struct(...
16 |     'cls_score', zeros(0, 1),...
17 |     'box', zeros(0, 4),...
18 |     'active', []);
19 | for gpu_id = 1 : length(handle)
20 | 
21 |     %fetch output
22 |     cls = convert_caffe2mat(handle{gpu_id}.blob_vec(handle{gpu_id}.name2blob_index('rpn_cls')).get_data());
23 |     pts = convert_caffe2mat(handle{gpu_id}.blob_vec(handle{gpu_id}.name2blob_index('rpn_reg')).get_data());
24 |     %pts = convert_caffe2mat(handle{gpu_id}.blob_vec(handle{gpu_id}.name2blob_index('rpn_pts')).get_data());
25 |     %pts_valid = convert_caffe2mat(handle{gpu_id}.blob_vec(handle{gpu_id}.name2blob_index('rpn_pts_valid')).get_data());
26 |     fmwidth = size(cls, 2);
27 |     fmheight = size(cls, 1);
28 |     % reshape to [h w anchor]
29 |     cls = reshape(cls, fmheight, fmwidth, param.anchor_scale * factor, 1);
30 |     pts = reshape(pts, fmheight, fmwidth, 10, param.anchor_scale * factor); % 10 = 5 landmarks x (x,y)
31 |     pts_out = {};
32 |     %pts = reshape(pts, fmheight, fmwidth, length(param.anchor_point), param.anchor_scale);
33 |     %pts_valid = reshape(pts_valid, fmheight, fmwidth, length(param.anchor_point)/2 , param.anchor_scale);
34 |     % for each anchor scale
35 |     for anchor_id = 1 : param.anchor_scale * factor
36 |         anchor_box_len = [param.anchor_box(anchor_id,3) - param.anchor_box(anchor_id,1), param.anchor_box(anchor_id,4) - param.anchor_box(anchor_id,2)]; % anchor [w h]
37 |         [validy, validx] = find(cls(:,:,anchor_id) >= thresh_cls); % cells passing the score threshold
38 |         score_t{anchor_id} = [];
39 |         target_box_t{anchor_id} = [];
40 |         target_pts_t{anchor_id} = [];
41 |         score_t{anchor_id} = diag(cls(validy,validx,anchor_id)); % diagonal picks cls(validy(k),validx(k)); NOTE(review): builds an n-by-n temporary -- O(n^2) memory for n hits
42 |         for valid_id = 1 : length(validy)
43 |             anchor_center_now = [(validx(valid_id)-1)*param.stride (validy(valid_id)-1)*param.stride] + param.anchor_center; % cell -> image coords
44 |             % for box
45 |             % box_delta = reshape(box(validy(valid_id), validx(valid_id), :, anchor_id), 1, []);
46 |             % target_box_center = anchor_center_now + box_delta(1:2) .* anchor_box_len;
47 |             % target_box_length = anchor_box_len .* box_delta(3:4);
48 |             % target_box = [ 1, 0, -0.5, 0;
49 |             % 0, 1, 0, -0.5;
50 |             % 1, 0, 0.5, 0;
51 |             % 0, 1, 0, 0.5 ] * [target_box_center, target_box_length]';
52 |             % target_box_t{anchor_id}(valid_id,:) = target_box';
53 |             % for pts
54 |             anchor_points_now = param.anchor_pts * anchor_box_len(1)...
55 |                 + repmat(anchor_center_now, [1 5]); % anchor landmark template scaled by anchor width, centered on the cell
56 |             pts_delta = reshape(pts(validy(valid_id), validx(valid_id), :, anchor_id), 1, []) * anchor_box_len(1); % offsets normalized by anchor width
57 |             pts_out{anchor_id, 1}(valid_id,:) = pts_delta + anchor_points_now;
58 |         end
59 |     end
60 |     parsed(gpu_id).active = cls;
61 |     parsed(gpu_id).cls_score = cell2mat(score_t');
62 |     parsed(gpu_id).point = cell2mat(pts_out);
63 |     parsed(gpu_id).box = get_rect_from_pts(parsed(gpu_id).point); % boxes are derived from landmarks, not regressed directly
64 |     %parsed(gpu_id).point = cell2mat(target_pts_t');
65 |     % target_pts = pts_new(match_id(anchor_id),:);
66 |     % anchor_point_now = (ptsmean-0.5) * (anchor_box_now(anchor_id, 3) - anchor_box_now(anchor_id, 1)) + anchor_center_now;
67 |     % ptsdiff = (target_pts - anchor_point_now) / (anchor_box_now(anchor_id, 3) - anchor_box_now(anchor_id, 1));
68 | 
69 | end
70 | end
71 | 
72 | 
--------------------------------------------------------------------------------
/predict/utils/convert_pts.m:
--------------------------------------------------------------------------------
function pts = convert_pts( ptscell, ptsmap )
%CONVERT_PTS Flatten per-face landmark structs into an int16 N x 42 matrix.
%   ptscell - cell array of structs, each with exactly 21 landmark fields,
%             every field an [x y] pair (rounded to integers here)
%   ptsmap  - map (e.g. containers.Map) from field name to landmark index 1..21
%   Row i of pts holds landmark k of face i at columns [2k-1, 2k].
pts = zeros(0, 42, 'int16');
for i = 1 : length(ptscell)
    pf = fieldnames(ptscell{i});
    assert(length(pf) == 21);
    for j = 1 : 21
        field = pf{j};
        idx = ptsmap(field);
        % dynamic field access instead of the legacy getfield()
        pts(i, idx*2-1:idx*2) = round(ptscell{i}.(field));
    end
end

end
14 |
--------------------------------------------------------------------------------
/predict/utils/convert_rect.m:
--------------------------------------------------------------------------------
function rect = convert_rect( reccell )
%CONVERT_RECT Pack a cell array of rect structs (.x .y .w .h) into int16 Nx4.
n = numel(reccell);
rect = zeros(n, 4, 'int16');
for k = 1 : n
    r = reccell{k};
    rect(k, :) = round([r.x, r.y, r.w, r.h]);
end

end
8 |
9 |
--------------------------------------------------------------------------------
/predict/utils/cpu_cores.m:
--------------------------------------------------------------------------------
function num = cpu_cores()
%CPU_CORES Number of physical CPU cores, via MATLAB's undocumented feature().
num = feature('numcores');
end
--------------------------------------------------------------------------------
/predict/utils/detect_all.m:
--------------------------------------------------------------------------------
function parsed_all = detect_all( list, param, caffe_solver, scale, thres, display, cutedge )
%DETECT_ALL Run the detector over a list of images at several scales.
%   list         - cell array of image file names (relative to
%                  param.dataset_root) or of already-loaded images
%   param        - config struct (uses gpu_id, dataset_root, max_img, ...)
%   caffe_solver - caffe solver handle, one net per entry of param.gpu_id
%   scale        - vector of log2 scale offsets; target size = param.max_img * 2^s
%   thres        - score threshold passed to the output parser
%   display      - optional (default 0): print per-batch progress and timing
%   cutedge      - optional (default 1): clip boxes to the image border
%   Returns parsed_all, a struct array (one element per input image) with
%   fields cls_score, box (last column holds the score) and point.
num = length(list);
if nargin < 6
    display = 0;
end
if ~exist('cutedge', 'var')
    cutedge = 1;
end
% One image per GPU each iteration; now_id wraps around via mod so the last
% batch is always full.
test_time = ceil(num / length(param.gpu_id));
for i = 1 : test_time
    if display
        tic;
        fprintf('Testing: %d/%d...', i, test_time);
    end
    now_id = mod((i-1)*length(param.gpu_id):i*length(param.gpu_id)-1, num)+1;
    for id = 1 : length(param.gpu_id)
        % fixed: was ischar(list{i}) — i is the batch counter, not an image index
        if ischar(list{now_id(id)})
            img_t{id} = cv.imread(fullfile(param.dataset_root, list{now_id(id)}));
            if size(img_t{id}, 3) < 3
                img_t{id} = img_t{id}(:,:,[1 1 1]);  % replicate gray to 3 channels
            end
        else
            % fixed: was img_t = list(now_id(id)), which replaced the whole
            % batch cell array instead of filling slot id
            img_t{id} = list{now_id(id)};
        end
    end
    parsed = [];
    for s = scale
        target_now = param.max_img * 2^s;
        if target_now > param.max_img
            % Target exceeds the net input size: tile the image and detect per tile.
            for j = 1 : length(param.gpu_id)
                [ img_{j}, scale_now(j) ] = scale_img_to_target( img_t{j}, target_now ); % scale = now / pre
            end
            parsed_t = detect_large_img(img_, caffe_solver, param, thres);
        else
            for j = 1 : length(param.gpu_id)
                [ img, scale_now(j) ] = scale_img_to_target( img_t{j}, target_now ); % scale = now / pre
                input{j}{1} = single(convert_img2caffe(img)) - 127.0;  % mean subtraction
            end
            caffe_solver.reshape_as_input(input);
            caffe_solver.set_input_data(input);
            caffe_solver.forward_prefilled();
            parsed_t = convert_output_to_struct( param, caffe_solver, 1, thres);
        end
        for j = 1 : length(parsed_t)
            % Map detections back into original-image coordinates.
            parsed_t(j).box = round(parsed_t(j).box / scale_now(j));
            parsed_t(j).point = round(parsed_t(j).point / scale_now(j));
            if cutedge && ~isempty(parsed_t(j).box)
                parsed_t(j).box(:,[1, 2]) = max(1, parsed_t(j).box(:,[1, 2]));
                parsed_t(j).box(:,3) = min(parsed_t(j).box(:, 3), size(img_t{j}, 2));
                parsed_t(j).box(:,4) = min(parsed_t(j).box(:, 4), size(img_t{j}, 1));
            end
            parsed_t(j).box(:, end+1) = parsed_t(j).cls_score;  % append score as 5th column
        end
        if s == scale(1)
            parsed = parsed_t;
        else
            % Accumulate detections across scales.
            for j = 1 : length(param.gpu_id)
                parsed(j).cls_score = cat(1, parsed(j).cls_score, parsed_t(j).cls_score);
                parsed(j).box = cat(1, parsed(j).box, parsed_t(j).box);
                parsed(j).point = cat(1, parsed(j).point, parsed_t(j).point);
            end
        end
    end
    parsed_all(now_id) = parsed;
    if display
        toc;
    end
end
end
71 |
72 |
--------------------------------------------------------------------------------
/predict/utils/detect_all_by_featmap.m:
--------------------------------------------------------------------------------
function parsed_all = detect_all_by_featmap( featmap, scale, param, caffe_solver, thres )
%DETECT_ALL_BY_FEATMAP Run the detection head on precomputed feature maps.
%   featmap - cell array of feature maps, one per scale
%   scale   - scale indices; turned into multiplicative factors below
%   Detections from every scale are mapped back to image coordinates
%   (divide by the factor), given a 5th box column holding the score,
%   and merged into one struct array.
scale = 2.^(scale(end:-1:1)-5);
parsed = [];
for k = 1:numel(featmap)
    % Forward this scale's feature map through the head network.
    fm = featmap{k};
    caffe_solver.reshape_as_input({{fm}});
    caffe_solver.set_input_data({{fm}});
    caffe_solver.forward_prefilled();
    parsed_t = convert_output_to_struct( param, caffe_solver, 1, thres);
    for g = 1 : length(parsed_t)
        parsed_t(g).box = round(parsed_t(g).box / scale(k));
        parsed_t(g).point = round(parsed_t(g).point / scale(k));
        parsed_t(g).box(:, end+1) = parsed_t(g).cls_score;
    end
    if k == 1
        parsed = parsed_t;
    else
        % Append this scale's detections to the accumulated result.
        for g = 1 : length(param.gpu_id)
            parsed(g).cls_score = cat(1, parsed(g).cls_score, parsed_t(g).cls_score);
            parsed(g).box = cat(1, parsed(g).box, parsed_t(g).box);
            parsed(g).point = cat(1, parsed(g).point, parsed_t(g).point);
        end
    end
end
parsed_all = parsed;
end
27 |
28 |
--------------------------------------------------------------------------------
/predict/utils/detect_img.m:
--------------------------------------------------------------------------------
function [scores, rects, pts, pts_valid] = detect_img( param, caffe_solver, thres )
% NOTE(review): unfinished stub — it references `list`, which is not a
% parameter, and the expression below is cut off after `*`, so this function
% cannot run as written. detect_all.m appears to be the working equivalent.
num = length(list);
test_time = ceil(num / length(param.gpu_id));
for i = 1 : test_time
    % incomplete statement: the scale factor after `*` is missing
    target_scale_now = param.max_img *
    for j = 1 : length(param.gpu_id)
        [ img, scale ] = scale_img_to_target( img, target )
    end
end


end
13 |
14 |
--------------------------------------------------------------------------------
/predict/utils/detect_large_img.m:
--------------------------------------------------------------------------------
function parsed = detect_large_img(img_t, caffe_solver, param, thres)
%DETECT_LARGE_IMG Detect over images larger than the net input by tiling.
%   img_t : cell array of images, one per entry of param.gpu_id.
%   Each image is split into a grid of overlapping tiles (overlap = pad below,
%   so objects straddling a tile border are still fully visible in one tile),
%   each tile batch is forwarded through the net, and the tile-local
%   detections are shifted back into whole-image coordinates and concatenated.
grid = param.max_img;
assert(length(img_t) == length(param.gpu_id));
% The largest width/height across the batch decides the common grid layout.
max_x = 0;
max_y = 0;
for j = 1 : length(param.gpu_id)
    max_x = max(max_x, size(img_t{j}, 2));
    max_y = max(max_y, size(img_t{j}, 1));
end
% pad = ceil(param.min_target * max(max_x, max_y) / param.max_img);
pad = param.max_target;  % tile overlap in pixels
grid_x = ceil(max_x / (grid - pad));
grid_y = ceil(max_y / (grid - pad));
% Per image: start/end pixel coordinates of every tile, clamped to the image.
for j = 1 : length(param.gpu_id)
    img = img_t{j};
    s = size(img);
    g_l = ceil(s(1:2)./[grid_y grid_x]);
    x_start{j} = 1:g_l(2):s(2);
    x_end{j} = min(x_start{j} + g_l(2) + pad, size(img, 2));
    y_start{j} = 1:g_l(1):s(1);
    y_end{j} = min(y_start{j} + g_l(1) + pad, size(img, 1));
end
for x = 1 : grid_x
    for y = 1 : grid_y
        for j = 1 : length(param.gpu_id)
            % A smaller image in the batch may not have this tile; feed a
            % dummy input so every net still receives data this round.
            if length(y_start{j}) < y || length(x_start{j}) < x
                input{j}{1} = zeros(50,50,3,'single');
                continue;
            end
            img = img_t{j}(y_start{j}(y):y_end{j}(y), x_start{j}(x):x_end{j}(x),:);
            input{j}{1} = single(convert_img2caffe(img)) - 127.0;  % mean subtraction
        end
        caffe_solver.reshape_as_input(input);
        caffe_solver.set_input_data(input);
        caffe_solver.forward_prefilled();
        parsed_t = convert_output_to_struct( param, caffe_solver, 1, thres);
        for j = 1 : length(parsed_t)
            if isempty(parsed_t(j).cls_score)
                continue;
            end
            % Shift tile-local coordinates back into whole-image coordinates.
            % NOTE(review): this adds x_start/y_start (1-based) rather than
            % x_start-1/y_start-1, i.e. a one-pixel shift — confirm intended.
            parsed_t(j).box(:,[1 3]) = round(parsed_t(j).box(:,[1 3]) + x_start{j}(x));
            parsed_t(j).box(:,[2 4]) = round(parsed_t(j).box(:,[2 4]) + y_start{j}(y));
            parsed_t(j).point(:,1:2:end) = round(parsed_t(j).point(:,1:2:end) + x_start{j}(x));
            parsed_t(j).point(:,2:2:end) = round(parsed_t(j).point(:,2:2:end) + y_start{j}(y));
        end
        % First tile initializes the accumulator; later tiles append.
        if ~exist('parsed', 'var')
            parsed = parsed_t;
        else
            for j = 1 : length(param.gpu_id)
                parsed(j).cls_score = cat(1, parsed(j).cls_score, parsed_t(j).cls_score);
                parsed(j).point = cat(1, parsed(j).point, parsed_t(j).point);
                parsed(j).box = cat(1, parsed(j).box, parsed_t(j).box);
            end
        end
    end
end
end
--------------------------------------------------------------------------------
/predict/utils/draw_missed.m:
--------------------------------------------------------------------------------
% Step through `missed` (per-image ground-truth boxes the detector failed to
% find), drawing them on the corresponding image; press a key to advance.
% Script: expects `missed`, `param` and `val2db` in the workspace.
for i = 1 : length(missed)
    if isempty(missed{i})
        continue;
    end
    I = imread(fullfile(param.dataset_root, val2db.list{i}));
    hold on
    r = missed{i};
    % [x1 y1 x2 y2] -> [x y w h] for insertShape
    r = [r(:,[1,2]) r(:,3)-r(:,1) r(:,4)-r(:,2)];
    I = insertShape(I, 'rectangle', r);
    imshow(I)
    waitforbuttonpress;
end
--------------------------------------------------------------------------------
/predict/utils/dump_error.m:
--------------------------------------------------------------------------------
function dump_error(list, errimg, errdet, misimg, misdet, gt, root)
% dump_error(list, error_img, err_detail, miss_image, miss_detail, 'error_dump/')
%DUMP_ERROR Step through images with false positives / misses, draw boxes,
%and optionally save each annotated image under `root`.
%   list   - image file paths
%   errimg - indices of images with false positives; errdet{id} their boxes
%   misimg - indices of images with misses; misdet{id} the missed boxes
%   gt     - ground-truth boxes, [x1 y1 x2 y2] per row; drawn in yellow
%   root   - optional output folder; when omitted nothing is written to disk
if nargin < 7
    dump = false;
else
    dump = true;
end
all_neg = errimg; %union(errimg, misimg);
for i = 1 : length(all_neg)
    id = all_neg(i);
    fprintf('%d...\n', id);
    img = imread(list{id});
    % NOTE(review): gt is indexed by loop position i while errdet/misdet are
    % indexed by image id — confirm gt really is parallel to all_neg.
    gtt = gt{i};
    gtt = [gtt(:,[1 2]) gtt(:,3)-gtt(:,1) gtt(:,4)-gtt(:,2)];  % [x1 y1 x2 y2] -> [x y w h]
    img = insertShape(img, 'rectangle', gtt, 'Color', 'yellow', 'LineWidth', 2);
    if(any(errimg==id))
        err = errdet{id};
        err = [err(:,[1 2]) err(:,3)-err(:,1) err(:,4)-err(:,2)];
        % NOTE(review): false positives use the same yellow as ground truth —
        % possibly meant to be a distinct color.
        img = insertShape(img, 'rectangle', err, 'Color', 'yellow', 'LineWidth', 2);
    end
    if(any(misimg==id))
        err = misdet{id};
        err = [err(:,[1 2]) err(:,3)-err(:,1) err(:,4)-err(:,2)];
        img = insertShape(img, 'rectangle', err, 'Color', 'red', 'LineWidth', 2);
    end
    hold off;
    imshow(img);
    if dump
        imwrite(img, fullfile(root, [num2str(id) '.jpg']));
    end
    waitforbuttonpress;
end

end
--------------------------------------------------------------------------------
/predict/utils/fetch_output.m:
--------------------------------------------------------------------------------
function output = fetch_output(handle)
%FETCH_OUTPUT Collect output blobs from a caffe Net, or average them over
%all nets of a caffe Solver.
%   Solver case: per-net outputs are summed element-wise and divided by the
%   number of nets whose blob was positive — not by the total net count.
if isa(handle, 'caffe.Net')
    output = fetch_net_output(handle);
elseif isa(handle, 'caffe.Solver')
    % cnt(j) counts nets contributing a positive value for output j;
    % 20 is just an upper bound on the number of outputs.
    cnt = zeros(20,1);
    for i = 1 : length(handle.nets)
        output_t = fetch_net_output(handle.nets{i});
        if i == 1
            output = output_t;
            % `value > 0` on an array is true only when every element is
            % positive (MATLAB if-on-array semantics).
            for j = 1 : length(output_t)
                if output_t(j).value > 0
                    cnt(j)=cnt(j)+1;
                end
            end
        else
            for j = 1 : length(output_t)
                output(j).value = output(j).value + output_t(j).value;
                if output_t(j).value > 0
                    cnt(j)=cnt(j)+1;
                end
            end
        end
    end
    % NOTE(review): if no net reported a positive value for output j, cnt(j)
    % is 0 and this division yields Inf/NaN — confirm callers tolerate that.
    for j = 1 : length(output)
        output(j).value = output(j).value / cnt(j);
    end
else
    error('only accept net or solver handle');
end
end
31 |
function output = fetch_net_output(handle)
%FETCH_NET_OUTPUT Read every declared output blob of a caffe.Net into a
%struct array with fields `name` and `value`.
names = handle.outputs;
for k = 1 : length(names)
    blob_id = handle.name2blob_index(names{k});
    output(k).name = names{k};
    output(k).value = handle.blob_vec(blob_id).get_data();
end
end
--------------------------------------------------------------------------------
/predict/utils/gen_featmap.m:
--------------------------------------------------------------------------------
function featmaps = gen_featmap( list, caffe_solver, param, scale )
%GEN_FEATMAP Forward each image through the net and collect its feature map.
%   list   - cell array of image file names (relative to param.test_root)
%            or of already-loaded images
%   scale  - per-image log2 scale offset; each image is resized so its
%            longest side becomes param.max_img * 2^scale(k)
%   Returns featmaps, a cell array where featmaps{k} is the net output for
%   list{k}.
num = length(list);
featmaps = cell(num,1);
% One image per GPU per iteration; now_id wraps via mod for the final batch.
test_time = ceil(num / length(param.gpu_id));
for i = 1 : test_time
    now_id = mod((i-1)*length(param.gpu_id):i*length(param.gpu_id)-1, num)+1;
    for id = 1 : length(param.gpu_id)
        % fixed: was ischar(list{i}) — i is the batch counter, not an image index
        if ischar(list{now_id(id)})
            img_t = imread(fullfile(param.test_root, list{now_id(id)}));
            if ~iscell(img_t)
                if size(img_t, 3) < 3
                    img_t = img_t(:,:,[1 1 1]);  % replicate gray to 3 channels
                end
            else
                img_t = img_t{id}(:,:,[1 1 1]);
            end
        else
            % fixed: was list(now_id(id)), which kept the cell wrapper and
            % broke imresize below
            img_t = list{now_id(id)};
        end
        input{id}{1} = single(convert_img2caffe(imresize(img_t, param.max_img/max(size(img_t))*2^scale(now_id(id))))) - 127.0;
    end
    caffe_solver.reshape_as_input(input);
    caffe_solver.set_input_data(input);
    caffe_solver.forward_prefilled();
    o = caffe_solver.get_output();
    for j = 1 : length(o)
        featmaps{now_id(j)} = o{j}.data;
    end
end
end
31 |
32 |
--------------------------------------------------------------------------------
/predict/utils/gen_trans_featmap.m:
--------------------------------------------------------------------------------
1 | function featmap_trans = gen_trans_featmap( featmap_t, caffe_solver, scale )
2 | scale = unique(min(scale, 5));
3 | ori_scale = max(scale);
4 | featmap_trans{1} = featmap_t;
5 | test_time = numel(find(scale
#include <vector>
#include <cstring>
// Ceiling division: number of n-sized chunks needed to cover m.
#define DIVUP(m,n) ((m)/(n)+((m)%(n)>0))
// One suppression-mask word is an unsigned long long (64 bits), so boxes are
// tiled 64 per block and each thread owns one bit per column tile.
int const threadsPerBlock = (sizeof(unsigned long long) * 8);
15 |
16 | /*
17 | * Device code
18 | */
/* Intersection-over-union of two boxes [x1 y1 x2 y2 ...] in inclusive
 * pixel coordinates (hence the +1 in widths/heights). */
__device__ inline float devIoU(float const * const a, float const * const b)
{
    float ix0 = max(a[0], b[0]);
    float iy0 = max(a[1], b[1]);
    float ix1 = min(a[2], b[2]);
    float iy1 = min(a[3], b[3]);
    float iw = max(ix1 - ix0 + 1, 0.f);
    float ih = max(iy1 - iy0 + 1, 0.f);
    float inter = iw * ih;
    float areaA = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
    float areaB = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
    return inter / (areaA + areaB - inter);
}
29 |
/*
 * NMS suppression-mask kernel.
 * Launch layout: 2D grid of (DIVUP(n_boxes,64), DIVUP(n_boxes,64)) blocks,
 * blockDim.x = threadsPerBlock (= 64).  Block (x=col, y=row) compares the
 * 64-box row tile against the 64-box column tile.
 * dev_boxes: n_boxes packed [x1 y1 x2 y2 score] records.
 * dev_mask : n_boxes * DIVUP(n_boxes,64) words; bit i of word
 *            (box, col) is set when box col*64+i overlaps `box` above the
 *            threshold (only later boxes are tested on the diagonal tile).
 */
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thres, const float *dev_boxes, unsigned long long *dev_mask)
{
    const int row_start = blockIdx.y, col_start = blockIdx.x;
    // Tiles at the tail of the box list may be partial.
    const int row_size = min(n_boxes - row_start * threadsPerBlock, threadsPerBlock), col_size = min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

    //if (row_start > col_start) return;

    // Stage the column tile (5 floats per box) in shared memory.
    __shared__ float block_boxes[threadsPerBlock * 5];
    if (threadIdx.x < col_size)
    {
        block_boxes[threadIdx.x * 5 + 0] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
        block_boxes[threadIdx.x * 5 + 1] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
        block_boxes[threadIdx.x * 5 + 2] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
        block_boxes[threadIdx.x * 5 + 3] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
        block_boxes[threadIdx.x * 5 + 4] = dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
    }
    // Barrier is outside the divergent branch: all threads reach it, and the
    // shared tile is complete before anyone reads it.
    __syncthreads();

    if (threadIdx.x < row_size)
    {
        // One thread per row box: test it against every column box in the tile.
        const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
        const float *cur_box = dev_boxes + cur_box_idx * 5;
        int i = 0;
        unsigned long long t = 0;
        int start = 0;
        // On the diagonal tile compare only against later boxes so each pair
        // is tested once and a box never suppresses itself.
        if (row_start == col_start) start = threadIdx.x + 1;
        for (i = start; i < col_size; i++)
        {
            if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thres)
            {
                t |= 1ULL << i;  // mark column box i as overlapping cur_box
            }
        }
        const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
        dev_mask[cur_box_idx * col_blocks + col_start] = t;
    }
}
67 |
68 | /*
69 | * Host code
70 | */
/*
 * MATLAB entry point:  keep = nms_gpu_mex(boxes, overlap_thres)
 *   boxes         - 5 x N single matrix (column-major); each column is
 *                   [x1 y1 x2 y2 score], assumed pre-sorted by descending score.
 *   overlap_thres - double scalar IoU threshold.
 * Returns the 1-based indices of the kept boxes as int32.
 */
void mexFunction(int nlhs, mxArray *plhs[],
    int nrhs, const mxArray *prhs[])
{
    mxArray const *boxes, *ov_thres;
    float *boxes_host = NULL;
    float *boxes_dev = NULL;
    unsigned long long *mask_dev = NULL;

    /* Validate inputs. */
    if (nrhs != 2) {
        mexErrMsgTxt("nms_gpu_mex::need 2 inputs");
    }

    boxes = prhs[0];
    if (mxGetClassID(boxes) != mxSINGLE_CLASS) {
        mexErrMsgTxt("nms_gpu_mex::input boxes must be single");
    }

    ov_thres = prhs[1];
    if (mxGetClassID(ov_thres) != mxDOUBLE_CLASS) {
        /* fixed: message previously said "boxes" for the threshold check */
        mexErrMsgTxt("nms_gpu_mex::input overlap threshold must be double");
    }

    float nms_overlap_thres = (float)mxGetScalar(ov_thres);

    int boxes_dim = mxGetM(boxes);
    int boxes_num = mxGetN(boxes);
    if (boxes_dim != 5)
    {
        mexErrMsgTxt("nms_gpu_mex::input boxes's row must be 5");
    }

    boxes_host = (float *)(mxGetPr(boxes));
    const int col_blocks = DIVUP(boxes_num, threadsPerBlock);

    cudaMalloc(&boxes_dev, mxGetNumberOfElements(boxes) * sizeof(float));
    cudaMemcpy(boxes_dev, boxes_host, mxGetNumberOfElements(boxes) * sizeof(float), cudaMemcpyHostToDevice);

    /* One 64-bit suppression-mask word per (box, column-tile) pair. */
    cudaMalloc(&mask_dev, boxes_num * col_blocks * sizeof(unsigned long long));

    /* 2D grid: block (c, r) compares box tile r against box tile c. */
    dim3 blocks(DIVUP(boxes_num, threadsPerBlock), DIVUP(boxes_num, threadsPerBlock));
    dim3 threads(threadsPerBlock);
    if (boxes_num > 0)
    {
        /* Restored launch configuration (was corrupted to "<< > >" in the source). */
        nms_kernel<<<blocks, threads>>>(boxes_num, nms_overlap_thres, boxes_dev, mask_dev);
        if (cudaGetLastError() != cudaSuccess)
        {
            cudaFree(boxes_dev);
            cudaFree(mask_dev);
            mexErrMsgTxt("nms_gpu_mex::kernel launch failed");
        }
    }

    /* Restored element types (angle-bracket contents were stripped from the source).
     * cudaMemcpy on the default stream synchronizes with the kernel. */
    std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
    cudaMemcpy(mask_host.data(), mask_dev, sizeof(unsigned long long) * boxes_num * col_blocks, cudaMemcpyDeviceToHost);

    /* Zero-initialized via the constructor (replaces the raw memset, which was
     * UB on an empty vector when boxes_num == 0). */
    std::vector<unsigned long long> remv(col_blocks, 0ULL);

    /* Greedy scan in score order: keep a box unless an earlier kept box
     * already suppressed it, then OR its mask into the running suppression set. */
    std::vector<int> keep;
    keep.reserve(boxes_num);
    for (int i = 0; i < boxes_num; i++)
    {
        int nblock = i / threadsPerBlock;
        int inblock = i % threadsPerBlock;

        if (!(remv[nblock] & (1ULL << inblock)))
        {
            keep.push_back(i + 1); /* 1-based for MATLAB */

            const unsigned long long *p = mask_host.data() + i * col_blocks;
            for (int j = nblock; j < col_blocks; j++)
            {
                remv[j] |= p[j];
            }
        }
    }

    /* Wrap the result up as a MATLAB int32 column vector. */
    mwSize dims[4] = { (mwSize)keep.size(), 1, 1, 1 };
    plhs[0] = mxCreateNumericArray(4, dims, mxINT32_CLASS, mxREAL);

    int *output = (int *)(mxGetPr(plhs[0]));
    if (!keep.empty())
    {
        memcpy(output, keep.data(), keep.size() * sizeof(int));
    }

    cudaFree(boxes_dev);
    cudaFree(mask_dev);
}
161 |
--------------------------------------------------------------------------------
/predict/utils/nms_mex.cpp:
--------------------------------------------------------------------------------
1 | #include "mex.h"
2 | #ifdef _MSC_VER
3 | #include
4 | #include
5 | #endif
6 | #include
7 | #include