├── README.md
└── deploy.prototxt

/README.md:
--------------------------------------------------------------------------------
# CSRNet (Try our [Pytorch Version](https://github.com/leeyeehoo/CSRNet-pytorch/tree/master)!)
This is the repo for [CSRNet: Dilated Convolutional Neural Networks for Understanding the Highly Congested Scenes](https://arxiv.org/abs/1802.10062) in CVPR 2018, which delivered a state-of-the-art, straightforward and end-to-end architecture for crowd counting tasks.
## Datasets
ShanghaiTech Dataset: [Google Drive](https://drive.google.com/open?id=16dhJn7k4FWVwByRsQAEpl9lwjuV03jVI)

## Models (Only for tests)

These are the models for testing. The results should be similar to the results shown in the paper (slightly better or worse).

1) ShanghaiTech_Part_A: [Google Drive](https://drive.google.com/open?id=1odZ3B_ZDSepPcVFO_TfGUIrpF2DF7SwY)

2) ShanghaiTech_Part_B: [Google Drive](https://drive.google.com/open?id=1NOpn0ztlye85vrHR2TMwOI2Qu_S8zANj)

## Prerequisites

1) A working CAFFE installation

We understand that it is tedious and difficult to configure a custom input layer (or even to install CAFFE on your own PC), so we have made a PyTorch version of CSRNet: [CSRNet Pytorch Version](https://github.com/leeyeehoo/CSRNet-pytorch/tree/master)

## References

If you find CSRNet useful, please cite our paper. Thank you!

```
@inproceedings{li2018csrnet,
  title={CSRNet: Dilated convolutional neural networks for understanding the highly congested scenes},
  author={Li, Yuhong and Zhang, Xiaofan and Chen, Deming},
  booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
  pages={1091--1100},
  year={2018}
}
```
Please cite the ShanghaiTech dataset and other works if you use them.
34 | 35 | ``` 36 | @inproceedings{zhang2016single, 37 | title={Single-image crowd counting via multi-column convolutional neural network}, 38 | author={Zhang, Yingying and Zhou, Desen and Chen, Siqin and Gao, Shenghua and Ma, Yi}, 39 | booktitle={Proceedings of the IEEE conference on computer vision and pattern recognition}, 40 | pages={589--597}, 41 | year={2016} 42 | } 43 | ``` 44 | -------------------------------------------------------------------------------- /deploy.prototxt: -------------------------------------------------------------------------------- 1 | input: "data" 2 | input_shape { 3 | dim: 1 4 | dim: 3 5 | dim: 224 6 | dim: 224 7 | } 8 | #First column network 9 | 10 | layer { 11 | bottom: "data" 12 | top: "conv1_1" 13 | name: "conv1_1" 14 | type: "Convolution" 15 | param { 16 | lr_mult: 0 17 | decay_mult: 0 18 | } 19 | param { 20 | lr_mult: 0 21 | decay_mult: 0 22 | } 23 | convolution_param { 24 | num_output: 64 25 | pad: 1 26 | kernel_size: 3 27 | weight_filler { 28 | type: "gaussian" 29 | std: 0.01 30 | } 31 | bias_filler { 32 | type: "constant" 33 | value: 0 34 | } 35 | } 36 | } 37 | layer { 38 | bottom: "conv1_1" 39 | top: "conv1_1" 40 | name: "relu1_1" 41 | type: "ReLU" 42 | } 43 | layer { 44 | bottom: "conv1_1" 45 | top: "conv1_2" 46 | name: "conv1_2" 47 | type: "Convolution" 48 | param { 49 | lr_mult: 0 50 | decay_mult: 0 51 | } 52 | param { 53 | lr_mult: 0 54 | decay_mult: 0 55 | } 56 | convolution_param { 57 | num_output: 64 58 | pad: 1 59 | kernel_size: 3 60 | weight_filler { 61 | type: "gaussian" 62 | std: 0.01 63 | } 64 | bias_filler { 65 | type: "constant" 66 | value: 0 67 | } 68 | } 69 | } 70 | layer { 71 | bottom: "conv1_2" 72 | top: "conv1_2" 73 | name: "relu1_2" 74 | type: "ReLU" 75 | } 76 | layer { 77 | bottom: "conv1_2" 78 | top: "pool1" 79 | name: "pool1" 80 | type: "Pooling" 81 | pooling_param { 82 | pool: MAX 83 | kernel_size: 2 84 | stride: 2 85 | } 86 | } 87 | layer { 88 | bottom: "pool1" 89 | top: "conv2_1" 90 | name: 
"conv2_1" 91 | type: "Convolution" 92 | param { 93 | lr_mult: 0 94 | decay_mult: 0 95 | } 96 | param { 97 | lr_mult: 0 98 | decay_mult: 0 99 | } 100 | convolution_param { 101 | num_output: 128 102 | pad: 1 103 | kernel_size: 3 104 | weight_filler { 105 | type: "gaussian" 106 | std: 0.01 107 | } 108 | bias_filler { 109 | type: "constant" 110 | value: 0 111 | } 112 | } 113 | } 114 | layer { 115 | bottom: "conv2_1" 116 | top: "conv2_1" 117 | name: "relu2_1" 118 | type: "ReLU" 119 | } 120 | layer { 121 | bottom: "conv2_1" 122 | top: "conv2_2" 123 | name: "conv2_2" 124 | type: "Convolution" 125 | param { 126 | lr_mult: 0 127 | decay_mult: 0 128 | } 129 | param { 130 | lr_mult: 0 131 | decay_mult: 0 132 | } 133 | convolution_param { 134 | num_output: 128 135 | pad: 1 136 | kernel_size: 3 137 | weight_filler { 138 | type: "gaussian" 139 | std: 0.01 140 | } 141 | bias_filler { 142 | type: "constant" 143 | value: 0 144 | } 145 | } 146 | } 147 | layer { 148 | bottom: "conv2_2" 149 | top: "conv2_2" 150 | name: "relu2_2" 151 | type: "ReLU" 152 | } 153 | layer { 154 | bottom: "conv2_2" 155 | top: "pool2" 156 | name: "pool2" 157 | type: "Pooling" 158 | pooling_param { 159 | pool: MAX 160 | kernel_size: 2 161 | stride: 2 162 | } 163 | } 164 | layer { 165 | bottom: "pool2" 166 | top: "conv3_1" 167 | name: "conv3_1" 168 | type: "Convolution" 169 | param { 170 | lr_mult: 0 171 | decay_mult: 0 172 | } 173 | param { 174 | lr_mult: 0 175 | decay_mult: 0 176 | } 177 | convolution_param { 178 | num_output: 256 179 | pad: 1 180 | kernel_size: 3 181 | weight_filler { 182 | type: "gaussian" 183 | std: 0.01 184 | } 185 | bias_filler { 186 | type: "constant" 187 | value: 0 188 | } 189 | } 190 | } 191 | layer { 192 | bottom: "conv3_1" 193 | top: "conv3_1" 194 | name: "relu3_1" 195 | type: "ReLU" 196 | } 197 | layer { 198 | bottom: "conv3_1" 199 | top: "conv3_2" 200 | name: "conv3_2" 201 | type: "Convolution" 202 | param { 203 | lr_mult: 0 204 | decay_mult: 0 205 | } 206 | param { 207 | 
lr_mult: 0 208 | decay_mult: 0 209 | } 210 | convolution_param { 211 | num_output: 256 212 | pad: 1 213 | kernel_size: 3 214 | weight_filler { 215 | type: "gaussian" 216 | std: 0.01 217 | } 218 | bias_filler { 219 | type: "constant" 220 | value: 0 221 | } 222 | } 223 | } 224 | layer { 225 | bottom: "conv3_2" 226 | top: "conv3_2" 227 | name: "relu3_2" 228 | type: "ReLU" 229 | } 230 | layer { 231 | bottom: "conv3_2" 232 | top: "conv3_3" 233 | name: "conv3_3" 234 | type: "Convolution" 235 | param { 236 | lr_mult: 0 237 | decay_mult: 0 238 | } 239 | param { 240 | lr_mult: 0 241 | decay_mult: 0 242 | } 243 | convolution_param { 244 | num_output: 256 245 | pad: 1 246 | kernel_size: 3 247 | weight_filler { 248 | type: "gaussian" 249 | std: 0.01 250 | } 251 | bias_filler { 252 | type: "constant" 253 | value: 0 254 | } 255 | } 256 | } 257 | layer { 258 | bottom: "conv3_3" 259 | top: "conv3_3" 260 | name: "relu3_3" 261 | type: "ReLU" 262 | } 263 | layer { 264 | bottom: "conv3_3" 265 | top: "pool3" 266 | name: "pool3" 267 | type: "Pooling" 268 | pooling_param { 269 | pool: MAX 270 | kernel_size: 2 271 | stride: 2 272 | } 273 | } 274 | layer { 275 | bottom: "pool3" 276 | top: "conv4_1" 277 | name: "conv4_1" 278 | type: "Convolution" 279 | param { 280 | lr_mult: 0 281 | decay_mult: 0 282 | } 283 | param { 284 | lr_mult: 0 285 | decay_mult: 0 286 | } 287 | convolution_param { 288 | num_output: 512 289 | pad: 1 290 | kernel_size: 3 291 | weight_filler { 292 | type: "gaussian" 293 | std: 0.01 294 | } 295 | bias_filler { 296 | type: "constant" 297 | value: 0 298 | } 299 | } 300 | } 301 | layer { 302 | bottom: "conv4_1" 303 | top: "conv4_1" 304 | name: "relu4_1" 305 | type: "ReLU" 306 | } 307 | layer { 308 | bottom: "conv4_1" 309 | top: "conv4_2" 310 | name: "conv4_2" 311 | type: "Convolution" 312 | param { 313 | lr_mult: 0 314 | decay_mult: 0 315 | } 316 | param { 317 | lr_mult: 0 318 | decay_mult: 0 319 | } 320 | convolution_param { 321 | num_output: 512 322 | pad: 1 323 | 
kernel_size: 3 324 | weight_filler { 325 | type: "gaussian" 326 | std: 0.01 327 | } 328 | bias_filler { 329 | type: "constant" 330 | value: 0 331 | } 332 | } 333 | } 334 | layer { 335 | bottom: "conv4_2" 336 | top: "conv4_2" 337 | name: "relu4_2" 338 | type: "ReLU" 339 | } 340 | layer { 341 | bottom: "conv4_2" 342 | top: "conv4_3" 343 | name: "conv4_3" 344 | type: "Convolution" 345 | param { 346 | lr_mult: 0 347 | decay_mult: 0 348 | } 349 | param { 350 | lr_mult: 0 351 | decay_mult: 0 352 | } 353 | convolution_param { 354 | num_output: 512 355 | pad: 1 356 | kernel_size: 3 357 | weight_filler { 358 | type: "gaussian" 359 | std: 0.01 360 | } 361 | bias_filler { 362 | type: "constant" 363 | value: 0 364 | } 365 | } 366 | } 367 | layer { 368 | bottom: "conv4_3" 369 | top: "conv4_3" 370 | name: "relu4_3" 371 | type: "ReLU" 372 | } 373 | layer { 374 | bottom: "conv4_3" 375 | top: "conv6_1" 376 | name: "conv6_1" 377 | type: "Convolution" 378 | param { 379 | lr_mult: 1 380 | decay_mult: 1 381 | } 382 | param { 383 | lr_mult: 2 384 | decay_mult: 0 385 | } 386 | convolution_param { 387 | num_output: 512 388 | pad: 2 389 | dilation: 2 390 | kernel_size: 3 391 | weight_filler { 392 | type: "gaussian" 393 | std: 0.01 394 | } 395 | bias_filler { 396 | type: "constant" 397 | value: 0 398 | } 399 | } 400 | } 401 | layer { 402 | bottom: "conv6_1" 403 | top: "conv6_1" 404 | name: "relu6_1" 405 | type: "ReLU" 406 | } 407 | layer { 408 | bottom: "conv6_1" 409 | top: "conv6_2" 410 | name: "conv6_2" 411 | type: "Convolution" 412 | param { 413 | lr_mult: 1 414 | decay_mult: 1 415 | } 416 | param { 417 | lr_mult: 2 418 | decay_mult: 0 419 | } 420 | convolution_param { 421 | num_output: 512 422 | pad: 2 423 | dilation: 2 424 | kernel_size: 3 425 | weight_filler { 426 | type: "gaussian" 427 | std: 0.01 428 | } 429 | bias_filler { 430 | type: "constant" 431 | value: 0 432 | } 433 | } 434 | } 435 | layer { 436 | bottom: "conv6_2" 437 | top: "conv6_2" 438 | name: "relu6_2" 439 | type: "ReLU" 
440 | } 441 | layer { 442 | bottom: "conv6_2" 443 | top: "conv6_3" 444 | name: "conv6_3" 445 | type: "Convolution" 446 | param { 447 | lr_mult: 1 448 | decay_mult: 1 449 | } 450 | param { 451 | lr_mult: 2 452 | decay_mult: 0 453 | } 454 | convolution_param { 455 | num_output: 512 456 | pad: 2 457 | dilation: 2 458 | kernel_size: 3 459 | weight_filler { 460 | type: "gaussian" 461 | std: 0.01 462 | } 463 | bias_filler { 464 | type: "constant" 465 | value: 0 466 | } 467 | } 468 | } 469 | layer { 470 | bottom: "conv6_3" 471 | top: "conv6_3" 472 | name: "relu6_3" 473 | type: "ReLU" 474 | } 475 | layer { 476 | bottom: "conv6_3" 477 | top: "conv7_1" 478 | name: "conv7_1" 479 | type: "Convolution" 480 | param { 481 | lr_mult: 1 482 | decay_mult: 1 483 | } 484 | param { 485 | lr_mult: 2 486 | decay_mult: 0 487 | } 488 | convolution_param { 489 | num_output: 256 490 | pad: 2 491 | dilation: 2 492 | kernel_size: 3 493 | weight_filler { 494 | type: "gaussian" 495 | std: 0.01 496 | } 497 | bias_filler { 498 | type: "constant" 499 | value: 0 500 | } 501 | } 502 | } 503 | layer { 504 | bottom: "conv7_1" 505 | top: "conv7_1" 506 | name: "relu7_1" 507 | type: "ReLU" 508 | } 509 | layer { 510 | bottom: "conv7_1" 511 | top: "conv7_2" 512 | name: "conv7_2" 513 | type: "Convolution" 514 | param { 515 | lr_mult: 1 516 | decay_mult: 1 517 | } 518 | param { 519 | lr_mult: 2 520 | decay_mult: 0 521 | } 522 | convolution_param { 523 | num_output: 128 524 | pad: 2 525 | dilation: 2 526 | kernel_size: 3 527 | weight_filler { 528 | type: "gaussian" 529 | std: 0.01 530 | } 531 | bias_filler { 532 | type: "constant" 533 | value: 0 534 | } 535 | } 536 | } 537 | layer { 538 | bottom: "conv7_2" 539 | top: "conv7_2" 540 | name: "relu7_2" 541 | type: "ReLU" 542 | } 543 | layer { 544 | bottom: "conv7_2" 545 | top: "conv7_3" 546 | name: "conv7_3" 547 | type: "Convolution" 548 | param { 549 | lr_mult: 1 550 | decay_mult: 1 551 | } 552 | param { 553 | lr_mult: 2 554 | decay_mult: 0 555 | } 556 | 
convolution_param { 557 | num_output: 64 558 | pad: 2 559 | dilation: 2 560 | kernel_size: 3 561 | weight_filler { 562 | type: "gaussian" 563 | std: 0.01 564 | } 565 | bias_filler { 566 | type: "constant" 567 | value: 0 568 | } 569 | } 570 | } 571 | layer { 572 | bottom: "conv7_3" 573 | top: "conv7_3" 574 | name: "relu7_3" 575 | type: "ReLU" 576 | } 577 | layer { 578 | bottom: "conv7_3" 579 | top: "estdmap" 580 | name: "fu1" 581 | type: "Convolution" 582 | param { 583 | lr_mult: 1 584 | decay_mult: 1 585 | } 586 | param { 587 | lr_mult: 2 588 | decay_mult: 0 589 | } 590 | convolution_param { 591 | num_output: 1 592 | kernel_size: 1 593 | weight_filler { 594 | type: "gaussian" 595 | std: 0.01 596 | } 597 | bias_filler { 598 | type: "constant" 599 | value: 0 600 | } 601 | } 602 | } 603 | 604 | 605 | --------------------------------------------------------------------------------