├── LICENSE.md
├── README.md
├── compute_burdens.m
├── core
└── burden.m
├── matlab
├── fcn_autonn_custom_fn.m
├── inception_autonn_custom_fn.m
└── squeezenet_autonn_custom_fn.m
├── misc
└── generate_markdown.sh
├── reports
├── SE-BN-Inception.md
├── SE-ResNeXt-101-32x4d.md
├── SE-ResNeXt-50-32x4d.md
├── SE-ResNet-101.md
├── SE-ResNet-152.md
├── SE-ResNet-50.md
├── SENet.md
├── alexnet.md
├── caffe-ref.md
├── caffenet.md
├── deeplab-res101-v2.md
├── deeplab-vggvd-v2.md
├── densenet121.md
├── densenet161.md
├── densenet169.md
├── densenet201.md
├── faster-rcnn-vggvd-pascal.md
├── figs
│ ├── SE-BN-Inception.png
│ ├── SE-ResNeXt-101-32x4d.png
│ ├── SE-ResNeXt-50-32x4d.png
│ ├── SE-ResNet-101.png
│ ├── SE-ResNet-152.png
│ ├── SE-ResNet-50.png
│ ├── SENet.png
│ ├── alexnet.png
│ ├── caffe-ref.png
│ ├── caffenet.png
│ ├── deeplab-res101-v2.png
│ ├── deeplab-vggvd-v2.png
│ ├── densenet121.png
│ ├── densenet161.png
│ ├── densenet169.png
│ ├── densenet201.png
│ ├── faster-rcnn-vggvd-pascal.png
│ ├── googlenet.png
│ ├── inception-v3.png
│ ├── matconvnet-alex.png
│ ├── mcn-mobilenet-v2.png
│ ├── mcn-mobilenet.png
│ ├── multipose-coco.png
│ ├── multipose-mpi.png
│ ├── pascal-fcn16s.png
│ ├── pascal-fcn32s.png
│ ├── pascal-fcn8s.png
│ ├── resnet-101.png
│ ├── resnet-152.png
│ ├── resnet-50.png
│ ├── resnet18.png
│ ├── resnet34.png
│ ├── resnet50.png
│ ├── resnext-101-32x4d.png
│ ├── resnext-101-64x4d.png
│ ├── resnext-50-32x4d.png
│ ├── rfcn-res101-pascal.png
│ ├── rfcn-res50-pascal.png
│ ├── squeezenet1-0.png
│ ├── squeezenet1-1.png
│ ├── ssd-mcn-pascal-vggvd-300.png
│ ├── ssd-mcn-pascal-vggvd-512.png
│ ├── ssd-pascal-mobilenet-ft.png
│ ├── ssd-pascal-vggvd-300.png
│ ├── ssd-pascal-vggvd-512.png
│ ├── vgg-f.png
│ ├── vgg-m-1024.png
│ ├── vgg-m-128.png
│ ├── vgg-m-2048.png
│ ├── vgg-m.png
│ ├── vgg-s.png
│ ├── vgg-vd-16-atrous.png
│ ├── vgg-vd-16.png
│ └── vgg-vd-19.png
├── googlenet.md
├── inception-v3.md
├── matconvnet-alex.md
├── mcn-mobilenet-v2.md
├── mcn-mobilenet.md
├── multipose-coco.md
├── multipose-mpi.md
├── pascal-fcn16s.md
├── pascal-fcn32s.md
├── pascal-fcn8s.md
├── resnet-101.md
├── resnet-152.md
├── resnet-50.md
├── resnet18.md
├── resnet34.md
├── resnet50.md
├── resnext-101-32x4d.md
├── resnext-101-64x4d.md
├── resnext-50-32x4d.md
├── rfcn-res101-pascal.md
├── rfcn-res50-pascal.md
├── squeezenet1-0.md
├── squeezenet1-1.md
├── ssd-pascal-mobilenet-ft.md
├── ssd-pascal-vggvd-300.md
├── ssd-pascal-vggvd-512.md
├── vgg-f.md
├── vgg-m-1024.md
├── vgg-m-128.md
├── vgg-m-2048.md
├── vgg-m.md
├── vgg-s.md
├── vgg-vd-16-atrous.md
├── vgg-vd-16.md
└── vgg-vd-19.md
└── setup_convnet_burden.m
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Samuel Albanie
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | convnet-burden
2 | ---
3 |
4 | Estimates of memory consumption and FLOP counts for various convolutional neural networks.
5 |
6 |
7 | ### Image Classification Architectures
8 |
9 | The numbers below are given for single element batches.
10 |
11 | | model | input size | param mem | feat. mem | flops | src | performance |
12 | |-------|------------|--------------|----------------|-------|-----|-------------|
13 | | [alexnet](reports/alexnet.md) | 227 x 227 | 233 MB | 3 MB | 727 MFLOPs | MCN | 41.80 / 19.20 |
14 | | [caffenet](reports/caffenet.md) | 224 x 224 | 233 MB | 3 MB | 724 MFLOPs | MCN | 42.60 / 19.70 |
15 | | [squeezenet1-0](reports/squeezenet1-0.md) | 224 x 224 | 5 MB | 30 MB | 837 MFLOPs | PT | 41.90 / 19.58 |
16 | | [squeezenet1-1](reports/squeezenet1-1.md) | 224 x 224 | 5 MB | 17 MB | 360 MFLOPs | PT | 41.81 / 19.38 |
17 | | [vgg-f](reports/vgg-f.md) | 224 x 224 | 232 MB | 4 MB | 727 MFLOPs | MCN | 41.40 / 19.10 |
18 | | [vgg-m](reports/vgg-m.md) | 224 x 224 | 393 MB | 12 MB | 2 GFLOPs | MCN | 36.90 / 15.50 |
19 | | [vgg-s](reports/vgg-s.md) | 224 x 224 | 393 MB | 12 MB | 3 GFLOPs | MCN | 37.00 / 15.80 |
20 | | [vgg-m-2048](reports/vgg-m-2048.md) | 224 x 224 | 353 MB | 12 MB | 2 GFLOPs | MCN | 37.10 / 15.80 |
21 | | [vgg-m-1024](reports/vgg-m-1024.md) | 224 x 224 | 333 MB | 12 MB | 2 GFLOPs | MCN | 37.80 / 16.10 |
22 | | [vgg-m-128](reports/vgg-m-128.md) | 224 x 224 | 315 MB | 12 MB | 2 GFLOPs | MCN | 40.80 / 18.40 |
23 | | [vgg-vd-16-atrous](reports/vgg-vd-16-atrous.md) | 224 x 224 | 82 MB | 58 MB | 16 GFLOPs | N/A | - / - |
24 | | [vgg-vd-16](reports/vgg-vd-16.md) | 224 x 224 | 528 MB | 58 MB | 16 GFLOPs | MCN | 28.50 / 9.90 |
25 | | [vgg-vd-19](reports/vgg-vd-19.md) | 224 x 224 | 548 MB | 63 MB | 20 GFLOPs | MCN | 28.70 / 9.90 |
26 | | [googlenet](reports/googlenet.md) | 224 x 224 | 51 MB | 26 MB | 2 GFLOPs | MCN | 34.20 / 12.90 |
27 | | [resnet18](reports/resnet18.md) | 224 x 224 | 45 MB | 23 MB | 2 GFLOPs | PT | 30.24 / 10.92 |
28 | | [resnet34](reports/resnet34.md) | 224 x 224 | 83 MB | 35 MB | 4 GFLOPs | PT | 26.70 / 8.58 |
29 | | [resnet-50](reports/resnet-50.md) | 224 x 224 | 98 MB | 103 MB | 4 GFLOPs | MCN | 24.60 / 7.70 |
30 | | [resnet-101](reports/resnet-101.md) | 224 x 224 | 170 MB | 155 MB | 8 GFLOPs | MCN | 23.40 / 7.00 |
31 | | [resnet-152](reports/resnet-152.md) | 224 x 224 | 230 MB | 219 MB | 11 GFLOPs | MCN | 23.00 / 6.70 |
32 | | [resnext-50-32x4d](reports/resnext-50-32x4d.md) | 224 x 224 | 96 MB | 132 MB | 4 GFLOPs | L1 | 22.60 / 6.49 |
33 | | [resnext-101-32x4d](reports/resnext-101-32x4d.md) | 224 x 224 | 169 MB | 197 MB | 8 GFLOPs | L1 | 21.55 / 5.93 |
34 | | [resnext-101-64x4d](reports/resnext-101-64x4d.md) | 224 x 224 | 319 MB | 273 MB | 16 GFLOPs | PT | 20.81 / 5.66 |
35 | | [inception-v3](reports/inception-v3.md) | 299 x 299 | 91 MB | 89 MB | 6 GFLOPs | PT | 22.55 / 6.44 |
36 | | [SE-ResNet-50](reports/SE-ResNet-50.md) | 224 x 224 | 107 MB | 103 MB | 4 GFLOPs | SE | 22.37 / 6.36 |
37 | | [SE-ResNet-101](reports/SE-ResNet-101.md) | 224 x 224 | 189 MB | 155 MB | 8 GFLOPs | SE | 21.75 / 5.72 |
38 | | [SE-ResNet-152](reports/SE-ResNet-152.md) | 224 x 224 | 255 MB | 220 MB | 11 GFLOPs | SE | 21.34 / 5.54 |
39 | | [SE-ResNeXt-50-32x4d](reports/SE-ResNeXt-50-32x4d.md) | 224 x 224 | 105 MB | 132 MB | 4 GFLOPs | SE | 20.97 / 5.54 |
40 | | [SE-ResNeXt-101-32x4d](reports/SE-ResNeXt-101-32x4d.md) | 224 x 224 | 187 MB | 197 MB | 8 GFLOPs | SE | 19.81 / 4.96 |
41 | | [SENet](reports/SENet.md) | 224 x 224 | 440 MB | 347 MB | 21 GFLOPs | SE | 18.68 / 4.47 |
42 | | [SE-BN-Inception](reports/SE-BN-Inception.md) | 224 x 224 | 46 MB | 43 MB | 2 GFLOPs | SE | 23.62 / 7.04 |
43 | | [densenet121](reports/densenet121.md) | 224 x 224 | 31 MB | 126 MB | 3 GFLOPs | PT | 25.35 / 7.83 |
44 | | [densenet161](reports/densenet161.md) | 224 x 224 | 110 MB | 235 MB | 8 GFLOPs | PT | 22.35 / 6.20 |
45 | | [densenet169](reports/densenet169.md) | 224 x 224 | 55 MB | 152 MB | 3 GFLOPs | PT | 24.00 / 7.00 |
46 | | [densenet201](reports/densenet201.md) | 224 x 224 | 77 MB | 196 MB | 4 GFLOPs | PT | 22.80 / 6.43 |
47 | | [mcn-mobilenet](reports/mcn-mobilenet.md) | 224 x 224 | 16 MB | 38 MB | 579 MFLOPs | AU | 29.40 / - |
48 |
49 |
50 | Click on the model name for a more detailed breakdown of feature extraction costs at different input image/batch sizes if needed. The performance numbers are reported as `top-1 error/top-5 error` on the 2012 ILSVRC validation data. The `src` column indicates the source of the benchmark scores using the following abbreviations:
51 |
52 | * [MCN](http://www.vlfeat.org/matconvnet/pretrained/) - scores obtained from the matconvnet website.
53 | * [PT](http://pytorch.org/docs/master/torchvision/models.html) - scores obtained from the PyTorch torchvision module.
54 | * [L1](https://github.com/albanie/mcnPyTorch/blob/master/benchmarks/cnn_imagenet_pt_mcn.m) - evaluated locally (follow link to view benchmark code).
55 | * AU - numbers reported by the paper authors.
56 |
57 |
58 | These numbers provide an estimate of performance, but note that there may be small differences between the evaluation scripts from different sources.
59 |
60 | **References:**
61 |
62 | * [alexnet](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks) - *Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton. "Imagenet classification with deep convolutional neural networks." Advances in neural information processing systems. 2012.*
63 | * [squeezenet](https://arxiv.org/abs/1602.07360) - *Iandola, Forrest N., et al. "SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size." arXiv preprint arXiv:1602.07360 (2016).*
64 | * [vgg-m](https://arxiv.org/abs/1405.3531) - *Chatfield, Ken, et al. "Return of the devil in the details: Delving deep into convolutional nets." arXiv preprint arXiv:1405.3531 (2014).*
65 | * [vgg-vd-16/vgg-vd-19](https://arxiv.org/abs/1409.1556) - *Simonyan, Karen, and Andrew Zisserman. "Very deep convolutional networks for large-scale image recognition." arXiv preprint arXiv:1409.1556 (2014).*
66 | * [vgg-vd-16-reduced](https://arxiv.org/abs/1506.04579) - *Liu, Wei, Andrew Rabinovich, and Alexander C. Berg. "Parsenet: Looking wider to see better." arXiv preprint arXiv:1506.04579 (2015)*
67 | * [googlenet](http://www.cv-foundation.org/openaccess/content_cvpr_2015/html/Szegedy_Going_Deeper_With_2015_CVPR_paper.html) - *Szegedy, Christian, et al. "Going deeper with convolutions." Proceedings of the IEEE conference on computer vision and pattern recognition. 2015.*
68 | * [inception](https://arxiv.org/abs/1512.00567) - *Szegedy, Christian, et al. "Rethinking the inception architecture for computer vision." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2016.*
69 | * [resnet](https://arxiv.org/abs/1512.03385) - *He, Kaiming, et al. "Deep residual learning for image recognition." Proceedings of the IEEE conference on computer vision and pattern recognition. 2016.*
70 | * [resnext](https://arxiv.org/abs/1611.05431) - *Xie, Saining, et al. "Aggregated residual transformations for deep neural networks." arXiv preprint arXiv:1611.05431 (2016).*
71 | * [SENets](https://arxiv.org/abs/1709.01507) - *Jie Hu, Li Shen and Gang Sun. "Squeeze-and-Excitation Networks." arXiv preprint arXiv:1709.01507 (2017).*
72 | * [Densenet](https://arxiv.org/abs/1608.06993) - *Huang, Gao, et al. "Densely connected convolutional networks." CVPR, (2017).*
73 |
74 | ### Object Detection Architectures
75 |
76 | | model | input size | param memory | feature memory | flops |
77 | |-------|------------|--------------|----------------|-------|
78 | | [rfcn-res50-pascal](reports/rfcn-res50-pascal.md) | 600 x 850 | 122 MB | 1 GB | 79 GFLOPs|
79 | | [rfcn-res101-pascal](reports/rfcn-res101-pascal.md) | 600 x 850 | 194 MB | 2 GB | 117 GFLOPs|
80 | | [ssd-pascal-vggvd-300](reports/ssd-pascal-vggvd-300.md) | 300 x 300 | 100 MB | 116 MB | 31 GFLOPs|
81 | | [ssd-pascal-vggvd-512](reports/ssd-pascal-vggvd-512.md) | 512 x 512 | 104 MB | 337 MB | 91 GFLOPs|
82 | | [ssd-pascal-mobilenet-ft](reports/ssd-pascal-mobilenet-ft.md) | 300 x 300 | 22 MB | 37 MB | 1 GFLOPs|
83 | | [faster-rcnn-vggvd-pascal](reports/faster-rcnn-vggvd-pascal.md) | 600 x 850 | 523 MB | 600 MB | 172 GFLOPs|
84 |
85 | The input sizes used are "typical" for each of the architectures listed, but can be varied. *Anchor/priorbox* generation and *roi/psroi*-pooling are not included in flop estimates. The *ssd-pascal-mobilenet-ft* detector uses the MobileNet feature extractor (the model used here was imported from the architecture made available by [chuanqi305](https://github.com/chuanqi305/MobileNet-SSD)).
86 |
87 | **References:**
88 |
89 | * [faster-rcnn](http://papers.nips.cc/paper/5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks) - *Ren, Shaoqing, et al. "Faster R-CNN: Towards real-time object detection with region proposal networks." Advances in neural information processing systems. 2015.*
90 | * [r-fcn](https://arxiv.org/abs/1605.06409) - *Li, Yi, Kaiming He, and Jian Sun. "R-fcn: Object detection via region-based fully convolutional networks." Advances in Neural Information Processing Systems. 2016.*
91 | * [ssd](https://link.springer.com/chapter/10.1007%2F978-3-319-46448-0_2) - *Liu, Wei, et al. "Ssd: Single shot multibox detector." European conference on computer vision. Springer, Cham, 2016.*
92 | * [mobilenets](https://arxiv.org/abs/1704.04861) - *Howard, Andrew G., Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, and Hartwig Adam. "Mobilenets: Efficient convolutional neural networks for mobile vision applications." arXiv preprint arXiv:1704.04861 (2017).*
93 |
94 |
95 | ### Semantic Segmentation Architectures
96 |
97 | | model | input size | param memory | feature memory | flops |
98 | |-------|------------|--------------|----------------|-------|
99 | | [pascal-fcn32s](reports/pascal-fcn32s.md) | 384 x 384 | 519 MB | 423 MB | 125 GFLOPs|
100 | | [pascal-fcn16s](reports/pascal-fcn16s.md) | 384 x 384 | 514 MB | 424 MB | 125 GFLOPs|
101 | | [pascal-fcn8s](reports/pascal-fcn8s.md) | 384 x 384 | 513 MB | 426 MB | 125 GFLOPs|
102 | | [deeplab-vggvd-v2](reports/deeplab-vggvd-v2.md) | 513 x 513 | 144 MB | 755 MB | 202 GFLOPs|
103 | | [deeplab-res101-v2](reports/deeplab-res101-v2.md) | 513 x 513 | 505 MB | 4 GB | 346 GFLOPs|
104 |
105 | In this case, the input sizes are those which are typically taken as input crops during training. The *deeplab-res101-v2* model uses multi-scale input, with scales `x1, x0.75, x0.5` (computed relative to the given input size).
106 |
107 | **References:**
108 |
109 | * [pascal-fcn](http://www.cv-foundation.org/openaccess/content_cvpr_2015/html/Long_Fully_Convolutional_Networks_2015_CVPR_paper.html) - *Long, Jonathan, Evan Shelhamer, and Trevor Darrell. "Fully convolutional networks for semantic segmentation." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2015.*
110 | * [deeplab](https://arxiv.org/abs/1606.00915) - *DeepLab: Semantic Image Segmentation with Deep Convolutional Nets, Atrous Convolution, and Fully Connected CRFs
111 | Liang-Chieh Chen^, George Papandreou^, Iasonas Kokkinos, Kevin Murphy, and Alan L. Yuille (^equal contribution)
112 | Transactions on Pattern Analysis and Machine Intelligence (TPAMI)*
113 |
114 | ### Keypoint Detection Architectures
115 |
116 | | model | input size | param memory | feature memory | flops |
117 | |-------|------------|--------------|----------------|-------|
118 | | [multipose-mpi](reports/multipose-mpi.md) | 368 x 368 | 196 MB | 245 MB | 134 GFLOPs|
119 | | [multipose-coco](reports/multipose-coco.md) | 368 x 368 | 200 MB | 246 MB | 136 GFLOPs|
120 |
121 | **References:**
122 |
123 | * [multipose](https://arxiv.org/abs/1611.08050) - *Cao, Zhe, et al. "Realtime multi-person 2d pose estimation using part affinity fields." arXiv preprint arXiv:1611.08050 (2016).*
124 |
125 |
126 | ### Notes and Assumptions
127 |
128 |
129 | The numbers for each architecture should be reasonably framework agnostic. It is assumed that all weights and activations are stored as floats (with 4 bytes per datum) and that all relus are performed in-place. Feature memory therefore represents an estimate of the total memory consumption of the features computed via a forward pass of the network for a given input, assuming that memory is not re-used (the exception to this is that, as noted above, relus are performed in-place and do not add to the feature memory total). In practice, many frameworks will clear features from memory when they are no longer required by the execution path and will therefore require less memory than is noted here. The feature memory statistic is simply a rough guide as to "how big" the activations of the network look.
130 |
131 | Fused multiply-adds are counted as single operations. The numbers should be considered to be rough approximations - modern hardware makes it very difficult to accurately count operations (and even if you could, pipelining etc. means that it is not necessarily a good estimate of inference time).
132 |
133 | The tool for computing the estimates is implemented as a module for the [autonn](https://github.com/vlfeat/autonn) wrapper of matconvnet and is included in this [repo](core/burden.m), so feel free to take a look for extra details. This module can be installed with the `vl_contrib` package manager (it has two dependencies which can be installed in a similar manner: [autonn](https://github.com/vlfeat/autonn) and [mcnExtraLayers](https://github.com/albanie/mcnExtraLayers)). Matconvnet versions of all of the models can be obtained from either [here](http://www.vlfeat.org/matconvnet/pretrained/) or [here](http://www.robots.ox.ac.uk/~albanie/mcn-models.html).
134 |
135 | For further reading on the topic, the 2017 ICLR submission [An analysis of deep neural network models for practical applications](https://openreview.net/pdf?id=Bygq-H9eg) is interesting. If you find any issues, or would like to add additional models, add an issue/PR.
136 |
--------------------------------------------------------------------------------
/compute_burdens.m:
--------------------------------------------------------------------------------
1 | function compute_burdens(varargin)
2 | %COMPUTE_BURDENS Compute burden estimates for common architectures
3 | %   COMPUTE_BURDENS computes estimates of the memory and computational
4 | %   requirements of a set of common convolutional neural network architectures.
5 | %   Every selected model is passed to BURDEN, and the console output produced
6 | %   while doing so is captured (via DIARY) in a text file inside `logDir`.
7 | %
8 | %   COMPUTE_BURDENS(..'name', value) accepts the following
9 | %   options:
10 | %
11 | %   `includeClassifiers` :: true
12 | %    Compute burden estimates for common image classification architectures.
13 | %
14 | %   `includeObjDetectors` :: false
15 | %    Compute burden estimates for common object detection architectures.
16 | %
17 | %   `includeSegmenters` :: false
18 | %    Compute burden estimates for a few semantic segmentation architectures.
19 | %
20 | %   `includeKeypointDetectors` :: false
21 | %    Compute burden estimates for a few keypoint detection architectures.
22 | %
23 | %   `logDir` :: fullfile(vl_rootnn, 'data/burden')
24 | %    Directory location to store logged analysis.
25 | %
26 | %   `modelDir` :: fullfile(vl_rootnn, 'data/models-import')
27 | %    Directory containing models to be analysed.
28 | %
29 | % Copyright (C) 2017 Samuel Albanie
30 | % Licensed under The MIT License [see LICENSE.md for details]
31 |
32 |   opts.includeClassifiers = true ;
33 |   opts.includeObjDetectors = false ;
34 |   opts.includeSegmenters = false ;
35 |   opts.includeKeypointDetectors = false ;
36 |   opts.logDir = fullfile(vl_rootnn, 'data/burden') ;
37 |   opts.modelDir = fullfile(vl_rootnn, 'data/models-import') ;
38 |   opts = vl_argparse(opts, varargin) ;
39 |
40 |   % each entry has the form {fileName, inputSize [, scores [, scales]]} ; the
41 |   % optional trailing fields are simply not forwarded to BURDEN when omitted
42 |   models = {} ; logName = 'log' ;
43 |
44 |   if opts.includeClassifiers
45 |     models = [ models { ...
46 |       {'imagenet-matconvnet-alex.mat', [227 227], {'I', 'MCN', 41.8, 19.2}}, ...
47 |       {'imagenet-caffe-ref.mat', [224 224], {'I', 'MCN', 42.6, 19.7}}, ...
48 |       {'squeezenet1_0-pt-mcn.mat', [224 224], {'I', 'PT', 41.90, 19.58}}, ...
49 |       {'squeezenet1_1-pt-mcn.mat', [224 224], {'I', 'PT', 41.81, 19.38}}, ...
50 |       {'imagenet-vgg-f.mat', [224 224], {'I', 'MCN', 41.4, 19.1}}, ...
51 |       {'imagenet-vgg-m.mat', [224 224], {'I', 'MCN', 36.9, 15.5}}, ...
52 |       {'imagenet-vgg-s.mat', [224 224], {'I', 'MCN', 37.0, 15.8}}, ...
53 |       {'imagenet-vgg-m-2048.mat', [224 224], {'I', 'MCN', 37.1, 15.8}}, ...
54 |       {'imagenet-vgg-m-1024.mat', [224 224], {'I', 'MCN', 37.8, 16.1}}, ...
55 |       {'imagenet-vgg-m-128.mat', [224 224], {'I', 'MCN', 40.8, 18.4}}, ...
56 |       {'vgg-vd-16-reduced.mat', [224 224], {'I', 'N/A', '', ''}}, ...
57 |       {'imagenet-vgg-verydeep-16.mat', [224 224], {'I', 'MCN', 28.5, 9.9}}, ...
58 |       {'imagenet-vgg-verydeep-19.mat', [224 224], {'I', 'MCN', 28.7, 9.9}}, ...
59 |       {'imagenet-googlenet-dag.mat', [224 224], {'I', 'MCN', 34.2, 12.9}}, ...
60 |       {'resnet18-pt-mcn.mat', [224 224], {'I', 'PT', 30.24, 10.92}}, ...
61 |       {'resnet34-pt-mcn.mat', [224 224], {'I', 'PT', 26.70, 8.58}}, ...
62 |       {'imagenet-resnet-50-dag.mat', [224 224], {'I', 'MCN', 24.6, 7.7}}, ...
63 |       {'imagenet-resnet-101-dag.mat', [224 224], {'I', 'MCN', 23.4, 7.0}}, ...
64 |       {'imagenet-resnet-152-dag.mat', [224 224], {'I', 'MCN', 23.0, 6.7}}, ...
65 |       {'resnext_50_32x4d-pt-mcn.mat', [224 224], {'I', 'L1', 22.6, 6.49}}, ...
66 |       {'resnext_101_32x4d-pt-mcn.mat', [224 224], {'I', 'L1', 21.55, 5.93}}, ...
67 |       {'resnext_101_64x4d-pt-mcn.mat', [224 224], {'I', 'PT', 20.81, 5.66}}, ...
68 |       {'inception_v3-pt-mcn.mat', [299 299], {'I', 'PT', 22.55, 6.44}, 1:0.5:3}, ... % breaks on small inputs
69 |       {'SE-ResNet-50-mcn.mat', [224 224], {'I', 'AU', 22.37, 6.36}}, ...
70 |       {'SE-ResNet-101-mcn.mat', [224 224], {'I', 'AU', 21.75, 5.72}}, ...
71 |       {'SE-ResNet-152-mcn.mat', [224 224], {'I', 'AU', 21.34, 5.54}}, ...
72 |       {'SE-ResNeXt-50-32x4d-mcn.mat', [224 224], {'I', 'AU', 20.97, 5.54}}, ...
73 |       {'SE-ResNeXt-101-32x4d-mcn.mat', [224 224], {'I', 'AU', 19.81, 4.96}}, ...
74 |       {'SENet-mcn.mat', [224 224], {'I', 'AU', 18.68, 4.47}}, ...
75 |       {'SE-BN-Inception-mcn.mat', [224 224], {'I', 'AU', 23.62, 7.04}, 1}, ... % breaks on most inputs
76 |       {'densenet121-pt-mcn.mat', [224 224], {'I', 'PT', 25.35, 7.83}, 1:0.5:3}, ...
77 |       {'densenet161-pt-mcn.mat', [224 224], {'I', 'PT', 22.35, 6.20}, 1:0.5:3}, ...
78 |       {'densenet169-pt-mcn.mat', [224 224], {'I', 'PT', 24.00, 7.00}, 1:0.5:3}, ...
79 |       {'densenet201-pt-mcn.mat', [224 224], {'I', 'PT', 22.80, 6.43}, 1:0.5:3}, ...
80 |       {'mcn-mobilenet.mat', [224 224], {'I', 'AU', 29.4, '-'}, 1:0.5:3} ...
81 |     } ] ;
82 |     logName = [ logName '-cls'] ;
83 |     % fix later
84 |     %  {'mcn-mobilenet-v2.mat', [224 224], {'I', 'AU', 29.4, '-'}, 1:0.5:3} ...
85 |   end
86 |
87 |   if opts.includeObjDetectors
88 |     models = [ models { ...
89 |       {'ssd-pascal-mobilenet-ft.mat', [300 300]}, ...
90 |       {'rfcn-res50-pascal', [600 850]}, ...
91 |       {'rfcn-res101-pascal', [600 850]}, ...
92 |       {'ssd-mcn-pascal-vggvd-300.mat', [300 300]}, ...
93 |       {'ssd-mcn-pascal-vggvd-512.mat', [512 512]}, ...
94 |       {'faster-rcnn-vggvd-pascal', [600 850]}, ...
95 |     } ] ;
96 |     logName = [logName '-det'] ;
97 |   end
98 |
99 |   if opts.includeSegmenters
100 |     models = [ models { ...
101 |       {'pascal-fcn32s-dag.mat', [384 384]}, ...
102 |       {'pascal-fcn16s-dag.mat', [384 384]}, ...
103 |       {'pascal-fcn8s-dag.mat', [384 384]}, ...
104 |       {'deeplab-vggvd-v2.mat', [513 513]}, ...
105 |       {'deeplab-res101-v2.mat', [513 513]}, ...
106 |     } ] ;
107 |     logName = [logName '-seg'] ;
108 |   end
109 |
110 |   if opts.includeKeypointDetectors
111 |     models = [ models { ...
112 |       {'multipose-mpi.mat', [368 368]}, ...
113 |       {'multipose-coco.mat', [368 368]}, ...
114 |     } ] ;
115 |     logName = [logName '-key'] ;
116 |   end
117 |
118 |   if ~exist(opts.logDir, 'dir'), mkdir(opts.logDir) ; end
119 |   logFile = fullfile(opts.logDir, [logName '.txt']) ;
120 |   diary(logFile) ; diary on ;
121 |   stopDiary = onCleanup(@() diary('off')) ; %#ok<NASGU> also runs on error
122 |
123 |   for ii = 1:numel(models)
124 |     mm = models{ii} ;
125 |     % entries without scores/scales only have two elements, so guard indexing
126 |     args = {'modelPath', fullfile(opts.modelDir, mm{1}), 'imsz', mm{2}} ;
127 |     if numel(mm) >= 3, args = [args {'scores', mm{3}}] ; end %#ok<AGROW>
128 |     if numel(mm) >= 4, args = [args {'scales', mm{4}}] ; end %#ok<AGROW>
129 |     burden(args{:}) ;
130 |   end
131 |
--------------------------------------------------------------------------------
/core/burden.m:
--------------------------------------------------------------------------------
1 | function burden(varargin)
2 | %BURDEN compute memory and computational burden of network
3 | %   BURDEN(..'name', value) overrides the defaults set below (vl_argparse).
4 | % Copyright (C) 2017 Samuel Albanie
5 | % Licensed under The MIT License [see LICENSE.md for details]
6 |
7 |   opts.gpus = 1 ;
8 |   opts.helper = [] ;
9 |   opts.imsz = [224 224] ;
10 |   opts.type = 'single' ;
11 |   opts.scores = {} ;
12 |   opts.batchSize = 128 ;
13 |   opts.lastConvFeats = '' ;
14 |   opts.scales = 0.5:0.5:3 ;
15 |   opts.reportDir = fullfile(vl_rootnn, 'contrib/convnet-burden/reports') ;
16 |   opts.modelPath = 'data/models-import/imagenet-matconvnet-alex.mat' ;
17 |   opts = vl_argparse(opts, varargin) ;
18 |
19 |   useGpu = numel(opts.gpus) > 0 ; dag = loadDagNN(opts) ;
20 |   % set options which are specific to current model
21 |   [~,modelName,~] = fileparts(opts.modelPath) ;
22 |   modelOpts.name = modelName ; modelOpts.inputVars = dag.getInputs() ;
23 |   modelOpts.lastConvFeats = getLastFullyConv(modelName, opts) ;
24 |   opts.modelOpts = modelOpts ; out = toAutonn(dag, opts) ; net = Net(out{:}) ;
25 |
26 |   if useGpu, net.move('gpu') ; end
27 |   imsz = opts.imsz ;
28 |   baseParams = computeBurden(net, 'params', imsz, opts) ;
29 |   base.paramMem = sum(baseParams) ;
30 |   [featMem,flops] = computeBurden(net, 'full', imsz, opts) ;
31 |   base.featMem = sum(featMem) ; base.flops = sum(flops) ;
32 |   base.scores = opts.scores ;
33 |   plotProfile(baseParams, featMem, flops, opts) ;
34 |
35 |   % find fully convolutional component
36 |   if ~isempty(modelOpts.lastConvFeats)
37 |     found = false ; % stays false if the lookup fails for every head
38 |     for ii = 1:numel(out) % to avoid hardcoding head ordering, try them in turn
39 |       try tail = out{ii}.find(modelOpts.lastConvFeats, 1) ; found = true ; break
40 |       catch % lookup failed for this head -> continue to try remaining heads
41 |       end
42 |     end
43 |     if ~found, error('burden:layerNotFound', 'layer %s not found in %s', modelOpts.lastConvFeats, modelName) ; end
44 |     trunk = Net(tail) ;
45 |     if useGpu, trunk.move('gpu') ; end
46 |   else
47 |     trunk = net ;
48 |   end
49 |   report(numel(opts.scales)).imsz = [] ; % preallocate one record per scale
50 |   for ii = 1:numel(opts.scales)
51 |     imsz_ = round(imsz * opts.scales(ii)) ;
52 |     [mem_, flops_, lastSz] = computeBurden(trunk, 'feats', imsz_, opts) ;
53 |     mem = sum(mem_) * opts.batchSize ; flops = sum(flops_) * opts.batchSize ;
54 |     report(ii).imsz = sprintf('%d x %d', imsz_) ;
55 |     report(ii).flops = readableFlops(flops) ;
56 |     report(ii).featMem = readableMemory(mem) ;
57 |     report(ii).featSz = sprintf('%d x %d x %d', lastSz) ;
58 |   end
59 |   printReport(base, report, opts) ; if useGpu, trunk.move('cpu') ; end
60 |
61 | % --------------------------------------
62 | function printReport(base, report, opts)
63 | % --------------------------------------
64 | %PRINTREPORT print a summary to the console and write `<reportDir>/<name>.md`
65 |   modelName = readableName(opts.modelOpts.name) ;
66 |   % format base size from opts.imsz: scale 1 need not appear in opts.scales
67 |   baseImsz = sprintf('%d x %d', round(opts.imsz)) ;
68 |
69 |   % produce readable output
70 |   header = sprintf('Report for %s\n', modelName) ;
71 |   fprintf('%s\n', repmat('-', 1, numel(header))) ;
72 |   fprintf('%s', header) ; % the model name must not be parsed as a format
73 |   fprintf('Data type of feats and params: %s\n', opts.type) ; % for humans
74 |   fprintf('Memory used by params: %s\n', readableMemory(base.paramMem)) ;
75 |
76 |   msg2 = ' Memory consumed by full feats: %s\n' ;
77 |   msg3 = ' Estimated total flops: %s\n' ;
78 |   fprintf('Computing burden for single item batch at imsz %s: \n', baseImsz) ;
79 |   fprintf(msg2, readableMemory(base.featMem)) ;
80 |   fprintf(msg3, readableFlops(base.flops)) ;
81 |
82 |   msg1 = 'Computing burden for %d item batch at imsz %s: \n' ;
83 |   fprintf(msg1, opts.batchSize, baseImsz) ;
84 |   fprintf(msg2, readableMemory(opts.batchSize*base.featMem)) ;
85 |   fprintf(msg3, readableFlops(base.flops * opts.batchSize)) ;
86 |
87 |   % produce output for shared table
88 |   detailedReport = sprintf('reports/%s.md', modelName) ;
89 |   stats = {readableMemory(base.paramMem), ...
90 |            readableMemory(base.featMem), ...
91 |            readableFlops(base.flops), ...
92 |            readableScores(base.scores)} ; % note: scores adds two columns
93 |   markdown = 'MD:: | [%s](%s) | %s | %s | %s | %s | %s |\n' ;
94 |   fprintf(markdown, modelName, detailedReport, baseImsz, stats{:}) ;
95 |
96 |   fprintf('%s\n', repmat('-', 1, numel(header))) ;
97 |   msg = '\nFeature extraction burden at %s with batch size %d: \n\n' ;
98 |   fprintf(msg, opts.modelOpts.lastConvFeats, opts.batchSize) ;
99 |   disp(struct2table(report)) ;
100 |
101 |   % generate detailed report for feature extraction
102 |   if ~exist(opts.reportDir, 'dir'), mkdir(opts.reportDir) ; end
103 |   reportPath = fullfile(opts.reportDir, sprintf('%s.md', modelName)) ;
104 |   header = '### Report for %s\n' ;
105 |   body = ['Model params %s \n\n' ...
106 |     'Estimates for a single full pass of model at input size %s: \n' ...
107 |     '\n' ...
108 |     '* Memory required for features: %s \n' ...
109 |     '* Flops: %s \n' ...
110 |     '\n' ...
111 |     'Estimates are given below of the burden of computing the `%s` ' ...
112 |     'features in the network for different input sizes using a '...
113 |     'batch size of %d: \n\n'] ;
114 |   bodyArgs = {readableMemory(base.paramMem), baseImsz, ...
115 |     readableMemory(base.featMem), readableFlops(base.flops), ...
116 |     opts.modelOpts.lastConvFeats, opts.batchSize} ;
117 |
118 |   tableHeader = ['| input size | feature size | feature memory | flops | \n' ...
119 |     '|------------|--------------|----------------|-------| \n'] ;
120 |   tableRow = '| %s | %s | %s | %s |\n' ;
121 |   graphDescription = ['\nA rough outline of where in the network memory is ' ...
122 |     'allocated to parameters and features and where the greatest computational '...
123 |     'cost lies is shown below. The x-axis does not show labels (it becomes hard' ...
124 |     ' to read for networks containing hundreds of layers) - it should be ' ...
125 |     'interpreted as depicting increasing depth from left to right. The goal is' ...
126 |     ' simply to give some idea of the overall profile of the model: \n\n'] ;
127 |   graph = '![%s profile](figs/%s.png)\n' ; % image link, relative to report
128 |
129 |   fid = fopen(reportPath, 'w') ;
130 |   if fid < 0, error('burden:reportOpen', 'cannot write %s', reportPath) ; end
131 |   closeFile = onCleanup(@() fclose(fid)) ; %#ok<NASGU> closes on error too
132 |   fprintf(fid, header, modelName) ;
133 |   fprintf(fid, body, bodyArgs{:}) ;
134 |   fprintf(fid, tableHeader) ;
135 |   for ii = 1:numel(report)
136 |     rec = report(ii) ;
137 |     fprintf(fid, tableRow, rec.imsz, rec.featSz, rec.featMem, rec.flops) ;
138 |   end
139 |   fprintf(fid, graphDescription) ;
140 |   fprintf(fid, graph, modelName, modelName) ;
141 |
142 | % ----------------------------------------------------
function plotProfile(baseParams, featMem, flops, opts)
% ----------------------------------------------------
% PLOTPROFILE(BASEPARAMS, FEATMEM, FLOPS, OPTS) draws three stacked bar
%   charts (parameter memory, feature memory and flops, one bar per entry
%   of the corresponding input array) and prints the figure as a PNG to
%   <OPTS.reportDir>/figs/<readable model name>.png, creating the figs
%   directory if it does not exist.

% pick display units from the largest entry of each profile
[~, paramUnits, paramScale] = readableMemory(max(baseParams)) ;
[~, featUnits, featScale] = readableMemory(max(featMem)) ;
[~, flopUnits, flopScale] = readableFlops(max(flops)) ;

% one row per panel: {scaled data, bar() options, title, y-axis label}
panels = { ...
  baseParams ./ paramScale, ...
    {'FaceAlpha', 0.6, 'edgecolor','none'}, ...
    'Parameter memory profile', sprintf('memory (%s)', paramUnits) ; ...
  featMem ./ featScale, ...
    {'FaceAlpha', 0.4, 'FaceColor', 'r', 'edgecolor','none'}, ...
    'Feature memory profile', sprintf('memory (%s)', featUnits) ; ...
  flops ./ flopScale, ...
    {'FaceAlpha', 0.3, 'FaceColor', 'm', 'edgecolor','none'}, ...
    'Flops profile', sprintf('%sFLOPS', flopUnits)} ;

numPanels = size(panels, 1) ;
for row = 1:numPanels
  subplot(numPanels, 1, row) ;
  bar(panels{row,1}, panels{row,2}{:}) ;
  title(panels{row,3}) ;
  set(gca, 'xtick', []) ; % x tick labels are dropped on every panel
  ylabel(panels{row,4}) ;
end
xlabel('depth') ; % only the bottom panel carries an x-axis label

% write the figure next to the markdown reports
figDir = fullfile(opts.reportDir, 'figs') ;
if ~exist(figDir, 'dir')
  mkdir(figDir) ;
end
figPath = fullfile(figDir, sprintf('%s.png', readableName(opts.modelOpts.name))) ;
print(figPath, '-dpng') ;
170 |
171 | % -------------------------------------
function name = readableName(modelName)
% -------------------------------------
% READABLENAME(MODELNAME) maps a stored model name onto the shorter name
%   used in the reports by applying a fixed sequence of substring
%   replacements, followed by two whole-name special cases.

% {pattern, replacement} pairs, applied in order (order matters: the
% '-pt-mcn' suffix must be removed before the shorter '-mcn')
rules = { ...
  '_',         '-' ; ...      % use consistent separators
  'imagenet-', '' ; ...       % clean up prefixes
  '-pt-mcn',   '' ; ...       % clean up suffixes
  '-mcn',      '' ; ...
  '-dag',      '' ; ...
  'verydeep',  'vd' ; ...     % consistent naming
  'reduced',   'atrous'} ;

name = modelName ;
for k = 1:size(rules, 1)
  name = strrep(name, rules{k,1}, rules{k,2}) ;
end

% special cases that only apply to the complete (cleaned) name
if strcmp(name, 'matconvnet-alex')
  name = 'alexnet' ;
elseif strcmp(name, 'caffe-ref')
  name = 'caffenet' ;
end
189 |
190 | % ----------------------------------------------------
function [memStr, units, factor] = readableMemory(mem)
% ----------------------------------------------------
% READABLEMEMORY(MEM) convert total raw bytes into more readable summary
%   based on J. Henriques' autonn varDisplay() function
%
%   [MEMSTR, UNITS, FACTOR] = READABLEMEMORY(MEM) returns:
%     MEMSTR - the rounded value followed by its unit, e.g. '3 MB'
%              (left blank when MEM is NaN)
%     UNITS  - the unit suffix on its own, e.g. 'MB'
%     FACTOR - the number of bytes in one UNITS, so that MEM / FACTOR is
%              the value expressed in UNITS
%
%   Callers in this file pass a scalar MEM.

suffixes = {'B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB'} ;
place = floor(log(mem) / log(1024)) ; % 0-based index into 'suffixes'
place(mem == 0) = 0 ; % 0 bytes needs special handling

% keep the index inside the suffix table: fractional values stay in bytes
% and anything beyond the last suffix is reported in the largest unit.
% (max/min ignore NaN, so a NaN input resolves to index 0 here.)
place = min(max(place, 0), numel(suffixes) - 1) ;

num = mem ./ (1024 .^ place) ;
memStr = num2str(num, '%.0f') ;
memStr(:,end+1) = ' ' ;
units = suffixes{place + 1} ;
memStr = [memStr, char(units)] ;

% the scale factor must use the same index as the units: previously values
% below 1KB were labelled 'B' but returned a factor of 1024
factor = 1024 ^ place ;
memStr(isnan(mem),:) = ' ' ; % leave invalid values blank
203 |
204 | % ------------------------------------------------------
function scoreStr = readableScores(scores)
% ------------------------------------------------------
% READABLESCORES(SCORES) produce a summary string describing model
%   performance
%
%   SCORES is a cell array whose first element is a format code and whose
%   remaining elements are the results. The only format handled is 'I'
%   (imagenet), which yields '<src> | <score> / <score>' - i.e. the string
%   spans two markdown table columns. If the first result is 'N/A' a fixed
%   placeholder is returned instead. Numeric results are printed with two
%   decimal places, anything else is inserted as text.
%
%   An error is raised for any other format code.

kind = scores{1} ; res = scores(2:end) ; % avoid shadowing builtin FORMAT
switch kind
  case 'I'
    if strcmp(res{1}, 'N/A')
      scoreStr = 'N/A | - / - ' ;
    else
      template = '%s |' ;
      % isnumeric (rather than isa double) so that single or integer
      % scores are not printed through %s as character codes
      if isnumeric(res{2})
        template = [template ' %.2f'] ;
      else
        template = [template ' %s'] ;
      end
      if isnumeric(res{3})
        template = [template ' / %.2f'] ;
      else
        template = [template ' / %s'] ;
      end
      scoreStr = sprintf(template, res{:}) ; % imagenet
    end
  otherwise
    % fail with a clear message instead of "output argument not assigned"
    error('score format %s not recognised', kind) ;
end
229 |
230 | % ------------------------------------------------------
function [flopStr, units, factor] = readableFlops(flops)
% ------------------------------------------------------
% READABLEFLOPS(FLOPS) convert total flops into more readable summary
%
%   [FLOPSTR, UNITS, FACTOR] = READABLEFLOPS(FLOPS) returns:
%     FLOPSTR - the rounded value followed by an SI prefix and 'FLOPs',
%               e.g. '2 GFLOPs' (left blank when FLOPS is NaN)
%     UNITS   - the SI prefix on its own (a single space below 1000)
%     FACTOR  - the number of flops in one UNITS, so that FLOPS / FACTOR
%               is the value expressed in UNITS
%
%   Callers in this file pass a scalar FLOPS.

suffixes = {' ', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'} ;
place = floor(log(flops) / log(1000)) ; % 0-based index into 'suffixes'
place(flops == 0) = 0 ; % 0 flops needs special handling

% keep the index inside the suffix table: fractional values are left
% unscaled and anything beyond the last prefix uses the largest one.
% (max/min ignore NaN, so a NaN input resolves to index 0 here.)
place = min(max(place, 0), numel(suffixes) - 1) ;

num = flops ./ (1000 .^ place) ;
flopStr = num2str(num, '%.0f') ;
flopStr(:,end+1) = ' ' ;
units = suffixes{place + 1} ;
flopStr = [flopStr, char(units) 'FLOPs'] ;

% the scale factor must use the same index as the units: previously values
% below 1000 had a blank prefix but returned a factor of 1000
factor = 1000 ^ place ;
flopStr(isnan(flops),:) = ' ' ; % leave invalid values blank
242 |
243 | % --------------------------------
function dag = loadDagNN(opts)
% --------------------------------
% LOADDAGNN(OPTS) loads the model stored at OPTS.modelPath and returns it
%   as a dagnn.DagNN object. A stored struct with a 'params' field is
%   loaded directly as a DagNN; anything else is treated as a SimpleNN and
%   converted.

stored = load(opts.modelPath) ;
storedAsDag = isfield(stored, 'params') ;
if storedAsDag
  dag = dagnn.DagNN.loadobj(stored) ;
else % simplenn
  dag = dagnn.DagNN.fromSimpleNN(stored) ;
end
252 |
253 | % --------------------------------
function out = toAutonn(net, opts)
% --------------------------------
% TOAUTONN(NET, OPTS) converts the DagNN NET with Layer.fromDagNN, passing
%   along the custom layer converter that matches OPTS.modelOpts.name.

name = opts.modelOpts.name ;

% select the helper that knows how to convert this architecture's custom
% layers; the first matching name fragment wins
if contains(name, 'faster-rcnn')
  customFn = @faster_rcnn_autonn_custom_fn ;
elseif contains(name, 'ssd')
  customFn = @ssd_autonn_custom_fn ;
elseif contains(name, 'rfcn')
  customFn = @rfcn_autonn_custom_fn ;
else % every other architecture shares the generic "extras" converter
  customFn = @extras_autonn_custom_fn ;
end

out = Layer.fromDagNN(net, customFn) ;
270 |
271 | % -----------------------------------------------
function last = getLastFullyConv(modelName, opts)
% -----------------------------------------------
%GETLASTFULLYCONV - find the last fully convolutional layer of the network
%   GETLASTFULLYCONV(MODELNAME, OPTS) - looks up the last "fully
%   convolutional" layer of the network architecture. This is the last
%   layer that can be computed with any input image size (fully connected
%   layers typically break under varying input sizes). In this function
%   the last layer is "looked up" for common architectures as a
%   convenience. However, the user may also specify the name of the layer
%   output variable directly via OPTS.lastConvFeats, in which case it is
%   returned unchanged.
%
%   An error is raised if the architecture is not recognised and
%   OPTS.lastConvFeats is empty.

last = opts.lastConvFeats ;
if ~isempty(last) ; return ; end

% families that are matched on the exact model name
alexFamily = {'imagenet-matconvnet-alex', ...
              'imagenet-vgg-f', ...
              'imagenet-vgg-m', ...
              'imagenet-vgg-s', ...
              'imagenet-vgg-m-2048', ...
              'imagenet-vgg-m-1024', ...
              'imagenet-vgg-m-128', ...
              'imagenet-caffe-ref', ...
              'imagenet-vgg-verydeep-16', ...
              'imagenet-vgg-verydeep-19', ...
              'vgg-vd-16-reduced'} ;
resnets = {'imagenet-resnet-50-dag', ...
           'imagenet-resnet-101-dag', ...
           'imagenet-resnet-152-dag'} ;
small_resnets = {'resnet18-pt-mcn'} ;
pt_imports = {'resnet34-pt-mcn', ...
              'resnext_50_32x4d-pt-mcn', ...
              'resnext_101_32x4d-pt-mcn', ...
              'resnext_101_64x4d-pt-mcn'} ;
fcns = {'pascal-fcn32s-dag', 'pascal-fcn16s-dag', 'pascal-fcn8s-dag'} ;
squeezenets = {'squeezenet1_0-pt-mcn', 'squeezenet1_1-pt-mcn'} ;

% NOTE: the order of the checks matters, since the substring tests below
% overlap (e.g. 'SE-BN-Inception' must be tested before the generic 'SE')
if ismember(modelName, alexFamily), last = 'pool5' ;
elseif ismember(modelName, resnets), last = 'res5c_relu' ;
elseif ismember(modelName, small_resnets), last = 'features_7_1_id_relu' ;
elseif ismember(modelName, pt_imports), last = 'features_7_2_id_relu' ;
elseif ismember(modelName, squeezenets), last = 'features_12_cat' ;
elseif ismember(modelName, fcns), last = 'score_fr' ;
elseif contains(modelName, 'googlenet'), last = 'icp9_out' ;
elseif contains(modelName, 'multipose'), last = 'Mconv6_stage6_L2' ;
elseif contains(modelName, 'faster-rcnn') || contains(modelName, 'rfcn')
  if contains(modelName, 'vggvd'), last = 'relu5_3' ; end
  if contains(modelName, 'res50'), last = 'res5c_relu' ; end
  if contains(modelName, 'res101'), last = 'res5c_relu' ; end
elseif contains(modelName, 'ssd')
  if contains(modelName, 'vggvd'), last = 'relu4_3' ; end
  if contains(modelName, 'res50'), last = 'res5c_relu' ; end
  if contains(modelName, 'res101'), last = 'res5c_relu' ; end
  if contains(modelName, 'mobilenet'), last = 'conv17_2_relu' ; end
elseif contains(modelName, 'inception'), last = 'features_19' ;
elseif contains(modelName, 'SE-BN-Inception'), last = 'inception_5b_scale' ;
elseif contains(modelName, 'SE'), last = 'conv5_3' ;
elseif strcmp(modelName, 'deeplab-vggvd-v2'), last = 'fc8_interp' ;
elseif strcmp(modelName, 'deeplab-res101-v2'), last = 'fc1_interp' ;
elseif contains(modelName, 'densenet'), last = 'features_2' ;
elseif contains(modelName, 'mcn-mobilenet'), last = 'fc7' ;
end

% unrecognised architectures (including detector backbones not listed
% above) leave LAST empty and are reported here, rather than dropping into
% the interactive debugger, so that unattended runs fail cleanly
msg = ['architecture not recognised, last fully convolutional layer must' ...
       ' be specified directly using the lastConvFeats option'] ;
assert(~isempty(last), msg) ;
336 |
337 | % -----------------------------------------------------------------
function [mem,flops,lastSz] = computeBurden(net, target, imsz, opts)
% -----------------------------------------------------------------
% COMPUTEBURDEN(NET, TARGET, IMSZ, OPTS) estimates the memory and compute
%   cost of NET for the given TARGET:
%
%     'params'         - MEM is the memory of the parameter variables; no
%                        forward pass is run (FLOPS = 0, LASTSZ = []).
%     'feats' / 'full' - the network is evaluated in test mode on an all
%                        zero input of size [IMSZ 3]; MEM and FLOPS are the
%                        values returned by computeMemory (over the feature
%                        variables) and computeFlops, and LASTSZ is the
%                        size of the OPTS.modelOpts.lastConvFeats variable.
%                        With 'full', an 'im_info' input is also supplied
%                        when the model declares one.
%
%   An error is raised for any other TARGET.

flops = 0 ; lastSz = [] ;
last = opts.modelOpts.lastConvFeats ;
params = [net.params.var] ;
inputs = cellfun(@(x) net.inputs.(x), fieldnames(net.inputs))' ;

% candidate feature slots are the odd indices of net.vars from 3 onwards
% (presumably the even slots hold derivatives - confirm against autonn),
% excluding anything that is a parameter or a network input
feats = 3:2:numel(net.vars) ;
feats = feats(~ismember(feats, [params inputs])) ;

switch target
  case 'params'
    mem = computeMemory(net, params, opts) ;
  case {'feats', 'full'}
    x = zeros([imsz 3], opts.type) ;
    if numel(opts.gpus), x = gpuArray(x) ; end
    inVars = opts.modelOpts.inputVars ; args = {inVars{1}, x} ;
    if ismember('im_info', inVars) && strcmp(target, 'full') % handle custom inputs
      args = [args {'im_info', [imsz 1]}] ;
    end
    net.eval(args, 'test') ;
    lastSz = size(net.getValue(last)) ;
    mem = computeMemory(net, feats, opts) ;
    flops = computeFlops(net) ;
  otherwise
    % the offending value must be passed explicitly: with a single
    % argument error() prints the format string verbatim
    error('target %s not recognised', target) ;
end
363 |
364 | % ---------------------------------------
function mem = computeMemory(net, p, opts)
% ---------------------------------------
% COMPUTEMEMORY(NET, P, OPTS) returns, for each index in P, the number of
%   bytes needed to store NET.vars{index} as OPTS.type, i.e. the element
%   count of the variable multiplied by the byte width of the data type.
%   MEM has one entry per entry of P.
%
%   An error is raised if OPTS.type is not a supported numeric type.

switch opts.type
  case {'int8', 'uint8'}, bytes = 1 ;
  case {'int16', 'uint16'}, bytes = 2 ;
  case {'int32', 'uint32', 'single'}, bytes = 4 ;
  case {'int64', 'uint64', 'double'}, bytes = 8 ;
  otherwise
    % the offending value must be passed explicitly: with a single
    % argument error() prints the format string verbatim
    error('data type %s not recognised', opts.type) ;
end
mem = arrayfun(@(x) numel(net.vars{x}), p) * bytes ;
381 |
382 | % -------------------------------------------
function totals = computeFlops(net, varargin)
% -------------------------------------------
% COMPUTEFLOPS(NET, ...) returns a row vector with one flop estimate per
%   entry of NET.forward, based on the sizes of the variables currently
%   stored in NET.vars. Entries for 'root' layers are left at zero. An
%   error is raised for any layer function without a cost rule.
%
%   Options:
%     includeExp (0) - if true, softmax and sigmoid layers are charged for
%                      their exponentials, otherwise they count as free.

opts.includeExp = 0 ;
opts = vl_argparse(opts, varargin) ;

numLayers = numel(net.forward) ;
totals = zeros(1, numLayers) ;
for ii = 1:numLayers
  layer = net.forward(ii) ;
  ins = gather(net.vars(layer.inputVars)) ;
  outs = gather(net.vars(layer.outputVar)) ;
  funcStr = func2str(layer.func) ;
  switch funcStr
    case 'root'
      continue % nothing to count

    % --- layers treated as free ---
    % batch norm: assumed merged at test time
    % reshape / flatten / crop: essentially free (index slicing)
    % permute / cat: can be expensive, but involve no flops
    % proposal, detector and roi pooling layers: would be too inaccurate
    % priorbox: not worth computing
    % mask / dropout: would be removed during inference
    case {'vl_nnbnorm_wrapper', 'vl_nnreshape', 'vl_nnflatten', ...
          'vl_nncrop', 'permute', 'cat', 'size', ...
          'vl_nnproposalrpn', 'vl_nnmultiboxdetector', 'vl_nnpriorbox', ...
          'vl_nnroipool', 'vl_nnpsroipool', ...
          'vl_nnmask', 'vl_nndropout_wrapper'}
      flops = 0 ;

    % --- convolutions: count fused multiply-adds ---
    case 'vl_nnconv'
      flops = numel(outs{1}) * numel(ins{2}(:,:,:,1)) ;
      if numel(ins) == 3 % a third input is the bias
        flops = flops + numel(outs{1}) ;
      end
    case 'vl_nnconvt'
      flops = numel(ins{1}) * numel(ins{2}(:,:,1,:)) ;
      if numel(ins) == 3 % a third input is the bias
        flops = flops + numel(outs{1}) ;
      end

    % --- pooling ---
    case 'vl_nnpool' % assume two flops per location
      isStrideKey = cellfun(@(x) isequal(x, 'stride'), layer.args) ;
      stride = layer.args{find(isStrideKey) + 1} ;
      flops = 2 * numel(outs{1}) * prod(stride) ;
    case 'vl_nnglobalpool' % FMA
      flops = numel(ins{1}) ;

    % --- elementwise operations ---
    case {'vl_nnwsum', 'vl_nnscale'} % one fused multiply-add per output
      flops = numel(outs{1}) ;
    case {'vl_nnrelu', 'vl_nnaxpy'} % relu: comparison + multiply, axpy: FMA
      flops = 2 * numel(outs{1}) ;
    case 'vl_nninterp'
      flops = 4 * numel(outs{1}) ;
    case 'max' % comparisons
      flops = numel(ins{1}) ;
    case 'vl_nnmax'
      flops = numel(outs{1}) * numel(ins) ;

    % --- normalisation ---
    case {'vl_nnscalenorm', 'vl_nnnormalize'}
      outSz = size(outs{1}) ; % simplifying assumption: common norm factors
      normFactors = (1 + 1 + 2 * outSz(3)) * prod(outSz(1:2)) ;
      flops = numel(outs{1}) + normFactors ;

    % --- layers using exp: counting flops for exp is tricky ---
    case {'vl_nnsoftmax', 'vl_nnsoftmaxt'}
      if opts.includeExp
        flops = (2+1+5+1+2)*numel(outs{1}) ;
      else
        flops = 0 ;
      end
    case 'vl_nnsigmoid'
      if opts.includeExp
        flops = 3*numel(outs{1}) ;
      else
        flops = 0 ;
      end

    otherwise
      error('layer %s not recognised', funcStr) ;
  end
  totals(ii) = flops ;
end
448 |
--------------------------------------------------------------------------------
/matlab/fcn_autonn_custom_fn.m:
--------------------------------------------------------------------------------
function obj = fcn_autonn_custom_fn(block, inputs, ~)
% FCN_AUTONN_CUSTOM_FN autonn custom layer converter
%   OBJ = FCN_AUTONN_CUSTOM_FN(BLOCK, INPUTS, ~) converts a dagnn.Crop
%   BLOCK into a vl_nncrop_wrapper layer applied to the first two INPUTS
%   with the block's crop setting. OBJ is not assigned for any other
%   block class.
%
% Copyright (C) 2017 Samuel Albanie
% Licensed under The MIT License [see LICENSE.md for details]

  if strcmp(class(block), 'dagnn.Crop')
    obj = vl_nncrop_wrapper(inputs{1}, inputs{2}, block.crop) ;
  end
11 |
--------------------------------------------------------------------------------
/matlab/inception_autonn_custom_fn.m:
--------------------------------------------------------------------------------
function obj = inception_autonn_custom_fn(block, inputs, ~)
% INCEPTION_AUTONN_CUSTOM_FN autonn custom layer converter
%   OBJ = INCEPTION_AUTONN_CUSTOM_FN(BLOCK, INPUTS, ~) converts the dagnn
%   Permute, Flatten and Reshape blocks into autonn layers wrapping
%   permute, vl_nnflatten and vl_nnreshape respectively. Each layer takes
%   the first of INPUTS plus the block's own setting. OBJ is not assigned
%   for any other block class.
%
% Copyright (C) 2017 Samuel Albanie
% Licensed under The MIT License [see LICENSE.md for details]

  blockClass = class(block) ;
  if strcmp(blockClass, 'dagnn.Permute')
    obj = Layer.create(@permute, {inputs{1}, block.order}) ;
  elseif strcmp(blockClass, 'dagnn.Flatten')
    obj = Layer.create(@vl_nnflatten, {inputs{1}, block.axis}) ;
  elseif strcmp(blockClass, 'dagnn.Reshape')
    obj = Layer.create(@vl_nnreshape, {inputs{1}, block.shape}) ;
  end
15 |
--------------------------------------------------------------------------------
/matlab/squeezenet_autonn_custom_fn.m:
--------------------------------------------------------------------------------
function obj = squeezenet_autonn_custom_fn(block, inputs, ~)
% SQUEEZENET_AUTONN_CUSTOM_FN autonn custom layer converter
%   OBJ = SQUEEZENET_AUTONN_CUSTOM_FN(BLOCK, INPUTS, ~) maps a dagnn
%   Permute / Flatten / Reshape BLOCK onto an autonn layer that calls
%   permute / vl_nnflatten / vl_nnreshape on the first of INPUTS, using the
%   order / axis / shape stored in the block. OBJ is not assigned for any
%   other block class.
%
% Copyright (C) 2017 Samuel Albanie
% Licensed under The MIT License [see LICENSE.md for details]

  kind = class(block) ;
  if strcmp(kind, 'dagnn.Permute')
    layerArgs = {inputs{1}, block.order} ;
    obj = Layer.create(@permute, layerArgs) ;
  elseif strcmp(kind, 'dagnn.Flatten')
    layerArgs = {inputs{1}, block.axis} ;
    obj = Layer.create(@vl_nnflatten, layerArgs) ;
  elseif strcmp(kind, 'dagnn.Reshape')
    layerArgs = {inputs{1}, block.shape} ;
    obj = Layer.create(@vl_nnreshape, layerArgs) ;
  end
15 |
--------------------------------------------------------------------------------
/misc/generate_markdown.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # generate_markdown.sh generates a set of markdown tables
# for a more readable summary of convnet computational costs
4 | #
5 | # --------------------------------------------------------
6 | # convnet-burden
7 | # Licensed under The MIT License [see LICENSE.md for details]
8 | # Copyright (C) 2017 Samuel Albanie
9 | # --------------------------------------------------------
# set to "true" to turn the model names into download links
add_links="false"

function gen_table() {
  # generate markdown table summaries
  #
  # usage: gen_table LOGFILE
  #
  # Extracts the rows tagged with "MD::" from LOGFILE, prints them once as
  # found (minus the tag) and then once more with the model names cleaned
  # up (and, if add_links is "true", wrapped in links).
  # Returns 1 without printing any rows if LOGFILE cannot be read.

  local log_file="$1"
  if [ ! -r "$log_file" ] ; then
    echo "gen_table: cannot read log file: ${log_file}" >&2
    return 1
  fi

  # drop the leading "MD::" field; "-f 2-" is the portable spelling of
  # GNU cut's "-f 1 --complement"
  res=$(grep "MD::" "$log_file" | cut -d ' ' -f 2-)
  echo "$res"

  # clean up dataset prefixes
  res=$(echo "$res" | sed "s/imagenet-//g")

  # update model names
  res=$(echo "$res" | sed "s/vgg-verydeep-\([0-9+]\)/vgg-vd-\1/g")
  res=$(echo "$res" | sed "s/vgg-\([a-z+]\)/vgg-\1/g")
  res=$(echo "$res" | sed "s/ssd-mcn-pascal-vggvd-\([0-9+]\)/ssd-pascal-\1/g")
  res=$(echo "$res" | sed "s/resnet-\([0-9+]\)/resnet-\1/g")
  # NOTE: these two substitutions only replace the first occurrence
  res="${res/matconvnet-alex/alexnet}"
  res="${res/caffe-ref/caffenet}"

  # clean up suffixes and mcn notation
  res=$(echo "$res" | sed "s/_/-/g")
  res=$(echo "$res" | sed "s/-dag//g")
  res=$(echo "$res" | sed "s/-pt-mcn//g")

  # add links to download models
  if [ "$add_links" = "true" ] ; then
    mcn_home="(http://www.vlfeat.org/matconvnet/pretrained/)"
    imported="(http://www.robots.ox.ac.uk/~albanie/models.html)" # currently unused
    res=$(echo "$res" | awk '{$2="\\["$2"\\]" ; print}')
    res=$(echo "$res" | sed 's/\\/ /g')
    res="${res//]/]$mcn_home}"
    echo "$res"
  else
    echo "$res"
  fi
}
46 |
# point this at the directory containing the logs written by the
# compute_burdens.m script (one log-<task>.txt file per task)
LOGDIR="${HOME}/coding/libs/mcn/contrib-matconvnet/data/burden"

# emit one markdown table per task: a heading, the column header rows and
# then the rows extracted from that task's log
declare -a tasks=("cls" "det" "seg" "key")
for sfx in "${tasks[@]}" ; do
  printf '\ntask: %s\n\n' "${sfx}"
  printf '%s\n' "| model | input size | param mem | feat. mem | flops | src | performance |"
  printf '%s\n' "|-------|------------|-----------|-----------|-------|-----|-------------|"
  gen_table "${LOGDIR}/log-${sfx}.txt"
done
60 |
--------------------------------------------------------------------------------
/reports/SE-BN-Inception.md:
--------------------------------------------------------------------------------
1 | ### Report for SE-BN-Inception
2 | Model params 46 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 43 MB
7 | * Flops: 2 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `inception_5b_scale` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 224 x 224 | 7 x 7 x 1024 | 5 GB | 262 GFLOPs |
14 |
15 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
16 |
17 | 
18 |
--------------------------------------------------------------------------------
/reports/SE-ResNeXt-101-32x4d.md:
--------------------------------------------------------------------------------
1 | ### Report for SE-ResNeXt-101-32x4d
2 | Model params 187 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 197 MB
7 | * Flops: 8 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `conv5_3` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 4 x 4 x 2048 | 6 GB | 264 GFLOPs |
14 | | 224 x 224 | 7 x 7 x 2048 | 25 GB | 1 TFLOPs |
15 | | 336 x 336 | 11 x 11 x 2048 | 56 GB | 2 TFLOPs |
16 | | 448 x 448 | 14 x 14 x 2048 | 98 GB | 4 TFLOPs |
17 | | 560 x 560 | 18 x 18 x 2048 | 154 GB | 6 TFLOPs |
18 | | 672 x 672 | 21 x 21 x 2048 | 221 GB | 9 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/SE-ResNeXt-50-32x4d.md:
--------------------------------------------------------------------------------
1 | ### Report for SE-ResNeXt-50-32x4d
2 | Model params 105 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 132 MB
7 | * Flops: 4 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `conv5_3` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 4 x 4 x 2048 | 4 GB | 144 GFLOPs |
14 | | 224 x 224 | 7 x 7 x 2048 | 16 GB | 547 GFLOPs |
15 | | 336 x 336 | 11 x 11 x 2048 | 37 GB | 1 TFLOPs |
16 | | 448 x 448 | 14 x 14 x 2048 | 66 GB | 2 TFLOPs |
17 | | 560 x 560 | 18 x 18 x 2048 | 103 GB | 3 TFLOPs |
18 | | 672 x 672 | 21 x 21 x 2048 | 148 GB | 5 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/SE-ResNet-101.md:
--------------------------------------------------------------------------------
1 | ### Report for SE-ResNet-101
2 | Model params 189 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 155 MB
7 | * Flops: 8 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `conv5_3` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 4 x 4 x 2048 | 5 GB | 252 GFLOPs |
14 | | 224 x 224 | 7 x 7 x 2048 | 19 GB | 977 GFLOPs |
15 | | 336 x 336 | 11 x 11 x 2048 | 44 GB | 2 TFLOPs |
16 | | 448 x 448 | 14 x 14 x 2048 | 77 GB | 4 TFLOPs |
17 | | 560 x 560 | 18 x 18 x 2048 | 121 GB | 6 TFLOPs |
18 | | 672 x 672 | 21 x 21 x 2048 | 174 GB | 9 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/SE-ResNet-152.md:
--------------------------------------------------------------------------------
1 | ### Report for SE-ResNet-152
2 | Model params 255 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 220 MB
7 | * Flops: 11 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `conv5_3` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 4 x 4 x 2048 | 7 GB | 372 GFLOPs |
14 | | 224 x 224 | 7 x 7 x 2048 | 27 GB | 1 TFLOPs |
15 | | 336 x 336 | 11 x 11 x 2048 | 62 GB | 3 TFLOPs |
16 | | 448 x 448 | 14 x 14 x 2048 | 110 GB | 6 TFLOPs |
17 | | 560 x 560 | 18 x 18 x 2048 | 171 GB | 9 TFLOPs |
18 | | 672 x 672 | 21 x 21 x 2048 | 246 GB | 13 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/SE-ResNet-50.md:
--------------------------------------------------------------------------------
1 | ### Report for SE-ResNet-50
2 | Model params 107 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 103 MB
7 | * Flops: 4 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `conv5_3` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 4 x 4 x 2048 | 3 GB | 132 GFLOPs |
14 | | 224 x 224 | 7 x 7 x 2048 | 13 GB | 499 GFLOPs |
15 | | 336 x 336 | 11 x 11 x 2048 | 29 GB | 1 TFLOPs |
16 | | 448 x 448 | 14 x 14 x 2048 | 51 GB | 2 TFLOPs |
17 | | 560 x 560 | 18 x 18 x 2048 | 80 GB | 3 TFLOPs |
18 | | 672 x 672 | 21 x 21 x 2048 | 115 GB | 4 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/SENet.md:
--------------------------------------------------------------------------------
1 | ### Report for SENet
2 | Model params 440 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 347 MB
7 | * Flops: 21 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `conv5_3` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 4 x 4 x 2048 | 11 GB | 684 GFLOPs |
14 | | 224 x 224 | 7 x 7 x 2048 | 43 GB | 3 TFLOPs |
15 | | 336 x 336 | 11 x 11 x 2048 | 98 GB | 6 TFLOPs |
16 | | 448 x 448 | 14 x 14 x 2048 | 173 GB | 11 TFLOPs |
17 | | 560 x 560 | 18 x 18 x 2048 | 271 GB | 17 TFLOPs |
18 | | 672 x 672 | 21 x 21 x 2048 | 390 GB | 24 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/alexnet.md:
--------------------------------------------------------------------------------
1 | ### Report for alexnet
2 | Model params 233 MB
3 |
4 | Estimates for a single full pass of model at input size 227 x 227:
5 |
6 | * Memory required for features: 3 MB
7 | * Flops: 727 MFLOPs
8 |
9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 114 x 114 | 2 x 2 x 256 | 73 MB | 15 GFLOPs |
14 | | 227 x 227 | 6 x 6 x 256 | 377 MB | 86 GFLOPs |
15 | | 341 x 341 | 9 x 9 x 256 | 872 MB | 200 GFLOPs |
16 | | 454 x 454 | 13 x 13 x 256 | 2 GB | 361 GFLOPs |
17 | | 568 x 568 | 16 x 16 x 256 | 2 GB | 572 GFLOPs |
18 | | 681 x 681 | 20 x 20 x 256 | 4 GB | 829 GFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/caffe-ref.md:
--------------------------------------------------------------------------------
1 | ### Report for caffe-ref
2 | Model params 233 MB
3 | Estimates for a single full pass of model at input size 224 x 224:
4 |
5 | * Memory required for features: 3 MB
6 | * Flops: 724 MFLOPS
7 |
8 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes:
9 |
| input size | feature size | feature memory | flops |
|------------|--------------|----------------|-------|
11 | | 112 x 112 | 3 x 3 x 256 | 97 MB | 19 GFLOPS |
12 | | 224 x 224 | 6 x 6 x 256 | 427 MB | 85 GFLOPS |
13 | | 336 x 336 | 10 x 10 x 256 | 995 MB | 199 GFLOPS |
14 | | 448 x 448 | 13 x 13 x 256 | 2 GB | 360 GFLOPS |
15 | | 560 x 560 | 17 x 17 x 256 | 3 GB | 569 GFLOPS |
16 | | 672 x 672 | 20 x 20 x 256 | 4 GB | 826 GFLOPS |
17 |
18 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read with the networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is to give some idea of the overall profile of the model:
19 | 
20 |
--------------------------------------------------------------------------------
/reports/caffenet.md:
--------------------------------------------------------------------------------
1 | ### Report for caffenet
2 | Model params 233 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 3 MB
7 | * Flops: 724 MFLOPs
8 |
9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 3 x 3 x 256 | 97 MB | 19 GFLOPs |
14 | | 224 x 224 | 6 x 6 x 256 | 427 MB | 85 GFLOPs |
15 | | 336 x 336 | 10 x 10 x 256 | 995 MB | 199 GFLOPs |
16 | | 448 x 448 | 13 x 13 x 256 | 2 GB | 360 GFLOPs |
17 | | 560 x 560 | 17 x 17 x 256 | 3 GB | 569 GFLOPs |
18 | | 672 x 672 | 20 x 20 x 256 | 4 GB | 826 GFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/deeplab-res101-v2.md:
--------------------------------------------------------------------------------
1 | ### Report for deeplab-res101-v2
2 | Model params 505 MB
3 |
4 | Estimates for a single full pass of model at input size 513 x 513:
5 |
6 | * Memory required for features: 4 GB
7 | * Flops: 346 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `fc1_interp` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 257 x 257 | 257 x 257 x 21 | 144 GB | 11 TFLOPs |
14 | | 513 x 513 | 513 x 513 x 21 | 557 GB | 44 TFLOPs |
15 | | 770 x 770 | 769 x 769 x 21 | 1 TB | 98 TFLOPs |
16 | | 1026 x 1026 | 1025 x 1025 x 21 | 2 TB | 174 TFLOPs |
17 | | 1283 x 1283 | 1281 x 1281 x 21 | 3 TB | 271 TFLOPs |
18 | | 1539 x 1539 | 1537 x 1537 x 21 | 5 TB | 389 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/deeplab-vggvd-v2.md:
--------------------------------------------------------------------------------
1 | ### Report for deeplab-vggvd-v2
2 | Model params 144 MB
3 |
4 | Estimates for a single full pass of model at input size 513 x 513:
5 |
6 | * Memory required for features: 755 MB
7 | * Flops: 202 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `fc8_interp` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 257 x 257 | 257 x 257 x 21 | 24 GB | 7 TFLOPs |
14 | | 513 x 513 | 513 x 513 x 21 | 94 GB | 26 TFLOPs |
15 | | 770 x 770 | 777 x 777 x 21 | 214 GB | 59 TFLOPs |
16 | | 1026 x 1026 | 1033 x 1033 x 21 | 378 GB | 104 TFLOPs |
17 | | 1283 x 1283 | 1289 x 1289 x 21 | 588 GB | 161 TFLOPs |
18 | | 1539 x 1539 | 1545 x 1545 x 21 | 844 GB | 231 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/densenet121.md:
--------------------------------------------------------------------------------
1 | ### Report for densenet121
2 | Model params 31 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 126 MB
7 | * Flops: 3 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `features_2` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 224 x 224 | 1 x 1 x 1024 | 16 GB | 367 GFLOPs |
14 | | 336 x 336 | 1 x 1 x 1024 | 35 GB | 823 GFLOPs |
15 | | 448 x 448 | 2 x 2 x 1024 | 63 GB | 1 TFLOPs |
16 | | 560 x 560 | 2 x 2 x 1024 | 98 GB | 2 TFLOPs |
17 | | 672 x 672 | 3 x 3 x 1024 | 142 GB | 3 TFLOPs |
18 |
19 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
20 |
21 | 
22 |
--------------------------------------------------------------------------------
/reports/densenet161.md:
--------------------------------------------------------------------------------
1 | ### Report for densenet161
2 | Model params 110 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 235 MB
7 | * Flops: 8 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `features_2` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 224 x 224 | 1 x 1 x 2208 | 29 GB | 997 GFLOPs |
14 | | 336 x 336 | 1 x 1 x 2208 | 66 GB | 2 TFLOPs |
15 | | 448 x 448 | 2 x 2 x 2208 | 118 GB | 4 TFLOPs |
16 | | 560 x 560 | 2 x 2 x 2208 | 183 GB | 6 TFLOPs |
17 | | 672 x 672 | 3 x 3 x 2208 | 265 GB | 9 TFLOPs |
18 |
19 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
20 |
21 | 
22 |
--------------------------------------------------------------------------------
/reports/densenet169.md:
--------------------------------------------------------------------------------
1 | ### Report for densenet169
2 | Model params 55 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 152 MB
7 | * Flops: 3 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `features_2` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 224 x 224 | 1 x 1 x 1664 | 19 GB | 435 GFLOPs |
14 | | 336 x 336 | 1 x 1 x 1664 | 42 GB | 971 GFLOPs |
15 | | 448 x 448 | 2 x 2 x 1664 | 76 GB | 2 TFLOPs |
16 | | 560 x 560 | 2 x 2 x 1664 | 118 GB | 3 TFLOPs |
17 | | 672 x 672 | 3 x 3 x 1664 | 171 GB | 4 TFLOPs |
18 |
19 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
20 |
21 | 
22 |
--------------------------------------------------------------------------------
/reports/densenet201.md:
--------------------------------------------------------------------------------
1 | ### Report for densenet201
2 | Model params 77 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 196 MB
7 | * Flops: 4 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `features_2` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 224 x 224 | 1 x 1 x 1920 | 25 GB | 556 GFLOPs |
14 | | 336 x 336 | 1 x 1 x 1920 | 55 GB | 1 TFLOPs |
15 | | 448 x 448 | 2 x 2 x 1920 | 98 GB | 2 TFLOPs |
16 | | 560 x 560 | 2 x 2 x 1920 | 152 GB | 3 TFLOPs |
17 | | 672 x 672 | 3 x 3 x 1920 | 221 GB | 5 TFLOPs |
18 |
19 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
20 |
21 | 
22 |
--------------------------------------------------------------------------------
/reports/faster-rcnn-vggvd-pascal.md:
--------------------------------------------------------------------------------
1 | ### Report for faster-rcnn-vggvd-pascal
2 | Model params 523 MB
3 |
4 | Estimates for a single full pass of model at input size 600 x 850:
5 |
6 | * Memory required for features: 600 MB
7 | * Flops: 172 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `relu5_3` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 300 x 425 | 19 x 27 x 512 | 18 GB | 5 TFLOPs |
14 | | 600 x 850 | 38 x 54 x 512 | 73 GB | 20 TFLOPs |
15 | | 900 x 1275 | 57 x 80 x 512 | 164 GB | 45 TFLOPs |
16 | | 1200 x 1700 | 75 x 107 x 512 | 292 GB | 80 TFLOPs |
17 | | 1500 x 2125 | 94 x 133 x 512 | 456 GB | 125 TFLOPs |
18 | | 1800 x 2550 | 113 x 160 x 512 | 657 GB | 181 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/figs/SE-BN-Inception.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/SE-BN-Inception.png
--------------------------------------------------------------------------------
/reports/figs/SE-ResNeXt-101-32x4d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/SE-ResNeXt-101-32x4d.png
--------------------------------------------------------------------------------
/reports/figs/SE-ResNeXt-50-32x4d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/SE-ResNeXt-50-32x4d.png
--------------------------------------------------------------------------------
/reports/figs/SE-ResNet-101.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/SE-ResNet-101.png
--------------------------------------------------------------------------------
/reports/figs/SE-ResNet-152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/SE-ResNet-152.png
--------------------------------------------------------------------------------
/reports/figs/SE-ResNet-50.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/SE-ResNet-50.png
--------------------------------------------------------------------------------
/reports/figs/SENet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/SENet.png
--------------------------------------------------------------------------------
/reports/figs/alexnet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/alexnet.png
--------------------------------------------------------------------------------
/reports/figs/caffe-ref.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/caffe-ref.png
--------------------------------------------------------------------------------
/reports/figs/caffenet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/caffenet.png
--------------------------------------------------------------------------------
/reports/figs/deeplab-res101-v2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/deeplab-res101-v2.png
--------------------------------------------------------------------------------
/reports/figs/deeplab-vggvd-v2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/deeplab-vggvd-v2.png
--------------------------------------------------------------------------------
/reports/figs/densenet121.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/densenet121.png
--------------------------------------------------------------------------------
/reports/figs/densenet161.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/densenet161.png
--------------------------------------------------------------------------------
/reports/figs/densenet169.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/densenet169.png
--------------------------------------------------------------------------------
/reports/figs/densenet201.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/densenet201.png
--------------------------------------------------------------------------------
/reports/figs/faster-rcnn-vggvd-pascal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/faster-rcnn-vggvd-pascal.png
--------------------------------------------------------------------------------
/reports/figs/googlenet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/googlenet.png
--------------------------------------------------------------------------------
/reports/figs/inception-v3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/inception-v3.png
--------------------------------------------------------------------------------
/reports/figs/matconvnet-alex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/matconvnet-alex.png
--------------------------------------------------------------------------------
/reports/figs/mcn-mobilenet-v2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/mcn-mobilenet-v2.png
--------------------------------------------------------------------------------
/reports/figs/mcn-mobilenet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/mcn-mobilenet.png
--------------------------------------------------------------------------------
/reports/figs/multipose-coco.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/multipose-coco.png
--------------------------------------------------------------------------------
/reports/figs/multipose-mpi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/multipose-mpi.png
--------------------------------------------------------------------------------
/reports/figs/pascal-fcn16s.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/pascal-fcn16s.png
--------------------------------------------------------------------------------
/reports/figs/pascal-fcn32s.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/pascal-fcn32s.png
--------------------------------------------------------------------------------
/reports/figs/pascal-fcn8s.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/pascal-fcn8s.png
--------------------------------------------------------------------------------
/reports/figs/resnet-101.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/resnet-101.png
--------------------------------------------------------------------------------
/reports/figs/resnet-152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/resnet-152.png
--------------------------------------------------------------------------------
/reports/figs/resnet-50.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/resnet-50.png
--------------------------------------------------------------------------------
/reports/figs/resnet18.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/resnet18.png
--------------------------------------------------------------------------------
/reports/figs/resnet34.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/resnet34.png
--------------------------------------------------------------------------------
/reports/figs/resnet50.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/resnet50.png
--------------------------------------------------------------------------------
/reports/figs/resnext-101-32x4d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/resnext-101-32x4d.png
--------------------------------------------------------------------------------
/reports/figs/resnext-101-64x4d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/resnext-101-64x4d.png
--------------------------------------------------------------------------------
/reports/figs/resnext-50-32x4d.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/resnext-50-32x4d.png
--------------------------------------------------------------------------------
/reports/figs/rfcn-res101-pascal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/rfcn-res101-pascal.png
--------------------------------------------------------------------------------
/reports/figs/rfcn-res50-pascal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/rfcn-res50-pascal.png
--------------------------------------------------------------------------------
/reports/figs/squeezenet1-0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/squeezenet1-0.png
--------------------------------------------------------------------------------
/reports/figs/squeezenet1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/squeezenet1-1.png
--------------------------------------------------------------------------------
/reports/figs/ssd-mcn-pascal-vggvd-300.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/ssd-mcn-pascal-vggvd-300.png
--------------------------------------------------------------------------------
/reports/figs/ssd-mcn-pascal-vggvd-512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/ssd-mcn-pascal-vggvd-512.png
--------------------------------------------------------------------------------
/reports/figs/ssd-pascal-mobilenet-ft.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/ssd-pascal-mobilenet-ft.png
--------------------------------------------------------------------------------
/reports/figs/ssd-pascal-vggvd-300.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/ssd-pascal-vggvd-300.png
--------------------------------------------------------------------------------
/reports/figs/ssd-pascal-vggvd-512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/ssd-pascal-vggvd-512.png
--------------------------------------------------------------------------------
/reports/figs/vgg-f.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/vgg-f.png
--------------------------------------------------------------------------------
/reports/figs/vgg-m-1024.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/vgg-m-1024.png
--------------------------------------------------------------------------------
/reports/figs/vgg-m-128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/vgg-m-128.png
--------------------------------------------------------------------------------
/reports/figs/vgg-m-2048.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/vgg-m-2048.png
--------------------------------------------------------------------------------
/reports/figs/vgg-m.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/vgg-m.png
--------------------------------------------------------------------------------
/reports/figs/vgg-s.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/vgg-s.png
--------------------------------------------------------------------------------
/reports/figs/vgg-vd-16-atrous.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/vgg-vd-16-atrous.png
--------------------------------------------------------------------------------
/reports/figs/vgg-vd-16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/vgg-vd-16.png
--------------------------------------------------------------------------------
/reports/figs/vgg-vd-19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/vgg-vd-19.png
--------------------------------------------------------------------------------
/reports/googlenet.md:
--------------------------------------------------------------------------------
1 | ### Report for googlenet
2 | Model params 51 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 26 MB
7 | * Flops: 2 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `icp9_out` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 3 x 3 x 1024 | 805 MB | 50 GFLOPs |
14 | | 224 x 224 | 7 x 7 x 1024 | 3 GB | 205 GFLOPs |
15 | | 336 x 336 | 10 x 10 x 1024 | 7 GB | 457 GFLOPs |
16 | | 448 x 448 | 14 x 14 x 1024 | 13 GB | 819 GFLOPs |
17 | | 560 x 560 | 17 x 17 x 1024 | 20 GB | 1 TFLOPs |
18 | | 672 x 672 | 21 x 21 x 1024 | 29 GB | 2 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/inception-v3.md:
--------------------------------------------------------------------------------
1 | ### Report for inception-v3
2 | Model params 91 MB
3 |
4 | Estimates for a single full pass of model at input size 299 x 299:
5 |
6 | * Memory required for features: 89 MB
7 | * Flops: 6 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `features_19` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 299 x 299 | 1 x 1 x 2048 | 11 GB | 735 GFLOPs |
14 | | 449 x 449 | 1 x 1 x 2048 | 26 GB | 2 TFLOPs |
15 | | 598 x 598 | 2 x 2 x 2048 | 47 GB | 3 TFLOPs |
16 | | 748 x 748 | 2 x 2 x 2048 | 75 GB | 5 TFLOPs |
17 | | 897 x 897 | 3 x 3 x 2048 | 108 GB | 7 TFLOPs |
18 |
19 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
20 |
21 | 
22 |
--------------------------------------------------------------------------------
/reports/matconvnet-alex.md:
--------------------------------------------------------------------------------
1 | ### Report for matconvnet-alex
2 | Model params 233 MB
3 | Estimates for a single full pass of model at input size 227 x 227:
4 |
5 | * Memory required for features: 3 MB
6 | * Flops: 727 MFLOPs
7 |
8 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128:
9 |
10 | | input size | feature size | feature memory | flops |
11 | | 114 x 114 | 2 x 2 x 256 | 73 MB | 15 GFLOPS |
12 | | 227 x 227 | 6 x 6 x 256 | 377 MB | 86 GFLOPS |
13 | | 341 x 341 | 9 x 9 x 256 | 872 MB | 200 GFLOPS |
14 | | 454 x 454 | 13 x 13 x 256 | 2 GB | 361 GFLOPS |
15 | | 568 x 568 | 16 x 16 x 256 | 2 GB | 572 GFLOPS |
16 | | 681 x 681 | 20 x 20 x 256 | 4 GB | 829 GFLOPS |
17 |
18 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read with the networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is to give some idea of the overall profile of the model:
19 | 
20 |
--------------------------------------------------------------------------------
/reports/mcn-mobilenet-v2.md:
--------------------------------------------------------------------------------
1 | ### Report for mcn-mobilenet-v2
2 | Model params 14 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 69 MB
7 | * Flops: 446 MFLOPs
8 |
9 | Estimates are given below of the burden of computing the `fc7` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 224 x 224 | 1 x 1 x 1000 | 9 GB | 57 GFLOPs |
14 | | 336 x 336 | 1 x 1 x 1000 | 20 GB | 130 GFLOPs |
15 | | 448 x 448 | 1 x 1 x 1000 | 35 GB | 228 GFLOPs |
16 | | 560 x 560 | 1 x 1 x 1000 | 54 GB | 360 GFLOPs |
17 | | 672 x 672 | 1 x 1 x 1000 | 78 GB | 512 GFLOPs |
18 |
19 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
20 |
21 | 
22 |
--------------------------------------------------------------------------------
/reports/mcn-mobilenet.md:
--------------------------------------------------------------------------------
1 | ### Report for mcn-mobilenet
2 | Model params 16 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 38 MB
7 | * Flops: 579 MFLOPs
8 |
9 | Estimates are given below of the burden of computing the `fc7` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 224 x 224 | 1 x 1 x 1000 | 5 GB | 74 GFLOPs |
14 | | 336 x 336 | 1 x 1 x 1000 | 11 GB | 169 GFLOPs |
15 | | 448 x 448 | 1 x 1 x 1000 | 19 GB | 296 GFLOPs |
16 | | 560 x 560 | 1 x 1 x 1000 | 30 GB | 466 GFLOPs |
17 | | 672 x 672 | 1 x 1 x 1000 | 43 GB | 666 GFLOPs |
18 |
19 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
20 |
21 | 
22 |
--------------------------------------------------------------------------------
/reports/multipose-coco.md:
--------------------------------------------------------------------------------
1 | ### Report for multipose-coco
2 | Model params 200 MB
3 |
4 | Estimates for a single full pass of model at input size 368 x 368:
5 |
6 | * Memory required for features: 246 MB
7 | * Flops: 136 GFLOPS
8 |
9 | Estimates are given below of the burden of computing the `Mconv6_stage6_L2` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 184 x 184 | 23 x 23 x 128 | 7 GB | 4 TFLOPS |
14 | | 368 x 368 | 46 x 46 x 128 | 30 GB | 16 TFLOPS |
15 | | 552 x 552 | 69 x 69 x 128 | 67 GB | 37 TFLOPS |
16 | | 736 x 736 | 92 x 92 x 128 | 119 GB | 65 TFLOPS |
17 | | 920 x 920 | 115 x 115 x 128 | 186 GB | 101 TFLOPS |
18 | | 1104 x 1104 | 138 x 138 x 128 | 268 GB | 146 TFLOPS |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/multipose-mpi.md:
--------------------------------------------------------------------------------
1 | ### Report for multipose-mpi
2 | Model params 196 MB
3 |
4 | Estimates for a single full pass of model at input size 368 x 368:
5 |
6 | * Memory required for features: 245 MB
7 | * Flops: 134 GFLOPS
8 |
9 | Estimates are given below of the burden of computing the `Mconv6_stage6_L2` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 184 x 184 | 23 x 23 x 128 | 7 GB | 4 TFLOPS |
14 | | 368 x 368 | 46 x 46 x 128 | 30 GB | 16 TFLOPS |
15 | | 552 x 552 | 69 x 69 x 128 | 67 GB | 36 TFLOPS |
16 | | 736 x 736 | 92 x 92 x 128 | 119 GB | 64 TFLOPS |
17 | | 920 x 920 | 115 x 115 x 128 | 185 GB | 100 TFLOPS |
18 | | 1104 x 1104 | 138 x 138 x 128 | 267 GB | 144 TFLOPS |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/pascal-fcn16s.md:
--------------------------------------------------------------------------------
1 | ### Report for pascal-fcn16s
2 | Model params 514 MB
3 |
4 | Estimates for a single full pass of model at input size 384 x 384:
5 |
6 | * Memory required for features: 424 MB
7 | * Flops: 125 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `score_fr` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 192 x 192 | 7 x 7 x 21 | 22 GB | 7 TFLOPs |
14 | | 384 x 384 | 13 x 13 x 21 | 49 GB | 16 TFLOPs |
15 | | 576 x 576 | 19 x 19 x 21 | 87 GB | 29 TFLOPs |
16 | | 768 x 768 | 25 x 25 x 21 | 136 GB | 46 TFLOPs |
17 | | 960 x 960 | 31 x 31 x 21 | 196 GB | 68 TFLOPs |
18 | | 1152 x 1152 | 37 x 37 x 21 | 267 GB | 93 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/pascal-fcn32s.md:
--------------------------------------------------------------------------------
1 | ### Report for pascal-fcn32s
2 | Model params 519 MB
3 |
4 | Estimates for a single full pass of model at input size 384 x 384:
5 |
6 | * Memory required for features: 423 MB
7 | * Flops: 125 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `score_fr` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 192 x 192 | 7 x 7 x 21 | 22 GB | 7 TFLOPs |
14 | | 384 x 384 | 13 x 13 x 21 | 49 GB | 16 TFLOPs |
15 | | 576 x 576 | 19 x 19 x 21 | 87 GB | 29 TFLOPs |
16 | | 768 x 768 | 25 x 25 x 21 | 136 GB | 46 TFLOPs |
17 | | 960 x 960 | 31 x 31 x 21 | 196 GB | 68 TFLOPs |
18 | | 1152 x 1152 | 37 x 37 x 21 | 267 GB | 93 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/pascal-fcn8s.md:
--------------------------------------------------------------------------------
1 | ### Report for pascal-fcn8s
2 | Model params 513 MB
3 |
4 | Estimates for a single full pass of model at input size 384 x 384:
5 |
6 | * Memory required for features: 426 MB
7 | * Flops: 125 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `score_fr` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 192 x 192 | 7 x 7 x 21 | 22 GB | 7 TFLOPs |
14 | | 384 x 384 | 13 x 13 x 21 | 49 GB | 16 TFLOPs |
15 | | 576 x 576 | 19 x 19 x 21 | 87 GB | 29 TFLOPs |
16 | | 768 x 768 | 25 x 25 x 21 | 136 GB | 46 TFLOPs |
17 | | 960 x 960 | 31 x 31 x 21 | 196 GB | 68 TFLOPs |
18 | | 1152 x 1152 | 37 x 37 x 21 | 267 GB | 93 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/resnet-101.md:
--------------------------------------------------------------------------------
1 | ### Report for resnet-101
2 | Model params 170 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 155 MB
7 | * Flops: 8 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `res5c_relu` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 4 x 4 x 2048 | 5 GB | 251 GFLOPs |
14 | | 224 x 224 | 7 x 7 x 2048 | 19 GB | 974 GFLOPs |
15 | | 336 x 336 | 11 x 11 x 2048 | 44 GB | 2 TFLOPs |
16 | | 448 x 448 | 14 x 14 x 2048 | 77 GB | 4 TFLOPs |
17 | | 560 x 560 | 18 x 18 x 2048 | 121 GB | 6 TFLOPs |
18 | | 672 x 672 | 21 x 21 x 2048 | 174 GB | 9 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/resnet-152.md:
--------------------------------------------------------------------------------
1 | ### Report for resnet-152
2 | Model params 230 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 219 MB
7 | * Flops: 11 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `res5c_relu` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 4 x 4 x 2048 | 7 GB | 370 GFLOPs |
14 | | 224 x 224 | 7 x 7 x 2048 | 27 GB | 1 TFLOPs |
15 | | 336 x 336 | 11 x 11 x 2048 | 62 GB | 3 TFLOPs |
16 | | 448 x 448 | 14 x 14 x 2048 | 109 GB | 6 TFLOPs |
17 | | 560 x 560 | 18 x 18 x 2048 | 171 GB | 9 TFLOPs |
18 | | 672 x 672 | 21 x 21 x 2048 | 246 GB | 13 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/resnet-50.md:
--------------------------------------------------------------------------------
1 | ### Report for resnet-50
2 | Model params 98 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 103 MB
7 | * Flops: 4 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `res5c_relu` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 4 x 4 x 2048 | 3 GB | 131 GFLOPs |
14 | | 224 x 224 | 7 x 7 x 2048 | 13 GB | 497 GFLOPs |
15 | | 336 x 336 | 11 x 11 x 2048 | 29 GB | 1 TFLOPs |
16 | | 448 x 448 | 14 x 14 x 2048 | 51 GB | 2 TFLOPs |
17 | | 560 x 560 | 18 x 18 x 2048 | 80 GB | 3 TFLOPs |
18 | | 672 x 672 | 21 x 21 x 2048 | 115 GB | 4 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/resnet18.md:
--------------------------------------------------------------------------------
1 | ### Report for resnet18
2 | Model params 45 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 23 MB
7 | * Flops: 2 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `features_7_1_id_relu` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 4 x 4 x 512 | 734 MB | 62 GFLOPs |
14 | | 224 x 224 | 7 x 7 x 512 | 3 GB | 233 GFLOPs |
15 | | 336 x 336 | 11 x 11 x 512 | 6 GB | 536 GFLOPs |
16 | | 448 x 448 | 14 x 14 x 512 | 11 GB | 932 GFLOPs |
17 | | 560 x 560 | 18 x 18 x 512 | 18 GB | 1 TFLOPs |
18 | | 672 x 672 | 21 x 21 x 512 | 25 GB | 2 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/resnet34.md:
--------------------------------------------------------------------------------
1 | ### Report for resnet34
2 | Model params 83 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 35 MB
7 | * Flops: 4 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `features_7_2_id_relu` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 4 x 4 x 512 | 1 GB | 124 GFLOPs |
14 | | 224 x 224 | 7 x 7 x 512 | 4 GB | 470 GFLOPs |
15 | | 336 x 336 | 11 x 11 x 512 | 10 GB | 1 TFLOPs |
16 | | 448 x 448 | 14 x 14 x 512 | 17 GB | 2 TFLOPs |
17 | | 560 x 560 | 18 x 18 x 512 | 27 GB | 3 TFLOPs |
18 | | 672 x 672 | 21 x 21 x 512 | 39 GB | 4 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/resnet50.md:
--------------------------------------------------------------------------------
1 | ### Report for resnet50
2 | Model params 98 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 107 MB
7 | * Flops: 4 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `features_7_2_id_relu` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 4 x 4 x 2048 | 3 GB | 139 GFLOPs |
14 | | 224 x 224 | 7 x 7 x 2048 | 13 GB | 527 GFLOPs |
15 | | 336 x 336 | 11 x 11 x 2048 | 30 GB | 1 TFLOPs |
16 | | 448 x 448 | 14 x 14 x 2048 | 53 GB | 2 TFLOPs |
17 | | 560 x 560 | 18 x 18 x 2048 | 84 GB | 3 TFLOPs |
18 | | 672 x 672 | 21 x 21 x 2048 | 120 GB | 5 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/resnext-101-32x4d.md:
--------------------------------------------------------------------------------
1 | ### Report for resnext-101-32x4d
2 | Model params 169 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 197 MB
7 | * Flops: 8 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `features_7_2_id_relu` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 4 x 4 x 2048 | 6 GB | 263 GFLOPs |
14 | | 224 x 224 | 7 x 7 x 2048 | 25 GB | 1 TFLOPs |
15 | | 336 x 336 | 11 x 11 x 2048 | 56 GB | 2 TFLOPs |
16 | | 448 x 448 | 14 x 14 x 2048 | 98 GB | 4 TFLOPs |
17 | | 560 x 560 | 18 x 18 x 2048 | 154 GB | 6 TFLOPs |
18 | | 672 x 672 | 21 x 21 x 2048 | 221 GB | 9 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/resnext-101-64x4d.md:
--------------------------------------------------------------------------------
1 | ### Report for resnext-101-64x4d
2 | Model params 319 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 273 MB
7 | * Flops: 16 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `features_7_2_id_relu` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 4 x 4 x 2048 | 9 GB | 509 GFLOPs |
14 | | 224 x 224 | 7 x 7 x 2048 | 34 GB | 2 TFLOPs |
15 | | 336 x 336 | 11 x 11 x 2048 | 77 GB | 5 TFLOPs |
16 | | 448 x 448 | 14 x 14 x 2048 | 136 GB | 8 TFLOPs |
17 | | 560 x 560 | 18 x 18 x 2048 | 214 GB | 12 TFLOPs |
18 | | 672 x 672 | 21 x 21 x 2048 | 307 GB | 18 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/resnext-50-32x4d.md:
--------------------------------------------------------------------------------
1 | ### Report for resnext-50-32x4d
2 | Model params 96 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 132 MB
7 | * Flops: 4 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `features_7_2_id_relu` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 4 x 4 x 2048 | 4 GB | 143 GFLOPs |
14 | | 224 x 224 | 7 x 7 x 2048 | 16 GB | 545 GFLOPs |
15 | | 336 x 336 | 11 x 11 x 2048 | 37 GB | 1 TFLOPs |
16 | | 448 x 448 | 14 x 14 x 2048 | 66 GB | 2 TFLOPs |
17 | | 560 x 560 | 18 x 18 x 2048 | 103 GB | 3 TFLOPs |
18 | | 672 x 672 | 21 x 21 x 2048 | 148 GB | 5 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/rfcn-res101-pascal.md:
--------------------------------------------------------------------------------
1 | ### Report for rfcn-res101-pascal
2 | Model params 194 MB
3 |
4 | Estimates for a single full pass of model at input size 600 x 850:
5 |
6 | * Memory required for features: 2 GB
7 | * Flops: 117 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `res5c_relu` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 300 x 425 | 19 x 27 x 2048 | 55 GB | 3 TFLOPs |
14 | | 600 x 850 | 38 x 53 x 2048 | 218 GB | 13 TFLOPs |
15 | | 900 x 1275 | 57 x 80 x 2048 | 493 GB | 29 TFLOPs |
16 | | 1200 x 1700 | 75 x 107 x 2048 | 871 GB | 51 TFLOPs |
17 | | 1500 x 2125 | 94 x 133 x 2048 | 1 TB | 80 TFLOPs |
18 | | 1800 x 2550 | 113 x 160 x 2048 | 2 TB | 116 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/rfcn-res50-pascal.md:
--------------------------------------------------------------------------------
1 | ### Report for rfcn-res50-pascal
2 | Model params 122 MB
3 |
4 | Estimates for a single full pass of model at input size 600 x 850:
5 |
6 | * Memory required for features: 1 GB
7 | * Flops: 79 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `res5c_relu` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 300 x 425 | 19 x 27 x 2048 | 38 GB | 2 TFLOPs |
14 | | 600 x 850 | 38 x 53 x 2048 | 151 GB | 8 TFLOPs |
15 | | 900 x 1275 | 57 x 80 x 2048 | 342 GB | 18 TFLOPs |
16 | | 1200 x 1700 | 75 x 107 x 2048 | 605 GB | 32 TFLOPs |
17 | | 1500 x 2125 | 94 x 133 x 2048 | 944 GB | 50 TFLOPs |
18 | | 1800 x 2550 | 113 x 160 x 2048 | 1 TB | 72 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/squeezenet1-0.md:
--------------------------------------------------------------------------------
1 | ### Report for squeezenet1-0
2 | Model params 5 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 30 MB
7 | * Flops: 837 MFLOPs
8 |
9 | Estimates are given below of the burden of computing the `features_12_cat` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 6 x 6 x 512 | 841 MB | 22 GFLOPs |
14 | | 224 x 224 | 13 x 13 x 512 | 4 GB | 96 GFLOPs |
15 | | 336 x 336 | 20 x 20 x 512 | 8 GB | 221 GFLOPs |
16 | | 448 x 448 | 27 x 27 x 512 | 15 GB | 398 GFLOPs |
17 | | 560 x 560 | 34 x 34 x 512 | 23 GB | 626 GFLOPs |
18 | | 672 x 672 | 41 x 41 x 512 | 33 GB | 906 GFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/squeezenet1-1.md:
--------------------------------------------------------------------------------
1 | ### Report for squeezenet1-1
2 | Model params 5 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 17 MB
7 | * Flops: 360 MFLOPs
8 |
9 | Estimates are given below of the burden of computing the `features_12_cat` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 6 x 6 x 512 | 483 MB | 8 GFLOPs |
14 | | 224 x 224 | 13 x 13 x 512 | 2 GB | 35 GFLOPs |
15 | | 336 x 336 | 20 x 20 x 512 | 5 GB | 81 GFLOPs |
16 | | 448 x 448 | 27 x 27 x 512 | 8 GB | 146 GFLOPs |
17 | | 560 x 560 | 34 x 34 x 512 | 13 GB | 230 GFLOPs |
18 | | 672 x 672 | 41 x 41 x 512 | 19 GB | 333 GFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/ssd-pascal-mobilenet-ft.md:
--------------------------------------------------------------------------------
1 | ### Report for ssd-pascal-mobilenet-ft
2 | Model params 22 MB
3 |
4 | Estimates for a single full pass of model at input size 300 x 300:
5 |
6 | * Memory required for features: 37 MB
7 | * Flops: 1 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `conv17_2_relu` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 150 x 150 | 1 x 1 x 128 | 1 GB | 39 GFLOPs |
14 | | 300 x 300 | 1 x 1 x 128 | 4 GB | 146 GFLOPs |
15 | | 450 x 450 | 1 x 1 x 128 | 10 GB | 336 GFLOPs |
16 | | 600 x 600 | 2 x 2 x 128 | 17 GB | 574 GFLOPs |
17 | | 750 x 750 | 2 x 2 x 128 | 27 GB | 890 GFLOPs |
18 | | 900 x 900 | 2 x 2 x 128 | 39 GB | 1 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/ssd-pascal-vggvd-300.md:
--------------------------------------------------------------------------------
1 | ### Report for ssd-pascal-vggvd-300
2 | Model params 100 MB
3 |
4 | Estimates for a single full pass of model at input size 300 x 300:
5 |
6 | * Memory required for features: 116 MB
7 | * Flops: 31 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `relu4_3` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 150 x 150 | 19 x 19 x 512 | 3 GB | 818 GFLOPs |
14 | | 300 x 300 | 38 x 38 x 512 | 13 GB | 3 TFLOPs |
15 | | 450 x 450 | 57 x 57 x 512 | 28 GB | 7 TFLOPs |
16 | | 600 x 600 | 75 x 75 x 512 | 50 GB | 13 TFLOPs |
17 | | 750 x 750 | 94 x 94 x 512 | 78 GB | 20 TFLOPs |
18 | | 900 x 900 | 113 x 113 x 512 | 113 GB | 29 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/ssd-pascal-vggvd-512.md:
--------------------------------------------------------------------------------
1 | ### Report for ssd-pascal-vggvd-512
2 | Model params 104 MB
3 |
4 | Estimates for a single full pass of model at input size 512 x 512:
5 |
6 | * Memory required for features: 337 MB
7 | * Flops: 91 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `relu4_3` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 256 x 256 | 32 x 32 x 512 | 9 GB | 2 TFLOPs |
14 | | 512 x 512 | 64 x 64 x 512 | 36 GB | 9 TFLOPs |
15 | | 768 x 768 | 96 x 96 x 512 | 82 GB | 21 TFLOPs |
16 | | 1024 x 1024 | 128 x 128 x 512 | 146 GB | 37 TFLOPs |
17 | | 1280 x 1280 | 160 x 160 x 512 | 228 GB | 59 TFLOPs |
18 | | 1536 x 1536 | 192 x 192 x 512 | 328 GB | 84 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/vgg-f.md:
--------------------------------------------------------------------------------
1 | ### Report for vgg-f
2 | Model params 232 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 4 MB
7 | * Flops: 727 MFLOPs
8 |
9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 3 x 3 x 256 | 109 MB | 19 GFLOPs |
14 | | 224 x 224 | 6 x 6 x 256 | 476 MB | 86 GFLOPs |
15 | | 336 x 336 | 10 x 10 x 256 | 1 GB | 200 GFLOPs |
16 | | 448 x 448 | 13 x 13 x 256 | 2 GB | 362 GFLOPs |
17 | | 560 x 560 | 17 x 17 x 256 | 3 GB | 571 GFLOPs |
18 | | 672 x 672 | 20 x 20 x 256 | 4 GB | 828 GFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/vgg-m-1024.md:
--------------------------------------------------------------------------------
1 | ### Report for vgg-m-1024
2 | Model params 333 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 12 MB
7 | * Flops: 2 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 3 x 3 x 512 | 365 MB | 44 GFLOPs |
14 | | 224 x 224 | 6 x 6 x 512 | 2 GB | 204 GFLOPs |
15 | | 336 x 336 | 10 x 10 x 512 | 4 GB | 480 GFLOPs |
16 | | 448 x 448 | 13 x 13 x 512 | 6 GB | 874 GFLOPs |
17 | | 560 x 560 | 17 x 17 x 512 | 10 GB | 1 TFLOPs |
18 | | 672 x 672 | 20 x 20 x 512 | 15 GB | 2 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/vgg-m-128.md:
--------------------------------------------------------------------------------
1 | ### Report for vgg-m-128
2 | Model params 315 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 12 MB
7 | * Flops: 2 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 3 x 3 x 512 | 365 MB | 44 GFLOPs |
14 | | 224 x 224 | 6 x 6 x 512 | 2 GB | 204 GFLOPs |
15 | | 336 x 336 | 10 x 10 x 512 | 4 GB | 480 GFLOPs |
16 | | 448 x 448 | 13 x 13 x 512 | 6 GB | 874 GFLOPs |
17 | | 560 x 560 | 17 x 17 x 512 | 10 GB | 1 TFLOPs |
18 | | 672 x 672 | 20 x 20 x 512 | 15 GB | 2 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/vgg-m-2048.md:
--------------------------------------------------------------------------------
1 | ### Report for vgg-m-2048
2 | Model params 353 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 12 MB
7 | * Flops: 2 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 3 x 3 x 512 | 365 MB | 44 GFLOPs |
14 | | 224 x 224 | 6 x 6 x 512 | 2 GB | 204 GFLOPs |
15 | | 336 x 336 | 10 x 10 x 512 | 4 GB | 480 GFLOPs |
16 | | 448 x 448 | 13 x 13 x 512 | 6 GB | 874 GFLOPs |
17 | | 560 x 560 | 17 x 17 x 512 | 10 GB | 1 TFLOPs |
18 | | 672 x 672 | 20 x 20 x 512 | 15 GB | 2 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/vgg-m.md:
--------------------------------------------------------------------------------
1 | ### Report for vgg-m
2 | Model params 393 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 12 MB
7 | * Flops: 2 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 3 x 3 x 512 | 365 MB | 44 GFLOPs |
14 | | 224 x 224 | 6 x 6 x 512 | 2 GB | 204 GFLOPs |
15 | | 336 x 336 | 10 x 10 x 512 | 4 GB | 480 GFLOPs |
16 | | 448 x 448 | 13 x 13 x 512 | 6 GB | 874 GFLOPs |
17 | | 560 x 560 | 17 x 17 x 512 | 10 GB | 1 TFLOPs |
18 | | 672 x 672 | 20 x 20 x 512 | 15 GB | 2 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/vgg-s.md:
--------------------------------------------------------------------------------
1 | ### Report for vgg-s
2 | Model params 393 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 12 MB
7 | * Flops: 3 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 3 x 3 x 512 | 348 MB | 58 GFLOPs |
14 | | 224 x 224 | 6 x 6 x 512 | 2 GB | 327 GFLOPs |
15 | | 336 x 336 | 9 x 9 x 512 | 4 GB | 768 GFLOPs |
16 | | 448 x 448 | 12 x 12 x 512 | 6 GB | 1 TFLOPs |
17 | | 560 x 560 | 15 x 15 x 512 | 10 GB | 2 TFLOPs |
18 | | 672 x 672 | 18 x 18 x 512 | 15 GB | 3 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/vgg-vd-16-atrous.md:
--------------------------------------------------------------------------------
1 | ### Report for vgg-vd-16-atrous
2 | Model params 82 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 58 MB
7 | * Flops: 16 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 4 x 4 x 512 | 2 GB | 493 GFLOPs |
14 | | 224 x 224 | 7 x 7 x 512 | 7 GB | 2 TFLOPs |
15 | | 336 x 336 | 11 x 11 x 512 | 16 GB | 4 TFLOPs |
16 | | 448 x 448 | 14 x 14 x 512 | 29 GB | 8 TFLOPs |
17 | | 560 x 560 | 18 x 18 x 512 | 45 GB | 12 TFLOPs |
18 | | 672 x 672 | 21 x 21 x 512 | 65 GB | 18 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/vgg-vd-16.md:
--------------------------------------------------------------------------------
1 | ### Report for vgg-vd-16
2 | Model params 528 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 58 MB
7 | * Flops: 16 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 4 x 4 x 512 | 2 GB | 493 GFLOPs |
14 | | 224 x 224 | 7 x 7 x 512 | 7 GB | 2 TFLOPs |
15 | | 336 x 336 | 11 x 11 x 512 | 16 GB | 4 TFLOPs |
16 | | 448 x 448 | 14 x 14 x 512 | 29 GB | 8 TFLOPs |
17 | | 560 x 560 | 18 x 18 x 512 | 45 GB | 12 TFLOPs |
18 | | 672 x 672 | 21 x 21 x 512 | 65 GB | 18 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/reports/vgg-vd-19.md:
--------------------------------------------------------------------------------
1 | ### Report for vgg-vd-19
2 | Model params 548 MB
3 |
4 | Estimates for a single full pass of model at input size 224 x 224:
5 |
6 | * Memory required for features: 63 MB
7 | * Flops: 20 GFLOPs
8 |
9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128:
10 |
11 | | input size | feature size | feature memory | flops |
12 | |------------|--------------|----------------|-------|
13 | | 112 x 112 | 4 x 4 x 512 | 2 GB | 626 GFLOPs |
14 | | 224 x 224 | 7 x 7 x 512 | 8 GB | 3 TFLOPs |
15 | | 336 x 336 | 11 x 11 x 512 | 18 GB | 6 TFLOPs |
16 | | 448 x 448 | 14 x 14 x 512 | 31 GB | 10 TFLOPs |
17 | | 560 x 560 | 18 x 18 x 512 | 49 GB | 16 TFLOPs |
18 | | 672 x 672 | 21 x 21 x 512 | 70 GB | 23 TFLOPs |
19 |
20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model:
21 |
22 | 
23 |
--------------------------------------------------------------------------------
/setup_convnet_burden.m:
--------------------------------------------------------------------------------
function setup_convnet_burden()
%SETUP_CONVNET_BURDEN Sets up convnet-burden, by adding its folders
% to the Matlab path
%
%   SETUP_CONVNET_BURDEN() first makes sure that the `mcnExtraLayers`
%   module is available (see CHECK_DEPENDENCY below), and then adds the
%   folder containing this file, together with its `matlab` and `core`
%   subfolders, to the MATLAB path.
%
% Copyright (C) 2017 Samuel Albanie
% Licensed under The MIT License [see LICENSE.md for details]

% the dependency is resolved before the path is touched
check_dependency('mcnExtraLayers') ;

% locate the folder that holds this file
baseDir = fileparts(mfilename('fullpath')) ;
matlabDir = [baseDir '/matlab'] ;
coreDir = [baseDir '/core'] ;

% a single call keeps the three folders in this order at the top of the path
addpath(baseDir, matlabDir, coreDir) ;
11 |
% -----------------------------------
function check_dependency(moduleName)
% -----------------------------------
%CHECK_DEPENDENCY Make sure that a contrib module has been set up
%   CHECK_DEPENDENCY(MODULENAME) looks for the setup function of the
%   module (`setup_<name>`, where <name> is MODULENAME with every '-'
%   replaced by '_'). If it is found, `vl_contrib('setup', MODULENAME)` is
%   run. Otherwise the folder <vl_rootnn>/contrib/MODULENAME is added to
%   the path and the lookup is repeated. If the setup function still
%   cannot be found, the user is asked whether the module should be
%   installed:
%     `y` - install, compile and set up the module via vl_contrib
%     `n` - raise the error `convnet_burden:missingDependency`
%   Any other reply repeats the question.

setupFunc = ['setup_', strrep(moduleName, '-', '_')] ;
if exist(setupFunc, 'file')
  vl_contrib('setup', moduleName) ;
  return ;
end

% try adding the module to the path, suppressing the warning that addpath
% issues when the folder does not exist. The previous warning state is
% saved and restored, so that a user who had already disabled this warning
% does not get it switched back on.
prevState = warning('off', 'MATLAB:dispatcher:pathWarning') ;
addpath(fullfile(vl_rootnn, 'contrib', moduleName)) ;
warning(prevState) ;

if exist(setupFunc, 'file')
  vl_contrib('setup', moduleName) ;
  return ;
end

msg = ['module %s was not found on the MATLAB path. Would you like ' ...
       'to install it now? (y/n)\n'] ;
prompt = sprintf(msg, moduleName) ;

% keep asking until a recognised answer is given; both valid answers leave
% the function (by return or by error), so no loop flag is required
while true
  str = input(prompt, 's') ;
  % tolerate surrounding whitespace and upper case replies
  switch lower(strtrim(str))
    case 'y'
      vl_contrib('install', moduleName) ;
      vl_contrib('compile', moduleName) ;
      vl_contrib('setup', moduleName) ;
      return ;
    case 'n'
      % the dependency is mandatory, so declining the install is an error
      error('convnet_burden:missingDependency', ...
            'module %s is required by convnet-burden, but is not installed', ...
            moduleName) ;
    otherwise
      fprintf('input %s not recognised, please use `y` or `n`\n', str) ;
  end
end
49 |
50 |
--------------------------------------------------------------------------------