├── LICENSE.md ├── README.md ├── compute_burdens.m ├── core └── burden.m ├── matlab ├── fcn_autonn_custom_fn.m ├── inception_autonn_custom_fn.m └── squeezenet_autonn_custom_fn.m ├── misc └── generate_markdown.sh ├── reports ├── SE-BN-Inception.md ├── SE-ResNeXt-101-32x4d.md ├── SE-ResNeXt-50-32x4d.md ├── SE-ResNet-101.md ├── SE-ResNet-152.md ├── SE-ResNet-50.md ├── SENet.md ├── alexnet.md ├── caffe-ref.md ├── caffenet.md ├── deeplab-res101-v2.md ├── deeplab-vggvd-v2.md ├── densenet121.md ├── densenet161.md ├── densenet169.md ├── densenet201.md ├── faster-rcnn-vggvd-pascal.md ├── figs │ ├── SE-BN-Inception.png │ ├── SE-ResNeXt-101-32x4d.png │ ├── SE-ResNeXt-50-32x4d.png │ ├── SE-ResNet-101.png │ ├── SE-ResNet-152.png │ ├── SE-ResNet-50.png │ ├── SENet.png │ ├── alexnet.png │ ├── caffe-ref.png │ ├── caffenet.png │ ├── deeplab-res101-v2.png │ ├── deeplab-vggvd-v2.png │ ├── densenet121.png │ ├── densenet161.png │ ├── densenet169.png │ ├── densenet201.png │ ├── faster-rcnn-vggvd-pascal.png │ ├── googlenet.png │ ├── inception-v3.png │ ├── matconvnet-alex.png │ ├── mcn-mobilenet-v2.png │ ├── mcn-mobilenet.png │ ├── multipose-coco.png │ ├── multipose-mpi.png │ ├── pascal-fcn16s.png │ ├── pascal-fcn32s.png │ ├── pascal-fcn8s.png │ ├── resnet-101.png │ ├── resnet-152.png │ ├── resnet-50.png │ ├── resnet18.png │ ├── resnet34.png │ ├── resnet50.png │ ├── resnext-101-32x4d.png │ ├── resnext-101-64x4d.png │ ├── resnext-50-32x4d.png │ ├── rfcn-res101-pascal.png │ ├── rfcn-res50-pascal.png │ ├── squeezenet1-0.png │ ├── squeezenet1-1.png │ ├── ssd-mcn-pascal-vggvd-300.png │ ├── ssd-mcn-pascal-vggvd-512.png │ ├── ssd-pascal-mobilenet-ft.png │ ├── ssd-pascal-vggvd-300.png │ ├── ssd-pascal-vggvd-512.png │ ├── vgg-f.png │ ├── vgg-m-1024.png │ ├── vgg-m-128.png │ ├── vgg-m-2048.png │ ├── vgg-m.png │ ├── vgg-s.png │ ├── vgg-vd-16-atrous.png │ ├── vgg-vd-16.png │ └── vgg-vd-19.png ├── googlenet.md ├── inception-v3.md ├── matconvnet-alex.md ├── mcn-mobilenet-v2.md ├── mcn-mobilenet.md ├── multipose-coco.md ├── multipose-mpi.md ├── pascal-fcn16s.md ├── pascal-fcn32s.md ├── pascal-fcn8s.md ├── resnet-101.md ├── resnet-152.md ├── resnet-50.md ├── resnet18.md ├── resnet34.md ├── resnet50.md ├── resnext-101-32x4d.md ├── resnext-101-64x4d.md ├── resnext-50-32x4d.md ├── rfcn-res101-pascal.md ├── rfcn-res50-pascal.md ├── squeezenet1-0.md ├── squeezenet1-1.md ├── ssd-pascal-mobilenet-ft.md ├── ssd-pascal-vggvd-300.md ├── ssd-pascal-vggvd-512.md ├── vgg-f.md ├── vgg-m-1024.md ├── vgg-m-128.md ├── vgg-m-2048.md ├── vgg-m.md ├── vgg-s.md ├── vgg-vd-16-atrous.md ├── vgg-vd-16.md └── vgg-vd-19.md └── setup_convnet_burden.m /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Samuel Albanie 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | convnet-burden 2 | --- 3 | 4 | Estimates of memory consumption and FLOP counts for various convolutional neural networks. 5 | 6 | 7 | ### Image Classification Architectures 8 | 9 | The numbers below are given for single element batches. 10 | 11 | | model | input size | param mem | feat. mem | flops | src | performance | 12 | |-------|------------|--------------|----------------|-------|-----|-------------| 13 | | [alexnet](reports/alexnet.md) | 227 x 227 | 233 MB | 3 MB | 727 MFLOPs | MCN | 41.80 / 19.20 | 14 | | [caffenet](reports/caffenet.md) | 224 x 224 | 233 MB | 3 MB | 724 MFLOPs | MCN | 42.60 / 19.70 | 15 | | [squeezenet1-0](reports/squeezenet1-0.md) | 224 x 224 | 5 MB | 30 MB | 837 MFLOPs | PT | 41.90 / 19.58 | 16 | | [squeezenet1-1](reports/squeezenet1-1.md) | 224 x 224 | 5 MB | 17 MB | 360 MFLOPs | PT | 41.81 / 19.38 | 17 | | [vgg-f](reports/vgg-f.md) | 224 x 224 | 232 MB | 4 MB | 727 MFLOPs | MCN | 41.40 / 19.10 | 18 | | [vgg-m](reports/vgg-m.md) | 224 x 224 | 393 MB | 12 MB | 2 GFLOPs | MCN | 36.90 / 15.50 | 19 | | [vgg-s](reports/vgg-s.md) | 224 x 224 | 393 MB | 12 MB | 3 GFLOPs | MCN | 37.00 / 15.80 | 20 | | [vgg-m-2048](reports/vgg-m-2048.md) | 224 x 224 | 353 MB | 12 MB | 2 GFLOPs | MCN | 37.10 / 15.80 | 21 | | [vgg-m-1024](reports/vgg-m-1024.md) | 224 x 224 | 333 MB | 12 MB | 2 GFLOPs | MCN | 37.80 / 16.10 | 22 | | [vgg-m-128](reports/vgg-m-128.md) | 224 x 224 | 315 MB | 12 MB | 2 GFLOPs | MCN | 40.80 / 18.40 | 23 | | [vgg-vd-16-atrous](reports/vgg-vd-16-atrous.md) | 224 x 224 | 82 MB | 58 MB | 16 GFLOPs | N/A | - / - | 24 | | [vgg-vd-16](reports/vgg-vd-16.md) | 224 x 224 | 528 MB | 58 MB | 16 GFLOPs | MCN | 28.50 / 9.90 | 25 | | [vgg-vd-19](reports/vgg-vd-19.md) | 224 x 224 | 548 MB | 63 MB | 20 GFLOPs | MCN | 28.70 / 9.90 | 26 | | [googlenet](reports/googlenet.md) | 224 x 224 | 51 MB | 26 MB | 2 GFLOPs | MCN | 34.20 / 12.90 | 27 | | [resnet18](reports/resnet18.md) | 224 x 224 | 45 MB | 23 MB | 2 GFLOPs | PT | 30.24 / 10.92 | 28 | | [resnet34](reports/resnet34.md) | 224 x 224 | 83 MB | 35 MB | 4 GFLOPs | PT | 26.70 / 8.58 | 29 | | [resnet-50](reports/resnet-50.md) | 224 x 224 | 98 MB | 103 MB | 4 GFLOPs | MCN | 24.60 / 7.70 | 30 | | [resnet-101](reports/resnet-101.md) | 224 x 224 | 170 MB | 155 MB | 8 GFLOPs | MCN | 23.40 / 7.00 | 31 | | [resnet-152](reports/resnet-152.md) | 224 x 224 | 230 MB | 219 MB | 11 GFLOPs | MCN | 23.00 / 6.70 | 32 | | [resnext-50-32x4d](reports/resnext-50-32x4d.md) | 224 x 224 | 96 MB | 132 MB | 4 GFLOPs | L1 | 22.60 / 6.49 | 33 | | [resnext-101-32x4d](reports/resnext-101-32x4d.md) | 224 x 224 | 169 MB | 197 MB | 8 GFLOPs | L1 | 21.55 / 5.93 | 34 | | [resnext-101-64x4d](reports/resnext-101-64x4d.md) | 224 x 224 | 319 MB | 273 MB | 16 GFLOPs | PT | 20.81 / 5.66 | 35 | | [inception-v3](reports/inception-v3.md) | 299 x 299 | 91 MB | 89 MB | 6 GFLOPs | PT | 22.55 / 6.44 | 36 | | 
[SE-ResNet-50](reports/SE-ResNet-50.md) | 224 x 224 | 107 MB | 103 MB | 4 GFLOPs | SE | 22.37 / 6.36 |
| [SE-ResNet-101](reports/SE-ResNet-101.md) | 224 x 224 | 189 MB | 155 MB | 8 GFLOPs | SE | 21.75 / 5.72 |
| [SE-ResNet-152](reports/SE-ResNet-152.md) | 224 x 224 | 255 MB | 220 MB | 11 GFLOPs | SE | 21.34 / 5.54 |
| [SE-ResNeXt-50-32x4d](reports/SE-ResNeXt-50-32x4d.md) | 224 x 224 | 105 MB | 132 MB | 4 GFLOPs | SE | 20.97 / 5.54 |
| [SE-ResNeXt-101-32x4d](reports/SE-ResNeXt-101-32x4d.md) | 224 x 224 | 187 MB | 197 MB | 8 GFLOPs | SE | 19.81 / 4.96 |
| [SENet](reports/SENet.md) | 224 x 224 | 440 MB | 347 MB | 21 GFLOPs | SE | 18.68 / 4.47 |
| [SE-BN-Inception](reports/SE-BN-Inception.md) | 224 x 224 | 46 MB | 43 MB | 2 GFLOPs | SE | 23.62 / 7.04 |
| [densenet121](reports/densenet121.md) | 224 x 224 | 31 MB | 126 MB | 3 GFLOPs | PT | 25.35 / 7.83 |
| [densenet161](reports/densenet161.md) | 224 x 224 | 110 MB | 235 MB | 8 GFLOPs | PT | 22.35 / 6.20 |
| [densenet169](reports/densenet169.md) | 224 x 224 | 55 MB | 152 MB | 3 GFLOPs | PT | 24.00 / 7.00 |
| [densenet201](reports/densenet201.md) | 224 x 224 | 77 MB | 196 MB | 4 GFLOPs | PT | 22.80 / 6.43 |
| [mcn-mobilenet](reports/mcn-mobilenet.md) | 224 x 224 | 16 MB | 38 MB | 579 MFLOPs | AU | 29.40 / - |

Click on a model name for a more detailed breakdown of its feature extraction cost at different input image/batch sizes. The performance numbers are reported as `top-1 error/top-5 error` on the 2012 ILSVRC validation data. The `src` column indicates the source of the benchmark scores using the following abbreviations:

* [MCN](http://www.vlfeat.org/matconvnet/pretrained/) - scores obtained from the matconvnet website.
* [PT](http://pytorch.org/docs/master/torchvision/models.html) - scores obtained from the PyTorch torchvision module.
* [L1](https://github.com/albanie/mcnPyTorch/blob/master/benchmarks/cnn_imagenet_pt_mcn.m) - evaluated locally (follow link to view benchmark code).
* AU - numbers reported by the paper authors.
* SE - numbers reported by the authors of the Squeeze-and-Excitation paper (see the SENets reference below).

These numbers provide an estimate of performance, but note that there may be small differences between the evaluation scripts from different sources.

**References:**

* [alexnet](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks) - *Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton. "Imagenet classification with deep convolutional neural networks." Advances in neural information processing systems. 2012.*
* [squeezenet](https://arxiv.org/abs/1602.07360) - *Iandola, Forrest N., et al. "SqueezeNet: AlexNet-level accuracy with 50x fewer parameters and <0.5MB model size." arXiv preprint arXiv:1602.07360 (2016).*
* [vgg-m](https://arxiv.org/abs/1405.3531) - *Chatfield, Ken, et al. "Return of the devil in the details: Delving deep into convolutional nets." arXiv preprint arXiv:1405.3531 (2014).*
* [vgg-vd-16/vgg-vd-19](https://arxiv.org/abs/1409.1556) - *Simonyan, Karen, and Andrew Zisserman. "Very deep convolutional networks for large-scale image recognition." arXiv preprint arXiv:1409.1556 (2014).*
* [vgg-vd-16-reduced](https://arxiv.org/abs/1506.04579) - *Liu, Wei, Andrew Rabinovich, and Alexander C. Berg. "ParseNet: Looking wider to see better." arXiv preprint arXiv:1506.04579 (2015).*
* [googlenet](http://www.cv-foundation.org/openaccess/content_cvpr_2015/html/Szegedy_Going_Deeper_With_2015_CVPR_paper.html) - *Szegedy, Christian, et al. "Going deeper with convolutions." Proceedings of the IEEE conference on computer vision and pattern recognition. 2015.*
* [inception](https://arxiv.org/abs/1512.00567) - *Szegedy, Christian, et al. "Rethinking the inception architecture for computer vision." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2016.*
* [resnet](https://arxiv.org/abs/1512.03385) - *He, Kaiming, et al. "Deep residual learning for image recognition." Proceedings of the IEEE conference on computer vision and pattern recognition. 2016.*
* [resnext](https://arxiv.org/abs/1611.05431) - *Xie, Saining, et al. "Aggregated residual transformations for deep neural networks." arXiv preprint arXiv:1611.05431 (2016).*
* [SENets](https://arxiv.org/abs/1709.01507) - *Jie Hu, Li Shen and Gang Sun. "Squeeze-and-Excitation Networks." arXiv preprint arXiv:1709.01507 (2017).*
* [Densenet](https://arxiv.org/abs/1608.06993) - *Huang, Gao, et al. "Densely connected convolutional networks." CVPR (2017).*

### Object Detection Architectures

| model | input size | param memory | feature memory | flops |
|-------|------------|--------------|----------------|-------|
| [rfcn-res50-pascal](reports/rfcn-res50-pascal.md) | 600 x 850 | 122 MB | 1 GB | 79 GFLOPs |
| [rfcn-res101-pascal](reports/rfcn-res101-pascal.md) | 600 x 850 | 194 MB | 2 GB | 117 GFLOPs |
| [ssd-pascal-vggvd-300](reports/ssd-pascal-vggvd-300.md) | 300 x 300 | 100 MB | 116 MB | 31 GFLOPs |
| [ssd-pascal-vggvd-512](reports/ssd-pascal-vggvd-512.md) | 512 x 512 | 104 MB | 337 MB | 91 GFLOPs |
| [ssd-pascal-mobilenet-ft](reports/ssd-pascal-mobilenet-ft.md) | 300 x 300 | 22 MB | 37 MB | 1 GFLOPs |
| [faster-rcnn-vggvd-pascal](reports/faster-rcnn-vggvd-pascal.md) | 600 x 850 | 523 MB | 600 MB | 172 GFLOPs |

The input sizes used are "typical" for each of the architectures listed, but can be varied (a sketch showing how to regenerate these rows follows the references below). *Anchor/priorbox* generation and *roi/psroi*-pooling are not included in the flop estimates. The *ssd-pascal-mobilenet-ft* detector uses the MobileNet feature extractor (the model used here was imported from the architecture made available by [chuanqi305](https://github.com/chuanqi305/MobileNet-SSD)).

**References:**

* [faster-rcnn](http://papers.nips.cc/paper/5638-faster-r-cnn-towards-real-time-object-detection-with-region-proposal-networks) - *Ren, Shaoqing, et al. "Faster R-CNN: Towards real-time object detection with region proposal networks." Advances in neural information processing systems. 2015.*
* [r-fcn](https://arxiv.org/abs/1605.06409) - *Li, Yi, Kaiming He, and Jian Sun. "R-FCN: Object detection via region-based fully convolutional networks." Advances in Neural Information Processing Systems. 2016.*
* [ssd](https://link.springer.com/chapter/10.1007%2F978-3-319-46448-0_2) - *Liu, Wei, et al. "SSD: Single shot multibox detector." European conference on computer vision. Springer, Cham, 2016.*
* [mobilenets](https://arxiv.org/abs/1704.04861) - *Howard, Andrew G., Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, Tobias Weyand, Marco Andreetto, and Hartwig Adam. "Mobilenets: Efficient convolutional neural networks for mobile vision applications." arXiv preprint arXiv:1704.04861 (2017).*
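The detector rows above can be regenerated with the bundled tool. A minimal sketch, assuming the corresponding models have already been downloaded into the `modelDir` used by [compute_burdens.m](compute_burdens.m):

```matlab
% profile only the detection architectures at their "typical" input sizes
% (the 'includeSegmenters' and 'includeKeypointDetectors' options work
% analogously for the tables in the following sections)
compute_burdens('includeClassifiers', false, 'includeObjDetectors', true) ;
```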
### Semantic Segmentation Architectures

| model | input size | param memory | feature memory | flops |
|-------|------------|--------------|----------------|-------|
| [pascal-fcn32s](reports/pascal-fcn32s.md) | 384 x 384 | 519 MB | 423 MB | 125 GFLOPs |
| [pascal-fcn16s](reports/pascal-fcn16s.md) | 384 x 384 | 514 MB | 424 MB | 125 GFLOPs |
| [pascal-fcn8s](reports/pascal-fcn8s.md) | 384 x 384 | 513 MB | 426 MB | 125 GFLOPs |
| [deeplab-vggvd-v2](reports/deeplab-vggvd-v2.md) | 513 x 513 | 144 MB | 755 MB | 202 GFLOPs |
| [deeplab-res101-v2](reports/deeplab-res101-v2.md) | 513 x 513 | 505 MB | 4 GB | 346 GFLOPs |

In this case, the input sizes are those which are typically taken as input crops during training. The *deeplab-res101-v2* model uses multi-scale input, with scales `x1, x0.75, x0.5` (computed relative to the given input size).

**References:**

* [pascal-fcn](http://www.cv-foundation.org/openaccess/content_cvpr_2015/html/Long_Fully_Convolutional_Networks_2015_CVPR_paper.html) - *Long, Jonathan, Evan Shelhamer, and Trevor Darrell. "Fully convolutional networks for semantic segmentation." Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2015.*
* [deeplab](https://arxiv.org/abs/1606.00915) - *Chen, Liang-Chieh^, George Papandreou^, Iasonas Kokkinos, Kevin Murphy, and Alan L. Yuille (^equal contribution). "DeepLab: Semantic image segmentation with deep convolutional nets, atrous convolution, and fully connected CRFs." IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI).*

### Keypoint Detection Architectures

| model | input size | param memory | feature memory | flops |
|-------|------------|--------------|----------------|-------|
| [multipose-mpi](reports/multipose-mpi.md) | 368 x 368 | 196 MB | 245 MB | 134 GFLOPs |
| [multipose-coco](reports/multipose-coco.md) | 368 x 368 | 200 MB | 246 MB | 136 GFLOPs |

**References:**

* [multipose](https://arxiv.org/abs/1611.08050) - *Cao, Zhe, et al. "Realtime multi-person 2d pose estimation using part affinity fields." arXiv preprint arXiv:1611.08050 (2016).*


### Notes and Assumptions

The numbers for each architecture should be reasonably framework agnostic. It is assumed that all weights and activations are stored as floats (with 4 bytes per datum) and that all relus are performed in-place. Feature memory therefore represents an estimate of the total memory consumption of the features computed via a forward pass of the network for a given input, assuming that memory is not re-used. (The exception, as noted above, is that relus are performed in-place and do not add to the feature memory total.) In practice, many frameworks will clear features from memory when they are no longer required by the execution path and will therefore require less memory than is noted here. The feature memory statistic is simply a rough guide to "how big" the activations of the network look.

Fused multiply-adds are counted as single operations (a worked example of these counting conventions is given at the end of this section). The numbers should be considered rough approximations - modern hardware makes it very difficult to accurately count operations (and even if you could, pipelining etc. means that it is not necessarily a good estimate of inference time).

The tool for computing the estimates is implemented as a module for the [autonn](https://github.com/vlfeat/autonn) wrapper of matconvnet and is included in this [repo](core/burden.m), so feel free to take a look for extra details. This module can be installed with the `vl_contrib` package manager (it has two dependencies which can be installed in a similar manner: [autonn](https://github.com/vlfeat/autonn) and [mcnExtraLayers](https://github.com/albanie/mcnExtraLayers)). Matconvnet versions of all of the models can be obtained from either [here](http://www.vlfeat.org/matconvnet/pretrained/) or [here](http://www.robots.ox.ac.uk/~albanie/mcn-models.html).

For further reading on the topic, the 2017 ICLR submission [An analysis of deep neural network models for practical applications](https://openreview.net/pdf?id=Bygq-H9eg) is interesting. If you find any issues, or would like to add additional models, please open an issue/PR.
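To make the counting conventions concrete, here is a minimal sketch of the burden of a single convolutional layer, mirroring the `vl_nnconv` case of [core/burden.m](core/burden.m) (the layer sizes are illustrative only, not taken from any particular model):

```matlab
% a hypothetical 3x3 convolution (stride 1, 'same' padding) on a 56x56x64 input
H = 56 ; W = 56 ; Cin = 64 ; Cout = 128 ; k = 3 ;
outSz = [H W Cout] ;
flops = prod(outSz) * (k * k * Cin) ;         % one fused multiply-add per filter tap
flops = flops + prod(outSz) ;                 % plus the bias additions
featMem = prod(outSz) * 4 ;                   % activations stored as 4-byte floats
paramMem = (k * k * Cin * Cout + Cout) * 4 ;  % filters + biases
fprintf('%.0f MFLOPs, %.2f MB feats, %.2f MB params\n', ...
        flops / 1e6, featMem / 2^20, paramMem / 2^20) ;  % 232 MFLOPs, 1.53 MB, 0.28 MB
```

As a rough usage sketch of the tool itself (this assumes a working MatConvNet installation; the model path below is just an example):

```matlab
% install the module and its two dependencies with the vl_contrib package manager
vl_contrib('install', 'convnet-burden') ; vl_contrib('setup', 'convnet-burden') ;
vl_contrib('install', 'autonn') ; vl_contrib('setup', 'autonn') ;
vl_contrib('install', 'mcnExtraLayers') ; vl_contrib('setup', 'mcnExtraLayers') ;

% estimate the burden of a single model at its nominal input size
burden('modelPath', fullfile(vl_rootnn, 'data/models-import/imagenet-vgg-verydeep-16.mat'), ...
       'imsz', [224 224]) ;
```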
--------------------------------------------------------------------------------
/compute_burdens.m:
--------------------------------------------------------------------------------
function compute_burdens(varargin)
%COMPUTE_BURDENS Compute burden estimates for common architectures
%   COMPUTE_BURDENS computes estimates of the memory and computational
%   requirements of a set of common convolutional neural network architectures.
%   COMPUTE_BURDENS(..., 'name', value) accepts the following options:
%
%   `includeClassifiers` :: true
%    Compute burden estimates for common image classification architectures.
%
%   `includeObjDetectors` :: false
%    Compute burden estimates for common object detection architectures.
%
%   `includeSegmenters` :: false
%    Compute burden estimates for a few semantic segmentation architectures.
%
%   `includeKeypointDetectors` :: false
%    Compute burden estimates for a few keypoint detection architectures.
%
%   `logDir` :: fullfile(vl_rootnn, 'data/burden')
%    Directory location to store logged analysis.
%
%   `modelDir` :: fullfile(vl_rootnn, 'data/models-import')
%    Directory containing models to be analysed.
%
% Copyright (C) 2017 Samuel Albanie
% Licensed under The MIT License [see LICENSE.md for details]

opts.includeClassifiers = true ;
opts.includeObjDetectors = false ;
opts.includeSegmenters = false ;
opts.includeKeypointDetectors = false ;
opts.logDir = fullfile(vl_rootnn, 'data/burden') ;
opts.modelDir = fullfile(vl_rootnn, 'data/models-import') ;
opts = vl_argparse(opts, varargin) ;

models = {} ; logName = 'log' ;

if opts.includeClassifiers
  models = [ models { ...
    {'imagenet-matconvnet-alex.mat', [227 227], {'I', 'MCN', 41.8, 19.2}}, ...
    {'imagenet-caffe-ref.mat', [224 224], {'I', 'MCN', 42.6, 19.7}}, ...
    {'squeezenet1_0-pt-mcn.mat', [224 224], {'I', 'PT', 41.90, 19.58}}, ...
    {'squeezenet1_1-pt-mcn.mat', [224 224], {'I', 'PT', 41.81, 19.38}}, ...
    {'imagenet-vgg-f.mat', [224 224], {'I', 'MCN', 41.4, 19.1}}, ...
    {'imagenet-vgg-m.mat', [224 224], {'I', 'MCN', 36.9, 15.5}}, ...
    {'imagenet-vgg-s.mat', [224 224], {'I', 'MCN', 37.0, 15.8}}, ...
    {'imagenet-vgg-m-2048.mat', [224 224], {'I', 'MCN', 37.1, 15.8}}, ...
    {'imagenet-vgg-m-1024.mat', [224 224], {'I', 'MCN', 37.8, 16.1}}, ...
    {'imagenet-vgg-m-128.mat', [224 224], {'I', 'MCN', 40.8, 18.4}}, ...
    {'vgg-vd-16-reduced.mat', [224 224], {'I', 'N/A', '', ''}}, ...
    {'imagenet-vgg-verydeep-16.mat', [224 224], {'I', 'MCN', 28.5, 9.9}}, ...
    {'imagenet-vgg-verydeep-19.mat', [224 224], {'I', 'MCN', 28.7, 9.9}}, ...
    {'imagenet-googlenet-dag.mat', [224 224], {'I', 'MCN', 34.2, 12.9}}, ...
    {'resnet18-pt-mcn.mat', [224 224], {'I', 'PT', 30.24, 10.92}}, ...
    {'resnet34-pt-mcn.mat', [224 224], {'I', 'PT', 26.70, 8.58}}, ...
    {'imagenet-resnet-50-dag.mat', [224 224], {'I', 'MCN', 24.6, 7.7}}, ...
    {'imagenet-resnet-101-dag.mat', [224 224], {'I', 'MCN', 23.4, 7.0}}, ...
    {'imagenet-resnet-152-dag.mat', [224 224], {'I', 'MCN', 23.0, 6.7}}, ...
    {'resnext_50_32x4d-pt-mcn.mat', [224 224], {'I', 'L1', 22.6, 6.49}}, ...
    {'resnext_101_32x4d-pt-mcn.mat', [224 224], {'I', 'L1', 21.55, 5.93}}, ...
    {'resnext_101_64x4d-pt-mcn.mat', [224 224], {'I', 'PT', 20.81, 5.66}}, ...
    {'inception_v3-pt-mcn.mat', [299 299], {'I', 'PT', 22.55, 6.44}, 1:0.5:3}, ... % breaks on small inputs
    {'SE-ResNet-50-mcn.mat', [224 224], {'I', 'AU', 22.37, 6.36}}, ...
    {'SE-ResNet-101-mcn.mat', [224 224], {'I', 'AU', 21.75, 5.72}}, ...
    {'SE-ResNet-152-mcn.mat', [224 224], {'I', 'AU', 21.34, 5.54}}, ...
    {'SE-ResNeXt-50-32x4d-mcn.mat', [224 224], {'I', 'AU', 20.97, 5.54}}, ...
    {'SE-ResNeXt-101-32x4d-mcn.mat', [224 224], {'I', 'AU', 19.81, 4.96}}, ...
    {'SENet-mcn.mat', [224 224], {'I', 'AU', 18.68, 4.47}}, ...
    {'SE-BN-Inception-mcn.mat', [224 224], {'I', 'AU', 23.62, 7.04}, 1}, ... % breaks on most inputs
    {'densenet121-pt-mcn.mat', [224 224], {'I', 'PT', 25.35, 7.83}, 1:0.5:3}, ...
    {'densenet161-pt-mcn.mat', [224 224], {'I', 'PT', 22.35, 6.20}, 1:0.5:3}, ...
    {'densenet169-pt-mcn.mat', [224 224], {'I', 'PT', 24.00, 7.00}, 1:0.5:3}, ...
    {'densenet201-pt-mcn.mat', [224 224], {'I', 'PT', 22.80, 6.43}, 1:0.5:3}, ...
    }] ;
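  % Each spec above is {matFile, inputSize, scoreMeta[, scales]}: the model
  % file (resolved against opts.modelDir), its nominal input size, the score
  % metadata echoed into the report tables ('I' marks ImageNet-style
  % top-1/top-5 errors) and, optionally, the input scales at which to profile
  % the model (core/burden.m defaults to scales of 0.5:0.5:3).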
  logName = [ logName '-cls'] ;
  models = [ models { ...
    {'mcn-mobilenet.mat', [224 224], {'I', 'AU', 29.4, '-'}, 1:0.5:3}, ...
    }] ;
  % fix later
  % {'mcn-mobilenet-v2.mat', [224 224], {'I', 'AU', 29.4, '-'}, 1:0.5:3} ...
end

if opts.includeObjDetectors
  models = [ models { ...
    {'ssd-pascal-mobilenet-ft.mat', [300 300]}, ...
    {'rfcn-res50-pascal', [600 850]}, ...
    {'rfcn-res101-pascal', [600 850]}, ...
    {'ssd-mcn-pascal-vggvd-300.mat', [300 300]}, ...
    {'ssd-mcn-pascal-vggvd-512.mat', [512 512]}, ...
    {'faster-rcnn-vggvd-pascal', [600 850]}, ...
    } ] ;
  logName = [logName '-det'] ;
end

if opts.includeSegmenters
  models = [ models { ...
    {'pascal-fcn32s-dag.mat', [384 384]}, ...
    {'pascal-fcn16s-dag.mat', [384 384]}, ...
    {'pascal-fcn8s-dag.mat', [384 384]}, ...
    {'deeplab-vggvd-v2.mat', [513 513]}, ...
    {'deeplab-res101-v2.mat', [513 513]}, ...
    } ] ;
  logName = [logName '-seg'] ;
end

if opts.includeKeypointDetectors
  models = [ models { ...
    {'multipose-mpi.mat', [368 368]}, ...
    {'multipose-coco.mat', [368 368]}, ...
147 | } ] ; 148 | logName = [logName '-key'] ; 149 | end 150 | 151 | if ~exist(opts.logDir, 'dir'), mkdir(opts.logDir) ; end 152 | logFile = fullfile(opts.logDir, [logName '.txt']) ; 153 | diary(logFile) ; diary on ; 154 | 155 | for ii = 1:numel(models) 156 | mm = models{ii} ; 157 | if numel(mm) == 4, sc = {'scales', mm{4}} ; else, sc = {} ; end 158 | modelPath = fullfile(opts.modelDir, mm{1}) ; 159 | burden('modelPath', modelPath, 'imsz', mm{2}, 'scores', mm{3}, sc{:}) ; 160 | end 161 | diary off ; 162 | -------------------------------------------------------------------------------- /core/burden.m: -------------------------------------------------------------------------------- 1 | function burden(varargin) 2 | %BURDEN compute memory and computational burden of network % 3 | % Copyright (C) 2017 Samuel Albanie 4 | % Licensed under The MIT License [see LICENSE.md for details] 5 | 6 | opts.gpus = 1 ; 7 | opts.helper = [] ; 8 | opts.imsz = [224 224] ; 9 | opts.type = 'single' ; 10 | opts.scores = {} ; 11 | opts.batchSize = 128 ; 12 | opts.lastConvFeats = '' ; 13 | opts.scales = 0.5:0.5:3 ; 14 | opts.reportDir = fullfile(vl_rootnn, 'contrib/convnet-burden/reports') ; 15 | opts.modelPath = 'data/models-import/imagenet-matconvnet-alex.mat' ; 16 | opts = vl_argparse(opts, varargin) ; 17 | 18 | useGpu = numel(opts.gpus) > 0 ; dag = loadDagNN(opts) ; 19 | 20 | % set options which are specific to current model 21 | [~,modelName,~] = fileparts(opts.modelPath) ; 22 | modelOpts.name = modelName ; modelOpts.inputVars = dag.getInputs() ; 23 | modelOpts.lastConvFeats = getLastFullyConv(modelName, opts) ; 24 | opts.modelOpts = modelOpts ; out = toAutonn(dag, opts) ; net = Net(out{:}) ; 25 | 26 | if useGpu, net.move('gpu') ; end 27 | imsz = opts.imsz ; 28 | baseParams = computeBurden(net, 'params', imsz, opts) ; 29 | base.paramMem = sum(baseParams) ; 30 | [featMem,flops] = computeBurden(net, 'full', imsz, opts) ; 31 | base.featMem = sum(featMem) ; base.flops = sum(flops) ; 32 | base.scores = opts.scores ; 33 | plotProfile(baseParams, featMem, flops, opts) ; 34 | 35 | % find fully convolutional component 36 | if ~isempty(modelOpts.lastConvFeats) 37 | for ii = 1:numel(out) % to avoid hardcoding head ordering, try them in turn 38 | try tail = out{ii}.find(modelOpts.lastConvFeats, 1) ; break 39 | catch ME, tail = [] ; %#ok -> continue to try remaining heads 40 | end 41 | end 42 | trunk = Net(tail) ; 43 | if useGpu, trunk.move('gpu') ; end 44 | else 45 | trunk = net ; 46 | end 47 | report(numel(opts.scales)).imsz = [] ; 48 | 49 | for ii = 1:numel(opts.scales) 50 | imsz_ = round(imsz * opts.scales(ii)) ; 51 | [mem_, flops_, lastSz] = computeBurden(trunk, 'feats', imsz_, opts) ; 52 | mem = sum(mem_) * opts.batchSize ; flops = sum(flops_) * opts.batchSize ; 53 | report(ii).imsz = sprintf('%d x %d', imsz_) ; 54 | report(ii).flops = readableFlops(flops) ; 55 | report(ii).featMem = readableMemory(mem) ; 56 | report(ii).featSz = sprintf('%d x %d x %d', lastSz) ; 57 | end 58 | printReport(base, report, opts) ; 59 | if useGpu, trunk.move('cpu') ; end 60 | 61 | % -------------------------------------- 62 | function printReport(base, report, opts) 63 | % -------------------------------------- 64 | modelName = readableName(opts.modelOpts.name) ; 65 | 66 | % produce readable output 67 | header = sprintf('Report for %s\n', modelName) ; 68 | fprintf('%s\n', repmat('-', 1, numel(header))) ; 69 | fprintf(header) ; 70 | fprintf('Data type of feats and params: %s\n', opts.type) ; % for humans 71 | fprintf('Memory used by params: 
%s\n', readableMemory(base.paramMem)) ; 72 | 73 | msg1 = 'Computing burden for single item batch at imsz %s: \n' ; 74 | msg2 = ' Memory consumed by full feats: %s\n' ; 75 | msg3 = ' Estimated total flops: %s\n' ; 76 | baseImsz = report(opts.scales ==1).imsz ; 77 | fprintf(msg1, baseImsz) ; 78 | fprintf(msg2, readableMemory(base.featMem)) ; 79 | fprintf(msg3, readableFlops(base.flops)) ; 80 | 81 | msg1 = 'Computing burden for %d item batch at imsz %s: \n' ; 82 | msg2 = ' Memory consumed by full feats: %s\n' ; 83 | msg3 = ' Estimated total flops: %s\n' ; 84 | fprintf(msg1, opts.batchSize, baseImsz) ; 85 | fprintf(msg2, readableMemory(opts.batchSize*base.featMem)) ; 86 | fprintf(msg3, readableFlops(base.flops * opts.batchSize)) ; 87 | 88 | % produce output for shared table 89 | detailedReport = sprintf('reports/%s.md', modelName) ; 90 | stats = {readableMemory(base.paramMem), ... 91 | readableMemory(base.featMem), ... 92 | readableFlops(base.flops), ... 93 | readableScores(base.scores)} ; % note: scores adds two columns 94 | markdown = 'MD:: | [%s](%s) | %s | %s | %s | %s | %s |\n' ; 95 | fprintf(markdown, modelName, detailedReport, baseImsz, stats{:}) ; 96 | 97 | fprintf('%s\n', repmat('-', 1, numel(header))) ; 98 | msg = '\nFeature extraction burden at %s with batch size %d: \n\n' ; 99 | fprintf(msg, opts.modelOpts.lastConvFeats, opts.batchSize) ; 100 | disp(struct2table(report)) ; 101 | 102 | % generate detailed report for feature extraction 103 | if ~exist(opts.reportDir, 'dir'), mkdir(opts.reportDir) ; end 104 | reportPath = fullfile(opts.reportDir, sprintf('%s.md', modelName)) ; 105 | header = '### Report for %s\n' ; 106 | body = ['Model params %s \n\n' ... 107 | 'Estimates for a single full pass of model at input size %s: \n' ... 108 | '\n' ... 109 | '* Memory required for features: %s \n' ... 110 | '* Flops: %s \n' ... 111 | '\n' ... 112 | 'Estimates are given below of the burden of computing the `%s` ' ... 113 | 'features in the network for different input sizes using a '... 114 | 'batch size of %d: \n\n'] ; 115 | bodyArgs = {readableMemory(base.paramMem), baseImsz, ... 116 | readableMemory(base.featMem), readableFlops(base.flops), ... 117 | opts.modelOpts.lastConvFeats, opts.batchSize} ; 118 | 119 | tableHeader = ['| input size | feature size | feature memory | flops | \n' ... 120 | '|------------|--------------|----------------|-------| \n'] ; 121 | tableRow = '| %s | %s | %s | %s |\n' ; 122 | graphDescription = ['\nA rough outline of where in the network memory is ' ... 123 | 'allocated to parameters and features and where the greatest computational '... 124 | 'cost lies is shown below. The x-axis does not show labels (it becomes hard' ... 125 | ' to read for networks containing hundreds of layers) - it should be ' ... 126 | 'interpreted as depicting increasing depth from left to right. The goal is' ... 
127 | ' simply to give some idea of the overall profile of the model: \n\n'] ; 128 | graph = '![%s profile](figs/%s.png)\n' ; 129 | 130 | fid = fopen(reportPath, 'w') ; 131 | fprintf(fid, header, modelName) ; 132 | fprintf(fid, body, bodyArgs{:}) ; 133 | fprintf(fid, tableHeader) ; 134 | for ii = 1:numel(report) 135 | rec = report(ii) ; 136 | fprintf(fid, tableRow, rec.imsz, rec.featSz, rec.featMem, rec.flops) ; 137 | end 138 | fprintf(fid, graphDescription) ; 139 | fprintf(fid, graph, modelName, modelName) ; 140 | fclose(fid) ; 141 | 142 | % ---------------------------------------------------- 143 | function plotProfile(baseParams, featMem, flops, opts) 144 | % ---------------------------------------------------- 145 | subplot(3,1,1) ; 146 | [~,units,factor] = readableMemory(max(baseParams)) ; 147 | scaledParams = baseParams ./ factor ; 148 | bar(scaledParams, 'FaceAlpha', 0.6, 'edgecolor','none') ; 149 | title('Parameter memory profile') ; set(gca,'xtick',[]) ; 150 | ylabel(sprintf('memory (%s)', units)) ; 151 | 152 | subplot(3,1,2) ; 153 | [~,units,factor] = readableMemory(max(featMem)) ; 154 | scaledFeats = featMem ./ factor ; 155 | bar(scaledFeats, 'FaceAlpha', 0.4, 'FaceColor', 'r', 'edgecolor','none') ; 156 | title('Feature memory profile') ; set(gca,'xtick',[]) ; 157 | ylabel(sprintf('memory (%s)', units)) ; 158 | 159 | subplot(3,1,3) ; 160 | [~,units,factor] = readableFlops(max(flops)) ; 161 | scaledFlops = flops ./ factor ; 162 | bar(scaledFlops, 'FaceAlpha', 0.3, 'FaceColor', 'm', 'edgecolor','none') ; 163 | title('Flops profile') ; set(gca,'xtick',[]) ; 164 | ylabel(sprintf('%sFLOPS', units)) ; xlabel('depth') ; 165 | figDir = fullfile(opts.reportDir, 'figs') ; 166 | if ~exist(figDir, 'dir'), mkdir(figDir) ; end 167 | figName = sprintf('%s.png', readableName(opts.modelOpts.name)) ; 168 | figPath = fullfile(figDir, figName) ; 169 | print(figPath, '-dpng') ; 170 | 171 | % ------------------------------------- 172 | function name = readableName(modelName) 173 | % ------------------------------------- 174 | % READABLENAME(MODELNAME) renames the model to its canonical name 175 | % for easier reading 176 | 177 | name = strrep(modelName, '_', '-') ; % use consistent separators 178 | name = strrep(name, 'imagenet-', '') ; % clean up prefixes 179 | name = strrep(name, '-pt-mcn', '') ; % clean up suffixes 180 | name = strrep(name, '-mcn', '') ; 181 | name = strrep(name, '-dag', '') ; 182 | name = strrep(name, 'verydeep', 'vd') ; % consistent naming 183 | name = strrep(name, 'reduced', 'atrous') ; 184 | 185 | switch name % handle special cases 186 | case 'matconvnet-alex', name = 'alexnet' ; 187 | case 'caffe-ref', name = 'caffenet' ; 188 | end 189 | 190 | % ---------------------------------------------------- 191 | function [memStr, units, factor] = readableMemory(mem) 192 | % ---------------------------------------------------- 193 | % READABLEMEMORY(MEM) convert total raw bytes into more readable summary 194 | % based on J. 
Henriques' autonn varDisplay() function

suffixes = {'B', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB'} ;
place = floor(log(mem) / log(1024)) ;  % 0-based index into 'suffixes'
place(mem == 0) = 0 ;  % 0 bytes needs special handling
num = mem ./ (1024 .^ place) ; memStr = num2str(num, '%.0f') ;
memStr(:,end+1) = ' ' ; units = suffixes{max(1, place + 1)} ;
memStr = [memStr, char(units)] ; factor = 1024 .^ place ;
memStr(isnan(mem),:) = ' ' ;  % leave invalid values blank

% ------------------------------------------------------
function scoreStr = readableScores(scores)
% ------------------------------------------------------
% READABLESCORES(SCORES) produce a summary string describing model
% performance

format = scores{1} ; res = scores(2:end) ;
switch format
  case 'I'
    if strcmp(res{1}, 'N/A')
      scoreStr = 'N/A | - / - ' ;
    else
      template = '%s |' ;
      if isa(res{2}, 'double')
        template = [template ' %.2f'] ;
      else
        template = [template ' %s'] ;
      end
      if isa(res{3}, 'double')
        template = [template ' / %.2f'] ;
      else
        template = [template ' / %s'] ;
      end
      scoreStr = sprintf(template, res{:}) ; % imagenet
    end
end

% ------------------------------------------------------
function [flopStr, units, factor] = readableFlops(flops)
% ------------------------------------------------------
% READABLEFLOPS(FLOPS) convert total flops into more readable summary

suffixes = {' ', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y'} ;
place = floor(log(flops) / log(1000)) ;  % 0-based index into 'suffixes'
place(flops == 0) = 0 ;  % 0 flops needs special handling
num = flops ./ (1000 .^ place) ; flopStr = num2str(num, '%.0f') ;
flopStr(:,end+1) = ' ' ; units = suffixes{max(1, place + 1)} ;
flopStr = [flopStr, char(units) 'FLOPs'] ; factor = 1000 .^ place ;
flopStr(isnan(flops),:) = ' ' ;  % leave invalid values blank

% --------------------------------
function dag = loadDagNN(opts)
% --------------------------------
stored = load(opts.modelPath) ;
if ~isfield(stored, 'params')  % simplenn
  dag = dagnn.DagNN.fromSimpleNN(stored) ;
else
  dag = dagnn.DagNN.loadobj(stored) ;
end

% --------------------------------
function out = toAutonn(net, opts)
% --------------------------------
% provide required helper functions for custom architectures

args = {net} ;
if contains(opts.modelOpts.name, 'faster-rcnn')
  args = [args {@faster_rcnn_autonn_custom_fn}] ;
elseif contains(opts.modelOpts.name, 'ssd')
  args = [args {@ssd_autonn_custom_fn}] ;
elseif contains(opts.modelOpts.name, 'rfcn')
  args = [args {@rfcn_autonn_custom_fn}] ;
%elseif contains(opts.modelOpts.name, {'SE', '-pt', '-fcn', 'deeplab-'})
else
  args = [args {@extras_autonn_custom_fn}] ;
end
out = Layer.fromDagNN(args{:}) ;

% -----------------------------------------------
function last = getLastFullyConv(modelName, opts)
% -----------------------------------------------
%GETLASTFULLYCONV - find the last fully convolutional layer of the network
%   GETLASTFULLYCONV(MODELNAME, OPTS) looks up the last "fully convolutional"
%   layer of the network architecture.
This is the last layer that can 277 | % be computed with any input image size (fully connected layers 278 | % typically break under varying input sizes). In this function the 279 | % last layer is "looked up" for common architectures as a convenience. 280 | % However, the user may also specify the name of the layer output 281 | % variable directly. 282 | 283 | last = opts.lastConvFeats ; 284 | if ~isempty(last) ; return ; end 285 | alexFamily = {'imagenet-matconvnet-alex', ... 286 | 'imagenet-vgg-f', ... 287 | 'imagenet-vgg-m', ... 288 | 'imagenet-vgg-s', ... 289 | 'imagenet-vgg-m-2048', ... 290 | 'imagenet-vgg-m-1024', ... 291 | 'imagenet-vgg-m-128', ... 292 | 'imagenet-caffe-ref', ... 293 | 'imagenet-vgg-verydeep-16', ... 294 | 'imagenet-vgg-verydeep-19', ... 295 | 'vgg-vd-16-reduced'} ; 296 | resnets = {'imagenet-resnet-50-dag', ... 297 | 'imagenet-resnet-101-dag', ... 298 | 'imagenet-resnet-152-dag'} ; 299 | small_resnets = {'resnet18-pt-mcn'} ; 300 | pt_imports = {'resnet34-pt-mcn', ... 301 | 'resnext_50_32x4d-pt-mcn', ... 302 | 'resnext_101_32x4d-pt-mcn', ... 303 | 'resnext_101_64x4d-pt-mcn'} ; 304 | fcns = {'pascal-fcn32s-dag', 'pascal-fcn16s-dag', 'pascal-fcn8s-dag'} ; 305 | squeezenets = {'squeezenet1_0-pt-mcn', 'squeezenet1_1-pt-mcn'} ; 306 | if ismember(modelName, alexFamily), last = 'pool5' ; 307 | elseif ismember(modelName, resnets), last = 'res5c_relu' ; 308 | elseif ismember(modelName, small_resnets), last = 'features_7_1_id_relu' ; 309 | elseif ismember(modelName, pt_imports), last = 'features_7_2_id_relu' ; 310 | elseif ismember(modelName, squeezenets), last = 'features_12_cat' ; 311 | elseif ismember(modelName, fcns), last = 'score_fr' ; 312 | elseif contains(modelName, 'googlenet'), last = 'icp9_out' ; 313 | elseif contains(modelName, 'multipose'), last = 'Mconv6_stage6_L2' ; 314 | elseif contains(modelName, 'faster-rcnn') || contains(modelName, 'rfcn') 315 | if contains(modelName, 'vggvd'), last = 'relu5_3' ; end 316 | if contains(modelName, 'res50'), last = 'res5c_relu' ; end 317 | if contains(modelName, 'res101'), last = 'res5c_relu' ; end 318 | elseif contains(modelName, 'ssd') 319 | if contains(modelName, 'vggvd'), last = 'relu4_3' ; end 320 | if contains(modelName, 'res50'), last = 'res5c_relu' ; end 321 | if contains(modelName, 'res101'), last = 'res5c_relu' ; end 322 | if contains(modelName, 'mobilenet'), last = 'conv17_2_relu' ; end 323 | elseif contains(modelName, 'inception'), last = 'features_19' ; 324 | elseif contains(modelName, 'SE-BN-Inception'), last = 'inception_5b_scale' ; 325 | elseif contains(modelName, 'SE'), last = 'conv5_3' ; 326 | elseif strcmp(modelName, 'deeplab-vggvd-v2'), last = 'fc8_interp' ; 327 | elseif strcmp(modelName, 'deeplab-res101-v2'), last = 'fc1_interp' ; 328 | elseif contains(modelName, 'densenet'), last = 'features_2' ; 329 | elseif contains(modelName, 'mcn-mobilenet'), last = 'fc7' ; 330 | else 331 | keyboard 332 | end 333 | msg = ['architecture not recognised, last fully convolutional layer must' ... 
' be specified directly using the lastConvFeats option'] ;
assert(~isempty(last), msg) ;

% -----------------------------------------------------------------
function [mem,flops,lastSz] = computeBurden(net, target, imsz, opts)
% -----------------------------------------------------------------

flops = 0 ; lastSz = [] ;
last = opts.modelOpts.lastConvFeats ;
params = [net.params.var] ;
inputs = cellfun(@(x) net.inputs.(x), fieldnames(net.inputs))' ;
feats = 3:2:numel(net.vars) ;  % autonn interleaves values and derivatives; take value slots
keep = arrayfun(@(x) ~ismember(x, [params inputs]), feats) ;
feats = feats(keep) ;

switch target
  case 'params'
    p = params ; mem = computeMemory(net, p, opts) ; return
  case {'feats', 'full'}
    x = zeros([imsz 3], opts.type) ;
    if numel(opts.gpus), x = gpuArray(x) ; end
    inVars = opts.modelOpts.inputVars ; args = {inVars{1}, x} ;
    if ismember('im_info', inVars) && strcmp(target, 'full')  % handle custom inputs
      args = [args {'im_info', [imsz 1]}] ;
    end
    net.eval(args, 'test') ; p = feats ; lastSz = size(net.getValue(last)) ;
    mem = computeMemory(net, p, opts) ; flops = computeFlops(net) ;
  otherwise, error('%s not recognised', target) ;
end

% ---------------------------------------
function mem = computeMemory(net, p, opts)
% ---------------------------------------
switch opts.type
  case 'int8', bytes = 1 ;
  case 'uint8', bytes = 1 ;
  case 'int16', bytes = 2 ;
  case 'uint16', bytes = 2 ;
  case 'int32', bytes = 4 ;
  case 'uint32', bytes = 4 ;
  case 'int64', bytes = 8 ;
  case 'uint64', bytes = 8 ;
  case 'single', bytes = 4 ;
  case 'double', bytes = 8 ;
  otherwise, error('data type %s not recognised', opts.type) ;
end
mem = arrayfun(@(x) numel(net.vars{x}), p) * bytes ;

% -------------------------------------------
function totals = computeFlops(net, varargin)
% -------------------------------------------
opts.includeExp = 0 ;
opts = vl_argparse(opts, varargin) ;

totals = zeros(1, numel(net.forward)) ;
for ii = 1:numel(net.forward)
  layer = net.forward(ii) ;
  ins = gather(net.vars(layer.inputVars)) ;
  outs = gather(net.vars(layer.outputVar)) ;
  funcStr = func2str(layer.func) ;
  switch funcStr
    case 'vl_nnconv'  % count fused multiply-adds
      hasBias = (numel(ins) == 3) ;
      flops = numel(outs{1}) * numel(ins{2}(:,:,:,1)) ;
      if hasBias, flops = flops + numel(outs{1}) ; end
    case 'vl_nnconvt'
      hasBias = (numel(ins) == 3) ;
      flops = numel(ins{1}) * numel(ins{2}(:,:,1,:)) ;
      if hasBias, flops = flops + numel(outs{1}) ; end
    case 'vl_nnrelu'  % count as comparison + multiply
      flops = 2 * numel(outs{1}) ;
    case 'vl_nnpool'  % assume two flops per location
      pos = find(cellfun(@(x) isequal(x, 'stride'), layer.args)) ;
      stride = layer.args{pos+1} ;
      flops = 2 * numel(outs{1}) * prod(stride) ;
    case 'vl_nnglobalpool'  % FMA
      flops = numel(ins{1}) ;
    case 'vl_nnbnorm_wrapper', flops = 0 ;  % assume merged at test time
    case 'vl_nnwsum', flops = numel(outs{1}) ;  % count fused multiply-adds
    case 'vl_nnreshape', flops = 0 ;  % essentially free
    case 'vl_nnflatten', flops = 0 ;  % essentially free
    case 'vl_nncrop', flops = 0 ;  % index slicing
    case 'permute', flops = 0 ;  % expensive, but no flops
    case 'cat', flops = 0
; % can be expensive, but no flops 418 | case 'size', flops = 0 ; 419 | case 'max', flops = numel(ins{1}) ; % comparisons 420 | case 'vl_nnproposalrpn', flops = 0 ; % would be too inaccurate 421 | case 'vl_nnmultiboxdetector', flops = 0 ; % would be too inaccurate 422 | case 'vl_nnpriorbox', flops = 0 ; % not worth computing 423 | case 'vl_nnroipool', flops = 0 ; % would be too inaccurate 424 | case 'vl_nnpsroipool', flops = 0 ; % would be too inaccurate 425 | case 'vl_nnmask', flops = 0 ; % dropout would be removed during inference 426 | case 'vl_nndropout_wrapper', flops = 0 ; % ditto 427 | case 'vl_nninterp', flops = 4 * numel(outs{1}) ; 428 | case 'vl_nnmax', flops = numel(outs{1}) * numel(ins) ; 429 | case {'vl_nnscalenorm', 'vl_nnnormalize'} 430 | outSz = size(outs{1}) ; % simplifying assumption: common norm factors 431 | normFactors = (1 + 1 + 2 * outSz(3)) * prod(outSz(1:2)) ; 432 | flops = numel(outs{1}) + normFactors ; 433 | case {'vl_nnsoftmax', 'vl_nnsoftmaxt'} % counting flops for exp is tricky 434 | if opts.includeExp 435 | flops = (2+1+5+1+2)*numel(outs{1}) ; 436 | else 437 | flops = 0 ; 438 | end 439 | case 'vl_nnsigmoid' % counting flops for exp is tricky 440 | if opts.includeExp, flops = 3*numel(outs{1}) ; else, flops = 0 ; end 441 | case 'vl_nnaxpy', flops = 2*numel(outs{1}) ; % use FMA 442 | case 'vl_nnscale', flops = numel(outs{1}) ; % use FMA 443 | case 'root', continue 444 | otherwise, error('layer %s not recognised', func2str(layer.func)) ; 445 | end 446 | totals(ii) = flops ; 447 | end 448 | -------------------------------------------------------------------------------- /matlab/fcn_autonn_custom_fn.m: -------------------------------------------------------------------------------- 1 | function obj = fcn_autonn_custom_fn(block, inputs, ~) 2 | % FCN_AUTONN_CUSTOM_FN autonn custom layer converter 3 | % 4 | % Copyright (C) 2017 Samuel Albanie 5 | % Licensed under The MIT License [see LICENSE.md for details] 6 | 7 | switch class(block) 8 | case 'dagnn.Crop' 9 | obj = vl_nncrop_wrapper(inputs{1}, inputs{2}, block.crop) ; 10 | end 11 | -------------------------------------------------------------------------------- /matlab/inception_autonn_custom_fn.m: -------------------------------------------------------------------------------- 1 | function obj = inception_autonn_custom_fn(block, inputs, ~) 2 | % INCEPTION_AUTONN_CUSTOM_FN autonn custom layer converter 3 | % 4 | % Copyright (C) 2017 Samuel Albanie 5 | % Licensed under The MIT License [see LICENSE.md for details] 6 | 7 | switch class(block) 8 | case 'dagnn.Permute' 9 | obj = Layer.create(@permute, {inputs{1}, block.order}) ; 10 | case 'dagnn.Flatten' 11 | obj = Layer.create(@vl_nnflatten, {inputs{1}, block.axis}) ; 12 | case 'dagnn.Reshape' 13 | obj = Layer.create(@vl_nnreshape, {inputs{1}, block.shape}) ; 14 | end 15 | -------------------------------------------------------------------------------- /matlab/squeezenet_autonn_custom_fn.m: -------------------------------------------------------------------------------- 1 | function obj = squeezenet_autonn_custom_fn(block, inputs, ~) 2 | % SQUEEZENET_AUTONN_CUSTOM_FN autonn custom layer converter 3 | % 4 | % Copyright (C) 2017 Samuel Albanie 5 | % Licensed under The MIT License [see LICENSE.md for details] 6 | 7 | switch class(block) 8 | case 'dagnn.Permute' 9 | obj = Layer.create(@permute, {inputs{1}, block.order}) ; 10 | case 'dagnn.Flatten' 11 | obj = Layer.create(@vl_nnflatten, {inputs{1}, block.axis}) ; 12 | case 'dagnn.Reshape' 13 | obj = 
Layer.create(@vl_nnreshape, {inputs{1}, block.shape}) ;
end
--------------------------------------------------------------------------------
/misc/generate_markdown.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# generate_markdown.sh generates a set of markdown tables
# for a more readable summary of convnet computational costs
#
# --------------------------------------------------------
# convnet-burden
# Licensed under The MIT License [see LICENSE.md for details]
# Copyright (C) 2017 Samuel Albanie
# --------------------------------------------------------
add_links="false"

function gen_table() {
  # generate markdown table summaries

  res=`cat $1 | grep "MD::" | cut -f 1 -d ' ' --complement`

  # clean up dataset prefixes
  res=$(echo "$res" | sed "s/imagenet-//g")

  # update model names
  res=$(echo "$res" | sed "s/vgg-verydeep-\([0-9+]\)/vgg-vd-\1/g")
  res=$(echo "$res" | sed "s/vgg-\([a-z+]\)/vgg-\1/g")
  res=$(echo "$res" | sed "s/ssd-mcn-pascal-vggvd-\([0-9+]\)/ssd-pascal-\1/g")
  res=$(echo "$res" | sed "s/resnet-\([0-9+]\)/resnet-\1/g")
  res="${res/matconvnet-alex/alexnet}"
  res="${res/caffe-ref/caffenet}"

  # clean up suffixes and mcn notation
  res=$(echo "$res" | sed "s/_/-/g")
  res=$(echo "$res" | sed "s/-dag//g")
  res=$(echo "$res" | sed "s/-pt-mcn//g")

  # add links to download models
  if [ "$add_links" = "true" ] ; then
    mcn_home="(http://www.vlfeat.org/matconvnet/pretrained/)"
    imported="(http://www.robots.ox.ac.uk/~albanie/models.html)"
    res=$(echo "$res" | awk '{$2="\\["$2"\\]" ; print}')
    res=$(echo "$res" | sed 's/\\/ /g')
    res="${res//]/]$mcn_home}"
    echo "$res"
  else
    echo "$res"
  fi
}

# point this at the dir containing outputs of the compute_burdens.m script
LOGDIR="${HOME}/coding/libs/mcn/contrib-matconvnet/data/burden"

declare -a tasks=("cls" "det" "seg" "key")
for sfx in "${tasks[@]}"
do
  echo ""
  echo "task: ${sfx}"
  echo ""
  echo "| model | input size | param mem | feat. mem | flops | src | performance |"
  echo "|-------|------------|-----------|-----------|-------|-----|-------------|"
  gen_table "${LOGDIR}/log-${sfx}.txt"
done
--------------------------------------------------------------------------------
/reports/SE-BN-Inception.md:
--------------------------------------------------------------------------------
### Report for SE-BN-Inception
Model params 46 MB

Estimates for a single full pass of model at input size 224 x 224:

* Memory required for features: 43 MB
* Flops: 2 GFLOPs

Estimates are given below of the burden of computing the `inception_5b_scale` features in the network for different input sizes using a batch size of 128:

| input size | feature size | feature memory | flops |
|------------|--------------|----------------|-------|
| 224 x 224 | 7 x 7 x 1024 | 5 GB | 262 GFLOPs |

A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right.
The goal is simply to give some idea of the overall profile of the model: 16 | 17 | ![SE-BN-Inception profile](figs/SE-BN-Inception.png) 18 | -------------------------------------------------------------------------------- /reports/SE-ResNeXt-101-32x4d.md: -------------------------------------------------------------------------------- 1 | ### Report for SE-ResNeXt-101-32x4d 2 | Model params 187 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 197 MB 7 | * Flops: 8 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `conv5_3` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 4 x 4 x 2048 | 6 GB | 264 GFLOPs | 14 | | 224 x 224 | 7 x 7 x 2048 | 25 GB | 1 TFLOPs | 15 | | 336 x 336 | 11 x 11 x 2048 | 56 GB | 2 TFLOPs | 16 | | 448 x 448 | 14 x 14 x 2048 | 98 GB | 4 TFLOPs | 17 | | 560 x 560 | 18 x 18 x 2048 | 154 GB | 6 TFLOPs | 18 | | 672 x 672 | 21 x 21 x 2048 | 221 GB | 9 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![SE-ResNeXt-101-32x4d profile](figs/SE-ResNeXt-101-32x4d.png) 23 | -------------------------------------------------------------------------------- /reports/SE-ResNeXt-50-32x4d.md: -------------------------------------------------------------------------------- 1 | ### Report for SE-ResNeXt-50-32x4d 2 | Model params 105 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 132 MB 7 | * Flops: 4 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `conv5_3` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 4 x 4 x 2048 | 4 GB | 144 GFLOPs | 14 | | 224 x 224 | 7 x 7 x 2048 | 16 GB | 547 GFLOPs | 15 | | 336 x 336 | 11 x 11 x 2048 | 37 GB | 1 TFLOPs | 16 | | 448 x 448 | 14 x 14 x 2048 | 66 GB | 2 TFLOPs | 17 | | 560 x 560 | 18 x 18 x 2048 | 103 GB | 3 TFLOPs | 18 | | 672 x 672 | 21 x 21 x 2048 | 148 GB | 5 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. 
The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![SE-ResNeXt-50-32x4d profile](figs/SE-ResNeXt-50-32x4d.png) 23 | -------------------------------------------------------------------------------- /reports/SE-ResNet-101.md: -------------------------------------------------------------------------------- 1 | ### Report for SE-ResNet-101 2 | Model params 189 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 155 MB 7 | * Flops: 8 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `conv5_3` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 4 x 4 x 2048 | 5 GB | 252 GFLOPs | 14 | | 224 x 224 | 7 x 7 x 2048 | 19 GB | 977 GFLOPs | 15 | | 336 x 336 | 11 x 11 x 2048 | 44 GB | 2 TFLOPs | 16 | | 448 x 448 | 14 x 14 x 2048 | 77 GB | 4 TFLOPs | 17 | | 560 x 560 | 18 x 18 x 2048 | 121 GB | 6 TFLOPs | 18 | | 672 x 672 | 21 x 21 x 2048 | 174 GB | 9 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![SE-ResNet-101 profile](figs/SE-ResNet-101.png) 23 | -------------------------------------------------------------------------------- /reports/SE-ResNet-152.md: -------------------------------------------------------------------------------- 1 | ### Report for SE-ResNet-152 2 | Model params 255 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 220 MB 7 | * Flops: 11 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `conv5_3` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 4 x 4 x 2048 | 7 GB | 372 GFLOPs | 14 | | 224 x 224 | 7 x 7 x 2048 | 27 GB | 1 TFLOPs | 15 | | 336 x 336 | 11 x 11 x 2048 | 62 GB | 3 TFLOPs | 16 | | 448 x 448 | 14 x 14 x 2048 | 110 GB | 6 TFLOPs | 17 | | 560 x 560 | 18 x 18 x 2048 | 171 GB | 9 TFLOPs | 18 | | 672 x 672 | 21 x 21 x 2048 | 246 GB | 13 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. 
The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![SE-ResNet-152 profile](figs/SE-ResNet-152.png) 23 | -------------------------------------------------------------------------------- /reports/SE-ResNet-50.md: -------------------------------------------------------------------------------- 1 | ### Report for SE-ResNet-50 2 | Model params 107 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 103 MB 7 | * Flops: 4 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `conv5_3` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 4 x 4 x 2048 | 3 GB | 132 GFLOPs | 14 | | 224 x 224 | 7 x 7 x 2048 | 13 GB | 499 GFLOPs | 15 | | 336 x 336 | 11 x 11 x 2048 | 29 GB | 1 TFLOPs | 16 | | 448 x 448 | 14 x 14 x 2048 | 51 GB | 2 TFLOPs | 17 | | 560 x 560 | 18 x 18 x 2048 | 80 GB | 3 TFLOPs | 18 | | 672 x 672 | 21 x 21 x 2048 | 115 GB | 4 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![SE-ResNet-50 profile](figs/SE-ResNet-50.png) 23 | -------------------------------------------------------------------------------- /reports/SENet.md: -------------------------------------------------------------------------------- 1 | ### Report for SENet 2 | Model params 440 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 347 MB 7 | * Flops: 21 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `conv5_3` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 4 x 4 x 2048 | 11 GB | 684 GFLOPs | 14 | | 224 x 224 | 7 x 7 x 2048 | 43 GB | 3 TFLOPs | 15 | | 336 x 336 | 11 x 11 x 2048 | 98 GB | 6 TFLOPs | 16 | | 448 x 448 | 14 x 14 x 2048 | 173 GB | 11 TFLOPs | 17 | | 560 x 560 | 18 x 18 x 2048 | 271 GB | 17 TFLOPs | 18 | | 672 x 672 | 21 x 21 x 2048 | 390 GB | 24 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. 
The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![SENet profile](figs/SENet.png) 23 | -------------------------------------------------------------------------------- /reports/alexnet.md: -------------------------------------------------------------------------------- 1 | ### Report for alexnet 2 | Model params 233 MB 3 | 4 | Estimates for a single full pass of model at input size 227 x 227: 5 | 6 | * Memory required for features: 3 MB 7 | * Flops: 727 MFLOPs 8 | 9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 114 x 114 | 2 x 2 x 256 | 73 MB | 15 GFLOPs | 14 | | 227 x 227 | 6 x 6 x 256 | 377 MB | 86 GFLOPs | 15 | | 341 x 341 | 9 x 9 x 256 | 872 MB | 200 GFLOPs | 16 | | 454 x 454 | 13 x 13 x 256 | 2 GB | 361 GFLOPs | 17 | | 568 x 568 | 16 x 16 x 256 | 2 GB | 572 GFLOPs | 18 | | 681 x 681 | 20 x 20 x 256 | 4 GB | 829 GFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![alexnet profile](figs/alexnet.png) 23 | -------------------------------------------------------------------------------- /reports/caffe-ref.md: -------------------------------------------------------------------------------- 1 | ### Report for caffe-ref 2 | Model params 233 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 3 MB 7 | * Flops: 724 MFLOPs 8 | 9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 3 x 3 x 256 | 97 MB | 19 GFLOPs | 14 | | 224 x 224 | 6 x 6 x 256 | 427 MB | 85 GFLOPs | 15 | | 336 x 336 | 10 x 10 x 256 | 995 MB | 199 GFLOPs | 16 | | 448 x 448 | 13 x 13 x 256 | 2 GB | 360 GFLOPs | 17 | | 560 x 560 | 17 x 17 x 256 | 3 GB | 569 GFLOPs | 18 | | 672 x 672 | 20 x 20 x 256 | 4 GB | 826 GFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![caffe-ref profile](figs/caffe-ref.png) 23 | --------------------------------------------------------------------------------
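As a rough cross-check on the report tables above, the feature-memory entries appear to follow from simple bookkeeping: each stored feature map costs H x W x C x 4 bytes in single precision, the reported figure accumulates every intermediate map up to the named layer, and the batch-128 rows are, to rounding, the single-image full-pass figure scaled by 128. A minimal MATLAB sketch of this arithmetic, assuming an AlexNet-style list of stored feature maps (the map sizes below are illustrative stand-ins, not values parsed from the model):

```matlab
% Rough feature-memory bookkeeping for a single forward pass.
batch = 128;        % batch size used in the report tables
bytes_per_el = 4;   % single-precision storage

% [H W C] of each stored feature map for an AlexNet-style trunk (illustrative)
feats = [55 55 96; 27 27 96; 27 27 256; 13 13 256; 13 13 384; ...
         13 13 384; 13 13 256; 6 6 256];

per_image = sum(prod(feats, 2)) * bytes_per_el;   % bytes for one image
fprintf('features per image: %.1f MB\n', per_image / 2^20);
fprintf('batch of %d: %.0f MB\n', batch, per_image * batch / 2^20);
```

Summing these illustrative maps gives roughly 3 MB per image and a little under 400 MB for a batch of 128, in line with the 224-227 pixel rows of the two `pool5` tables above.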
/reports/caffenet.md: -------------------------------------------------------------------------------- 1 | ### Report for caffenet 2 | Model params 233 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 3 MB 7 | * Flops: 724 MFLOPs 8 | 9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 3 x 3 x 256 | 97 MB | 19 GFLOPs | 14 | | 224 x 224 | 6 x 6 x 256 | 427 MB | 85 GFLOPs | 15 | | 336 x 336 | 10 x 10 x 256 | 995 MB | 199 GFLOPs | 16 | | 448 x 448 | 13 x 13 x 256 | 2 GB | 360 GFLOPs | 17 | | 560 x 560 | 17 x 17 x 256 | 3 GB | 569 GFLOPs | 18 | | 672 x 672 | 20 x 20 x 256 | 4 GB | 826 GFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![caffenet profile](figs/caffenet.png) 23 | -------------------------------------------------------------------------------- /reports/deeplab-res101-v2.md: -------------------------------------------------------------------------------- 1 | ### Report for deeplab-res101-v2 2 | Model params 505 MB 3 | 4 | Estimates for a single full pass of model at input size 513 x 513: 5 | 6 | * Memory required for features: 4 GB 7 | * Flops: 346 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `fc1_interp` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 257 x 257 | 257 x 257 x 21 | 144 GB | 11 TFLOPs | 14 | | 513 x 513 | 513 x 513 x 21 | 557 GB | 44 TFLOPs | 15 | | 770 x 770 | 769 x 769 x 21 | 1 TB | 98 TFLOPs | 16 | | 1026 x 1026 | 1025 x 1025 x 21 | 2 TB | 174 TFLOPs | 17 | | 1283 x 1283 | 1281 x 1281 x 21 | 3 TB | 271 TFLOPs | 18 | | 1539 x 1539 | 1537 x 1537 x 21 | 5 TB | 389 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right.
The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![deeplab-res101-v2 profile](figs/deeplab-res101-v2.png) 23 | -------------------------------------------------------------------------------- /reports/deeplab-vggvd-v2.md: -------------------------------------------------------------------------------- 1 | ### Report for deeplab-vggvd-v2 2 | Model params 144 MB 3 | 4 | Estimates for a single full pass of model at input size 513 x 513: 5 | 6 | * Memory required for features: 755 MB 7 | * Flops: 202 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `fc8_interp` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 257 x 257 | 257 x 257 x 21 | 24 GB | 7 TFLOPs | 14 | | 513 x 513 | 513 x 513 x 21 | 94 GB | 26 TFLOPs | 15 | | 770 x 770 | 777 x 777 x 21 | 214 GB | 59 TFLOPs | 16 | | 1026 x 1026 | 1033 x 1033 x 21 | 378 GB | 104 TFLOPs | 17 | | 1283 x 1283 | 1289 x 1289 x 21 | 588 GB | 161 TFLOPs | 18 | | 1539 x 1539 | 1545 x 1545 x 21 | 844 GB | 231 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![deeplab-vggvd-v2 profile](figs/deeplab-vggvd-v2.png) 23 | -------------------------------------------------------------------------------- /reports/densenet121.md: -------------------------------------------------------------------------------- 1 | ### Report for densenet121 2 | Model params 31 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 126 MB 7 | * Flops: 3 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `features_2` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 224 x 224 | 1 x 1 x 1024 | 16 GB | 367 GFLOPs | 14 | | 336 x 336 | 1 x 1 x 1024 | 35 GB | 823 GFLOPs | 15 | | 448 x 448 | 2 x 2 x 1024 | 63 GB | 1 TFLOPs | 16 | | 560 x 560 | 2 x 2 x 1024 | 98 GB | 2 TFLOPs | 17 | | 672 x 672 | 3 x 3 x 1024 | 142 GB | 3 TFLOPs | 18 | 19 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. 
The goal is simply to give some idea of the overall profile of the model: 20 | 21 | ![densenet121 profile](figs/densenet121.png) 22 | -------------------------------------------------------------------------------- /reports/densenet161.md: -------------------------------------------------------------------------------- 1 | ### Report for densenet161 2 | Model params 110 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 235 MB 7 | * Flops: 8 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `features_2` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 224 x 224 | 1 x 1 x 2208 | 29 GB | 997 GFLOPs | 14 | | 336 x 336 | 1 x 1 x 2208 | 66 GB | 2 TFLOPs | 15 | | 448 x 448 | 2 x 2 x 2208 | 118 GB | 4 TFLOPs | 16 | | 560 x 560 | 2 x 2 x 2208 | 183 GB | 6 TFLOPs | 17 | | 672 x 672 | 3 x 3 x 2208 | 265 GB | 9 TFLOPs | 18 | 19 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 20 | 21 | ![densenet161 profile](figs/densenet161.png) 22 | -------------------------------------------------------------------------------- /reports/densenet169.md: -------------------------------------------------------------------------------- 1 | ### Report for densenet169 2 | Model params 55 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 152 MB 7 | * Flops: 3 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `features_2` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 224 x 224 | 1 x 1 x 1664 | 19 GB | 435 GFLOPs | 14 | | 336 x 336 | 1 x 1 x 1664 | 42 GB | 971 GFLOPs | 15 | | 448 x 448 | 2 x 2 x 1664 | 76 GB | 2 TFLOPs | 16 | | 560 x 560 | 2 x 2 x 1664 | 118 GB | 3 TFLOPs | 17 | | 672 x 672 | 3 x 3 x 1664 | 171 GB | 4 TFLOPs | 18 | 19 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. 
The goal is simply to give some idea of the overall profile of the model: 20 | 21 | ![densenet169 profile](figs/densenet169.png) 22 | -------------------------------------------------------------------------------- /reports/densenet201.md: -------------------------------------------------------------------------------- 1 | ### Report for densenet201 2 | Model params 77 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 196 MB 7 | * Flops: 4 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `features_2` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 224 x 224 | 1 x 1 x 1920 | 25 GB | 556 GFLOPs | 14 | | 336 x 336 | 1 x 1 x 1920 | 55 GB | 1 TFLOPs | 15 | | 448 x 448 | 2 x 2 x 1920 | 98 GB | 2 TFLOPs | 16 | | 560 x 560 | 2 x 2 x 1920 | 152 GB | 3 TFLOPs | 17 | | 672 x 672 | 3 x 3 x 1920 | 221 GB | 5 TFLOPs | 18 | 19 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 20 | 21 | ![densenet201 profile](figs/densenet201.png) 22 | -------------------------------------------------------------------------------- /reports/faster-rcnn-vggvd-pascal.md: -------------------------------------------------------------------------------- 1 | ### Report for faster-rcnn-vggvd-pascal 2 | Model params 523 MB 3 | 4 | Estimates for a single full pass of model at input size 600 x 850: 5 | 6 | * Memory required for features: 600 MB 7 | * Flops: 172 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `relu5_3` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 300 x 425 | 19 x 27 x 512 | 18 GB | 5 TFLOPs | 14 | | 600 x 850 | 38 x 54 x 512 | 73 GB | 20 TFLOPs | 15 | | 900 x 1275 | 57 x 80 x 512 | 164 GB | 45 TFLOPs | 16 | | 1200 x 1700 | 75 x 107 x 512 | 292 GB | 80 TFLOPs | 17 | | 1500 x 2125 | 94 x 133 x 512 | 456 GB | 125 TFLOPs | 18 | | 1800 x 2550 | 113 x 160 x 512 | 657 GB | 181 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. 
The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![faster-rcnn-vggvd-pascal profile](figs/faster-rcnn-vggvd-pascal.png) 23 | -------------------------------------------------------------------------------- /reports/figs/SE-BN-Inception.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/SE-BN-Inception.png -------------------------------------------------------------------------------- /reports/figs/SE-ResNeXt-101-32x4d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/SE-ResNeXt-101-32x4d.png -------------------------------------------------------------------------------- /reports/figs/SE-ResNeXt-50-32x4d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/SE-ResNeXt-50-32x4d.png -------------------------------------------------------------------------------- /reports/figs/SE-ResNet-101.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/SE-ResNet-101.png -------------------------------------------------------------------------------- /reports/figs/SE-ResNet-152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/SE-ResNet-152.png -------------------------------------------------------------------------------- /reports/figs/SE-ResNet-50.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/SE-ResNet-50.png -------------------------------------------------------------------------------- /reports/figs/SENet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/SENet.png -------------------------------------------------------------------------------- /reports/figs/alexnet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/alexnet.png -------------------------------------------------------------------------------- /reports/figs/caffe-ref.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/caffe-ref.png -------------------------------------------------------------------------------- /reports/figs/caffenet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/caffenet.png -------------------------------------------------------------------------------- /reports/figs/deeplab-res101-v2.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/deeplab-res101-v2.png -------------------------------------------------------------------------------- /reports/figs/deeplab-vggvd-v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/deeplab-vggvd-v2.png -------------------------------------------------------------------------------- /reports/figs/densenet121.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/densenet121.png -------------------------------------------------------------------------------- /reports/figs/densenet161.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/densenet161.png -------------------------------------------------------------------------------- /reports/figs/densenet169.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/densenet169.png -------------------------------------------------------------------------------- /reports/figs/densenet201.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/densenet201.png -------------------------------------------------------------------------------- /reports/figs/faster-rcnn-vggvd-pascal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/faster-rcnn-vggvd-pascal.png -------------------------------------------------------------------------------- /reports/figs/googlenet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/googlenet.png -------------------------------------------------------------------------------- /reports/figs/inception-v3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/inception-v3.png -------------------------------------------------------------------------------- /reports/figs/matconvnet-alex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/matconvnet-alex.png -------------------------------------------------------------------------------- /reports/figs/mcn-mobilenet-v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/mcn-mobilenet-v2.png -------------------------------------------------------------------------------- 
/reports/figs/mcn-mobilenet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/mcn-mobilenet.png -------------------------------------------------------------------------------- /reports/figs/multipose-coco.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/multipose-coco.png -------------------------------------------------------------------------------- /reports/figs/multipose-mpi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/multipose-mpi.png -------------------------------------------------------------------------------- /reports/figs/pascal-fcn16s.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/pascal-fcn16s.png -------------------------------------------------------------------------------- /reports/figs/pascal-fcn32s.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/pascal-fcn32s.png -------------------------------------------------------------------------------- /reports/figs/pascal-fcn8s.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/pascal-fcn8s.png -------------------------------------------------------------------------------- /reports/figs/resnet-101.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/resnet-101.png -------------------------------------------------------------------------------- /reports/figs/resnet-152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/resnet-152.png -------------------------------------------------------------------------------- /reports/figs/resnet-50.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/resnet-50.png -------------------------------------------------------------------------------- /reports/figs/resnet18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/resnet18.png -------------------------------------------------------------------------------- /reports/figs/resnet34.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/resnet34.png -------------------------------------------------------------------------------- /reports/figs/resnet50.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/resnet50.png -------------------------------------------------------------------------------- /reports/figs/resnext-101-32x4d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/resnext-101-32x4d.png -------------------------------------------------------------------------------- /reports/figs/resnext-101-64x4d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/resnext-101-64x4d.png -------------------------------------------------------------------------------- /reports/figs/resnext-50-32x4d.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/resnext-50-32x4d.png -------------------------------------------------------------------------------- /reports/figs/rfcn-res101-pascal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/rfcn-res101-pascal.png -------------------------------------------------------------------------------- /reports/figs/rfcn-res50-pascal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/rfcn-res50-pascal.png -------------------------------------------------------------------------------- /reports/figs/squeezenet1-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/squeezenet1-0.png -------------------------------------------------------------------------------- /reports/figs/squeezenet1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/squeezenet1-1.png -------------------------------------------------------------------------------- /reports/figs/ssd-mcn-pascal-vggvd-300.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/ssd-mcn-pascal-vggvd-300.png -------------------------------------------------------------------------------- /reports/figs/ssd-mcn-pascal-vggvd-512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/ssd-mcn-pascal-vggvd-512.png -------------------------------------------------------------------------------- /reports/figs/ssd-pascal-mobilenet-ft.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/ssd-pascal-mobilenet-ft.png 
-------------------------------------------------------------------------------- /reports/figs/ssd-pascal-vggvd-300.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/ssd-pascal-vggvd-300.png -------------------------------------------------------------------------------- /reports/figs/ssd-pascal-vggvd-512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/ssd-pascal-vggvd-512.png -------------------------------------------------------------------------------- /reports/figs/vgg-f.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/vgg-f.png -------------------------------------------------------------------------------- /reports/figs/vgg-m-1024.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/vgg-m-1024.png -------------------------------------------------------------------------------- /reports/figs/vgg-m-128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/vgg-m-128.png -------------------------------------------------------------------------------- /reports/figs/vgg-m-2048.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/vgg-m-2048.png -------------------------------------------------------------------------------- /reports/figs/vgg-m.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/vgg-m.png -------------------------------------------------------------------------------- /reports/figs/vgg-s.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/vgg-s.png -------------------------------------------------------------------------------- /reports/figs/vgg-vd-16-atrous.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/vgg-vd-16-atrous.png -------------------------------------------------------------------------------- /reports/figs/vgg-vd-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/vgg-vd-16.png -------------------------------------------------------------------------------- /reports/figs/vgg-vd-19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/albanie/convnet-burden/9c7f31ba3bc108cb5b5c30a8fcd12afce07c207e/reports/figs/vgg-vd-19.png 
-------------------------------------------------------------------------------- /reports/googlenet.md: -------------------------------------------------------------------------------- 1 | ### Report for googlenet 2 | Model params 51 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 26 MB 7 | * Flops: 2 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `icp9_out` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 3 x 3 x 1024 | 805 MB | 50 GFLOPs | 14 | | 224 x 224 | 7 x 7 x 1024 | 3 GB | 205 GFLOPs | 15 | | 336 x 336 | 10 x 10 x 1024 | 7 GB | 457 GFLOPs | 16 | | 448 x 448 | 14 x 14 x 1024 | 13 GB | 819 GFLOPs | 17 | | 560 x 560 | 17 x 17 x 1024 | 20 GB | 1 TFLOPs | 18 | | 672 x 672 | 21 x 21 x 1024 | 29 GB | 2 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![googlenet profile](figs/googlenet.png) 23 | -------------------------------------------------------------------------------- /reports/inception-v3.md: -------------------------------------------------------------------------------- 1 | ### Report for inception-v3 2 | Model params 91 MB 3 | 4 | Estimates for a single full pass of model at input size 299 x 299: 5 | 6 | * Memory required for features: 89 MB 7 | * Flops: 6 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `features_19` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 299 x 299 | 1 x 1 x 2048 | 11 GB | 735 GFLOPs | 14 | | 449 x 449 | 1 x 1 x 2048 | 26 GB | 2 TFLOPs | 15 | | 598 x 598 | 2 x 2 x 2048 | 47 GB | 3 TFLOPs | 16 | | 748 x 748 | 2 x 2 x 2048 | 75 GB | 5 TFLOPs | 17 | | 897 x 897 | 3 x 3 x 2048 | 108 GB | 7 TFLOPs | 18 | 19 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. 
The goal is simply to give some idea of the overall profile of the model: 20 | 21 | ![inception-v3 profile](figs/inception-v3.png) 22 | -------------------------------------------------------------------------------- /reports/matconvnet-alex.md: -------------------------------------------------------------------------------- 1 | ### Report for matconvnet-alex 2 | Model params 233 MB 3 | 4 | Estimates for a single full pass of model at input size 227 x 227: 5 | 6 | * Memory required for features: 3 MB 7 | * Flops: 727 MFLOPs 8 | 9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 114 x 114 | 2 x 2 x 256 | 73 MB | 15 GFLOPs | 14 | | 227 x 227 | 6 x 6 x 256 | 377 MB | 86 GFLOPs | 15 | | 341 x 341 | 9 x 9 x 256 | 872 MB | 200 GFLOPs | 16 | | 454 x 454 | 13 x 13 x 256 | 2 GB | 361 GFLOPs | 17 | | 568 x 568 | 16 x 16 x 256 | 2 GB | 572 GFLOPs | 18 | | 681 x 681 | 20 x 20 x 256 | 4 GB | 829 GFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![matconvnet-alex profile](figs/matconvnet-alex.png) 23 | --------------------------------------------------------------------------------
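The FLOP totals in these reports are consistent with counting one multiply-accumulate as a single FLOP. Under that convention, a convolution that produces an Hout x Wout x Cout output from Cin input channels with a k x k kernel costs Hout * Wout * k^2 * Cin * Cout operations, which makes it easy to see where a total such as the 727 MFLOPs above comes from. A short MATLAB sketch using the published AlexNet `conv1` hyper-parameters (taken from the original paper, not read from the repository's code):

```matlab
% Multiply-accumulates for a single convolution layer.
in  = 227; k = 11; stride = 4; pad = 0;   % AlexNet conv1 settings
cin = 3;   cout = 96;

out  = floor((in + 2*pad - k) / stride) + 1;   % spatial output size (55)
macs = out^2 * k^2 * cin * cout;               % Hout * Wout * k^2 * Cin * Cout
fprintf('conv1: %dx%d output, %.0f MFLOPs\n', out, out, macs / 1e6);
```

This one layer accounts for roughly 105 MFLOPs, a sizeable share of the ~727 MFLOPs reported for the full forward pass.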
/reports/mcn-mobilenet-v2.md: -------------------------------------------------------------------------------- 1 | ### Report for mcn-mobilenet-v2 2 | Model params 14 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 69 MB 7 | * Flops: 446 MFLOPs 8 | 9 | Estimates are given below of the burden of computing the `fc7` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 224 x 224 | 1 x 1 x 1000 | 9 GB | 57 GFLOPs | 14 | | 336 x 336 | 1 x 1 x 1000 | 20 GB | 130 GFLOPs | 15 | | 448 x 448 | 1 x 1 x 1000 | 35 GB | 228 GFLOPs | 16 | | 560 x 560 | 1 x 1 x 1000 | 54 GB | 360 GFLOPs | 17 | | 672 x 672 | 1 x 1 x 1000 | 78 GB | 512 GFLOPs | 18 | 19 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 20 | 21 | ![mcn-mobilenet-v2 profile](figs/mcn-mobilenet-v2.png) 22 | -------------------------------------------------------------------------------- /reports/mcn-mobilenet.md: -------------------------------------------------------------------------------- 1 | ### Report for mcn-mobilenet 2 | Model params 16 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 38 MB 7 | * Flops: 579 MFLOPs 8 | 9 | Estimates are given below of the burden of computing the `fc7` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 224 x 224 | 1 x 1 x 1000 | 5 GB | 74 GFLOPs | 14 | | 336 x 336 | 1 x 1 x 1000 | 11 GB | 169 GFLOPs | 15 | | 448 x 448 | 1 x 1 x 1000 | 19 GB | 296 GFLOPs | 16 | | 560 x 560 | 1 x 1 x 1000 | 30 GB | 466 GFLOPs | 17 | | 672 x 672 | 1 x 1 x 1000 | 43 GB | 666 GFLOPs | 18 | 19 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 20 | 21 | ![mcn-mobilenet profile](figs/mcn-mobilenet.png) 22 | --------------------------------------------------------------------------------
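The comparatively small FLOP totals in the two MobileNet reports above come from depthwise separable convolutions, which replace a full k x k convolution with a per-channel k x k depthwise pass followed by a 1 x 1 pointwise mix, cutting the multiply-accumulate count by a factor of roughly 1 / (1/Cout + 1/k^2). A rough MATLAB comparison for a single layer (the layer shape below is an illustrative stand-in, not one read from the model):

```matlab
% Standard vs depthwise-separable convolution cost for one layer.
h = 14; w = 14; k = 3; cin = 512; cout = 512;   % illustrative layer shape

standard  = h * w * k^2 * cin * cout;   % full k x k convolution
depthwise = h * w * k^2 * cin;          % one k x k filter per input channel
pointwise = h * w * cin * cout;         % 1 x 1 channel mixing
separable = depthwise + pointwise;

fprintf('standard:  %.0f MFLOPs\n', standard / 1e6);
fprintf('separable: %.0f MFLOPs (%.1fx cheaper)\n', ...
        separable / 1e6, standard / separable);
```

For this shape the separable form is nearly nine times cheaper, which is a large part of why the full mcn-mobilenet pass costs only 579 MFLOPs.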
/reports/multipose-coco.md: -------------------------------------------------------------------------------- 1 | ### Report for multipose-coco 2 | Model params 200 MB 3 | 4 | Estimates for a single full pass of model at input size 368 x 368: 5 | 6 | * Memory required for features: 246 MB 7 | * Flops: 136 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `Mconv6_stage6_L2` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 184 x 184 | 23 x 23 x 128 | 7 GB | 4 TFLOPs | 14 | | 368 x 368 | 46 x 46 x 128 | 30 GB | 16 TFLOPs | 15 | | 552 x 552 | 69 x 69 x 128 | 67 GB | 37 TFLOPs | 16 | | 736 x 736 | 92 x 92 x 128 | 119 GB | 65 TFLOPs | 17 | | 920 x 920 | 115 x 115 x 128 | 186 GB | 101 TFLOPs | 18 | | 1104 x 1104 | 138 x 138 x 128 | 268 GB | 146 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![multipose-coco profile](figs/multipose-coco.png) 23 | -------------------------------------------------------------------------------- /reports/multipose-mpi.md: -------------------------------------------------------------------------------- 1 | ### Report for multipose-mpi 2 | Model params 196 MB 3 | 4 | Estimates for a single full pass of model at input size 368 x 368: 5 | 6 | * Memory required for features: 245 MB 7 | * Flops: 134 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `Mconv6_stage6_L2` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 184 x 184 | 23 x 23 x 128 | 7 GB | 4 TFLOPs | 14 | | 368 x 368 | 46 x 46 x 128 | 30 GB | 16 TFLOPs | 15 | | 552 x 552 | 69 x 69 x 128 | 67 GB | 36 TFLOPs | 16 | | 736 x 736 | 92 x 92 x 128 | 119 GB | 64 TFLOPs | 17 | | 920 x 920 | 115 x 115 x 128 | 185 GB | 100 TFLOPs | 18 | | 1104 x 1104 | 138 x 138 x 128 | 267 GB | 144 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![multipose-mpi profile](figs/multipose-mpi.png) 23 | -------------------------------------------------------------------------------- /reports/pascal-fcn16s.md: -------------------------------------------------------------------------------- 1 | ### Report for pascal-fcn16s 2 | Model params 514 MB 3 | 4 | Estimates for a single full pass of model at input size 384 x 384: 5 | 6 | * Memory required for features: 424 MB 7 | * Flops: 125 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `score_fr` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 192 x 192 | 7 x 7 x 21 | 22 GB | 7 TFLOPs | 14 | | 384 x 384 | 13 x 13 x 21 | 49 GB | 16 TFLOPs | 15 | | 576 x 576 | 19 x 19 x 21 | 87 GB | 29 TFLOPs | 16 | | 768 x 768 | 25 x 25 x 21 | 136 GB | 46 TFLOPs | 17 | | 960 x 960 | 31 x 31 x 21 | 196 GB | 68 TFLOPs | 18 | | 1152 x 1152 | 37 x 37 x 21 | 267 GB | 93 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right.
The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![pascal-fcn16s profile](figs/pascal-fcn16s.png) 23 | -------------------------------------------------------------------------------- /reports/pascal-fcn32s.md: -------------------------------------------------------------------------------- 1 | ### Report for pascal-fcn32s 2 | Model params 519 MB 3 | 4 | Estimates for a single full pass of model at input size 384 x 384: 5 | 6 | * Memory required for features: 423 MB 7 | * Flops: 125 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `score_fr` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 192 x 192 | 7 x 7 x 21 | 22 GB | 7 TFLOPs | 14 | | 384 x 384 | 13 x 13 x 21 | 49 GB | 16 TFLOPs | 15 | | 576 x 576 | 19 x 19 x 21 | 87 GB | 29 TFLOPs | 16 | | 768 x 768 | 25 x 25 x 21 | 136 GB | 46 TFLOPs | 17 | | 960 x 960 | 31 x 31 x 21 | 196 GB | 68 TFLOPs | 18 | | 1152 x 1152 | 37 x 37 x 21 | 267 GB | 93 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![pascal-fcn32s profile](figs/pascal-fcn32s.png) 23 | -------------------------------------------------------------------------------- /reports/pascal-fcn8s.md: -------------------------------------------------------------------------------- 1 | ### Report for pascal-fcn8s 2 | Model params 513 MB 3 | 4 | Estimates for a single full pass of model at input size 384 x 384: 5 | 6 | * Memory required for features: 426 MB 7 | * Flops: 125 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `score_fr` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 192 x 192 | 7 x 7 x 21 | 22 GB | 7 TFLOPs | 14 | | 384 x 384 | 13 x 13 x 21 | 49 GB | 16 TFLOPs | 15 | | 576 x 576 | 19 x 19 x 21 | 87 GB | 29 TFLOPs | 16 | | 768 x 768 | 25 x 25 x 21 | 136 GB | 46 TFLOPs | 17 | | 960 x 960 | 31 x 31 x 21 | 196 GB | 68 TFLOPs | 18 | | 1152 x 1152 | 37 x 37 x 21 | 267 GB | 93 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. 
The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![pascal-fcn8s profile](figs/pascal-fcn8s.png) 23 | -------------------------------------------------------------------------------- /reports/resnet-101.md: -------------------------------------------------------------------------------- 1 | ### Report for resnet-101 2 | Model params 170 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 155 MB 7 | * Flops: 8 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `res5c_relu` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 4 x 4 x 2048 | 5 GB | 251 GFLOPs | 14 | | 224 x 224 | 7 x 7 x 2048 | 19 GB | 974 GFLOPs | 15 | | 336 x 336 | 11 x 11 x 2048 | 44 GB | 2 TFLOPs | 16 | | 448 x 448 | 14 x 14 x 2048 | 77 GB | 4 TFLOPs | 17 | | 560 x 560 | 18 x 18 x 2048 | 121 GB | 6 TFLOPs | 18 | | 672 x 672 | 21 x 21 x 2048 | 174 GB | 9 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![resnet-101 profile](figs/resnet-101.png) 23 | -------------------------------------------------------------------------------- /reports/resnet-152.md: -------------------------------------------------------------------------------- 1 | ### Report for resnet-152 2 | Model params 230 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 219 MB 7 | * Flops: 11 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `res5c_relu` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 4 x 4 x 2048 | 7 GB | 370 GFLOPs | 14 | | 224 x 224 | 7 x 7 x 2048 | 27 GB | 1 TFLOPs | 15 | | 336 x 336 | 11 x 11 x 2048 | 62 GB | 3 TFLOPs | 16 | | 448 x 448 | 14 x 14 x 2048 | 109 GB | 6 TFLOPs | 17 | | 560 x 560 | 18 x 18 x 2048 | 171 GB | 9 TFLOPs | 18 | | 672 x 672 | 21 x 21 x 2048 | 246 GB | 13 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. 
The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![resnet-152 profile](figs/resnet-152.png) 23 | -------------------------------------------------------------------------------- /reports/resnet-50.md: -------------------------------------------------------------------------------- 1 | ### Report for resnet-50 2 | Model params 98 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 103 MB 7 | * Flops: 4 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `res5c_relu` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 4 x 4 x 2048 | 3 GB | 131 GFLOPs | 14 | | 224 x 224 | 7 x 7 x 2048 | 13 GB | 497 GFLOPs | 15 | | 336 x 336 | 11 x 11 x 2048 | 29 GB | 1 TFLOPs | 16 | | 448 x 448 | 14 x 14 x 2048 | 51 GB | 2 TFLOPs | 17 | | 560 x 560 | 18 x 18 x 2048 | 80 GB | 3 TFLOPs | 18 | | 672 x 672 | 21 x 21 x 2048 | 115 GB | 4 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![resnet-50 profile](figs/resnet-50.png) 23 | -------------------------------------------------------------------------------- /reports/resnet18.md: -------------------------------------------------------------------------------- 1 | ### Report for resnet18 2 | Model params 45 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 23 MB 7 | * Flops: 2 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `features_7_1_id_relu` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 4 x 4 x 512 | 734 MB | 62 GFLOPs | 14 | | 224 x 224 | 7 x 7 x 512 | 3 GB | 233 GFLOPs | 15 | | 336 x 336 | 11 x 11 x 512 | 6 GB | 536 GFLOPs | 16 | | 448 x 448 | 14 x 14 x 512 | 11 GB | 932 GFLOPs | 17 | | 560 x 560 | 18 x 18 x 512 | 18 GB | 1 TFLOPs | 18 | | 672 x 672 | 21 x 21 x 512 | 25 GB | 2 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. 
The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![resnet18 profile](figs/resnet18.png) 23 | -------------------------------------------------------------------------------- /reports/resnet34.md: -------------------------------------------------------------------------------- 1 | ### Report for resnet34 2 | Model params 83 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 35 MB 7 | * Flops: 4 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `features_7_2_id_relu` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 4 x 4 x 512 | 1 GB | 124 GFLOPs | 14 | | 224 x 224 | 7 x 7 x 512 | 4 GB | 470 GFLOPs | 15 | | 336 x 336 | 11 x 11 x 512 | 10 GB | 1 TFLOPs | 16 | | 448 x 448 | 14 x 14 x 512 | 17 GB | 2 TFLOPs | 17 | | 560 x 560 | 18 x 18 x 512 | 27 GB | 3 TFLOPs | 18 | | 672 x 672 | 21 x 21 x 512 | 39 GB | 4 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![resnet34 profile](figs/resnet34.png) 23 | -------------------------------------------------------------------------------- /reports/resnet50.md: -------------------------------------------------------------------------------- 1 | ### Report for resnet50 2 | Model params 98 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 107 MB 7 | * Flops: 4 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `features_7_2_id_relu` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 4 x 4 x 2048 | 3 GB | 139 GFLOPs | 14 | | 224 x 224 | 7 x 7 x 2048 | 13 GB | 527 GFLOPs | 15 | | 336 x 336 | 11 x 11 x 2048 | 30 GB | 1 TFLOPs | 16 | | 448 x 448 | 14 x 14 x 2048 | 53 GB | 2 TFLOPs | 17 | | 560 x 560 | 18 x 18 x 2048 | 84 GB | 3 TFLOPs | 18 | | 672 x 672 | 21 x 21 x 2048 | 120 GB | 5 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. 
The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![resnet50 profile](figs/resnet50.png) 23 | -------------------------------------------------------------------------------- /reports/resnext-101-32x4d.md: -------------------------------------------------------------------------------- 1 | ### Report for resnext-101-32x4d 2 | Model params 169 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 197 MB 7 | * Flops: 8 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `features_7_2_id_relu` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 4 x 4 x 2048 | 6 GB | 263 GFLOPs | 14 | | 224 x 224 | 7 x 7 x 2048 | 25 GB | 1 TFLOPs | 15 | | 336 x 336 | 11 x 11 x 2048 | 56 GB | 2 TFLOPs | 16 | | 448 x 448 | 14 x 14 x 2048 | 98 GB | 4 TFLOPs | 17 | | 560 x 560 | 18 x 18 x 2048 | 154 GB | 6 TFLOPs | 18 | | 672 x 672 | 21 x 21 x 2048 | 221 GB | 9 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![resnext-101-32x4d profile](figs/resnext-101-32x4d.png) 23 | -------------------------------------------------------------------------------- /reports/resnext-101-64x4d.md: -------------------------------------------------------------------------------- 1 | ### Report for resnext-101-64x4d 2 | Model params 319 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 273 MB 7 | * Flops: 16 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `features_7_2_id_relu` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 4 x 4 x 2048 | 9 GB | 509 GFLOPs | 14 | | 224 x 224 | 7 x 7 x 2048 | 34 GB | 2 TFLOPs | 15 | | 336 x 336 | 11 x 11 x 2048 | 77 GB | 5 TFLOPs | 16 | | 448 x 448 | 14 x 14 x 2048 | 136 GB | 8 TFLOPs | 17 | | 560 x 560 | 18 x 18 x 2048 | 214 GB | 12 TFLOPs | 18 | | 672 x 672 | 21 x 21 x 2048 | 307 GB | 18 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. 
The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![resnext-101-64x4d profile](figs/resnext-101-64x4d.png) 23 | -------------------------------------------------------------------------------- /reports/resnext-50-32x4d.md: -------------------------------------------------------------------------------- 1 | ### Report for resnext-50-32x4d 2 | Model params 96 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 132 MB 7 | * Flops: 4 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `features_7_2_id_relu` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 4 x 4 x 2048 | 4 GB | 143 GFLOPs | 14 | | 224 x 224 | 7 x 7 x 2048 | 16 GB | 545 GFLOPs | 15 | | 336 x 336 | 11 x 11 x 2048 | 37 GB | 1 TFLOPs | 16 | | 448 x 448 | 14 x 14 x 2048 | 66 GB | 2 TFLOPs | 17 | | 560 x 560 | 18 x 18 x 2048 | 103 GB | 3 TFLOPs | 18 | | 672 x 672 | 21 x 21 x 2048 | 148 GB | 5 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![resnext-50-32x4d profile](figs/resnext-50-32x4d.png) 23 | -------------------------------------------------------------------------------- /reports/rfcn-res101-pascal.md: -------------------------------------------------------------------------------- 1 | ### Report for rfcn-res101-pascal 2 | Model params 194 MB 3 | 4 | Estimates for a single full pass of model at input size 600 x 850: 5 | 6 | * Memory required for features: 2 GB 7 | * Flops: 117 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `res5c_relu` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 300 x 425 | 19 x 27 x 2048 | 55 GB | 3 TFLOPs | 14 | | 600 x 850 | 38 x 53 x 2048 | 218 GB | 13 TFLOPs | 15 | | 900 x 1275 | 57 x 80 x 2048 | 493 GB | 29 TFLOPs | 16 | | 1200 x 1700 | 75 x 107 x 2048 | 871 GB | 51 TFLOPs | 17 | | 1500 x 2125 | 94 x 133 x 2048 | 1 TB | 80 TFLOPs | 18 | | 1800 x 2550 | 113 x 160 x 2048 | 2 TB | 116 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. 
The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![rfcn-res101-pascal profile](figs/rfcn-res101-pascal.png) 23 | -------------------------------------------------------------------------------- /reports/rfcn-res50-pascal.md: -------------------------------------------------------------------------------- 1 | ### Report for rfcn-res50-pascal 2 | Model params 122 MB 3 | 4 | Estimates for a single full pass of model at input size 600 x 850: 5 | 6 | * Memory required for features: 1 GB 7 | * Flops: 79 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `res5c_relu` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 300 x 425 | 19 x 27 x 2048 | 38 GB | 2 TFLOPs | 14 | | 600 x 850 | 38 x 53 x 2048 | 151 GB | 8 TFLOPs | 15 | | 900 x 1275 | 57 x 80 x 2048 | 342 GB | 18 TFLOPs | 16 | | 1200 x 1700 | 75 x 107 x 2048 | 605 GB | 32 TFLOPs | 17 | | 1500 x 2125 | 94 x 133 x 2048 | 944 GB | 50 TFLOPs | 18 | | 1800 x 2550 | 113 x 160 x 2048 | 1 TB | 72 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![rfcn-res50-pascal profile](figs/rfcn-res50-pascal.png) 23 | -------------------------------------------------------------------------------- /reports/squeezenet1-0.md: -------------------------------------------------------------------------------- 1 | ### Report for squeezenet1-0 2 | Model params 5 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 30 MB 7 | * Flops: 837 MFLOPs 8 | 9 | Estimates are given below of the burden of computing the `features_12_cat` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 6 x 6 x 512 | 841 MB | 22 GFLOPs | 14 | | 224 x 224 | 13 x 13 x 512 | 4 GB | 96 GFLOPs | 15 | | 336 x 336 | 20 x 20 x 512 | 8 GB | 221 GFLOPs | 16 | | 448 x 448 | 27 x 27 x 512 | 15 GB | 398 GFLOPs | 17 | | 560 x 560 | 34 x 34 x 512 | 23 GB | 626 GFLOPs | 18 | | 672 x 672 | 41 x 41 x 512 | 33 GB | 906 GFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. 
The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![squeezenet1-0 profile](figs/squeezenet1-0.png) 23 | -------------------------------------------------------------------------------- /reports/squeezenet1-1.md: -------------------------------------------------------------------------------- 1 | ### Report for squeezenet1-1 2 | Model params 5 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 17 MB 7 | * Flops: 360 MFLOPs 8 | 9 | Estimates are given below of the burden of computing the `features_12_cat` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 6 x 6 x 512 | 483 MB | 8 GFLOPs | 14 | | 224 x 224 | 13 x 13 x 512 | 2 GB | 35 GFLOPs | 15 | | 336 x 336 | 20 x 20 x 512 | 5 GB | 81 GFLOPs | 16 | | 448 x 448 | 27 x 27 x 512 | 8 GB | 146 GFLOPs | 17 | | 560 x 560 | 34 x 34 x 512 | 13 GB | 230 GFLOPs | 18 | | 672 x 672 | 41 x 41 x 512 | 19 GB | 333 GFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![squeezenet1-1 profile](figs/squeezenet1-1.png) 23 | -------------------------------------------------------------------------------- /reports/ssd-pascal-mobilenet-ft.md: -------------------------------------------------------------------------------- 1 | ### Report for ssd-pascal-mobilenet-ft 2 | Model params 22 MB 3 | 4 | Estimates for a single full pass of model at input size 300 x 300: 5 | 6 | * Memory required for features: 37 MB 7 | * Flops: 1 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `conv17_2_relu` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 150 x 150 | 1 x 1 x 128 | 1 GB | 39 GFLOPs | 14 | | 300 x 300 | 1 x 1 x 128 | 4 GB | 146 GFLOPs | 15 | | 450 x 450 | 1 x 1 x 128 | 10 GB | 336 GFLOPs | 16 | | 600 x 600 | 2 x 2 x 128 | 17 GB | 574 GFLOPs | 17 | | 750 x 750 | 2 x 2 x 128 | 27 GB | 890 GFLOPs | 18 | | 900 x 900 | 2 x 2 x 128 | 39 GB | 1 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. 
The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![ssd-pascal-mobilenet-ft profile](figs/ssd-pascal-mobilenet-ft.png) 23 | -------------------------------------------------------------------------------- /reports/ssd-pascal-vggvd-300.md: -------------------------------------------------------------------------------- 1 | ### Report for ssd-pascal-vggvd-300 2 | Model params 100 MB 3 | 4 | Estimates for a single full pass of model at input size 300 x 300: 5 | 6 | * Memory required for features: 116 MB 7 | * Flops: 31 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `relu4_3` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 150 x 150 | 19 x 19 x 512 | 3 GB | 818 GFLOPs | 14 | | 300 x 300 | 38 x 38 x 512 | 13 GB | 3 TFLOPs | 15 | | 450 x 450 | 57 x 57 x 512 | 28 GB | 7 TFLOPs | 16 | | 600 x 600 | 75 x 75 x 512 | 50 GB | 13 TFLOPs | 17 | | 750 x 750 | 94 x 94 x 512 | 78 GB | 20 TFLOPs | 18 | | 900 x 900 | 113 x 113 x 512 | 113 GB | 29 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![ssd-pascal-vggvd-300 profile](figs/ssd-pascal-vggvd-300.png) 23 | -------------------------------------------------------------------------------- /reports/ssd-pascal-vggvd-512.md: -------------------------------------------------------------------------------- 1 | ### Report for ssd-pascal-vggvd-512 2 | Model params 104 MB 3 | 4 | Estimates for a single full pass of model at input size 512 x 512: 5 | 6 | * Memory required for features: 337 MB 7 | * Flops: 91 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `relu4_3` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 256 x 256 | 32 x 32 x 512 | 9 GB | 2 TFLOPs | 14 | | 512 x 512 | 64 x 64 x 512 | 36 GB | 9 TFLOPs | 15 | | 768 x 768 | 96 x 96 x 512 | 82 GB | 21 TFLOPs | 16 | | 1024 x 1024 | 128 x 128 x 512 | 146 GB | 37 TFLOPs | 17 | | 1280 x 1280 | 160 x 160 x 512 | 228 GB | 59 TFLOPs | 18 | | 1536 x 1536 | 192 x 192 x 512 | 328 GB | 84 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. 
The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![ssd-pascal-vggvd-512 profile](figs/ssd-pascal-vggvd-512.png) 23 | -------------------------------------------------------------------------------- /reports/vgg-f.md: -------------------------------------------------------------------------------- 1 | ### Report for vgg-f 2 | Model params 232 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 4 MB 7 | * Flops: 727 MFLOPs 8 | 9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 3 x 3 x 256 | 109 MB | 19 GFLOPs | 14 | | 224 x 224 | 6 x 6 x 256 | 476 MB | 86 GFLOPs | 15 | | 336 x 336 | 10 x 10 x 256 | 1 GB | 200 GFLOPs | 16 | | 448 x 448 | 13 x 13 x 256 | 2 GB | 362 GFLOPs | 17 | | 560 x 560 | 17 x 17 x 256 | 3 GB | 571 GFLOPs | 18 | | 672 x 672 | 20 x 20 x 256 | 4 GB | 828 GFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![vgg-f profile](figs/vgg-f.png) 23 | -------------------------------------------------------------------------------- /reports/vgg-m-1024.md: -------------------------------------------------------------------------------- 1 | ### Report for vgg-m-1024 2 | Model params 333 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 12 MB 7 | * Flops: 2 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 3 x 3 x 512 | 365 MB | 44 GFLOPs | 14 | | 224 x 224 | 6 x 6 x 512 | 2 GB | 204 GFLOPs | 15 | | 336 x 336 | 10 x 10 x 512 | 4 GB | 480 GFLOPs | 16 | | 448 x 448 | 13 x 13 x 512 | 6 GB | 874 GFLOPs | 17 | | 560 x 560 | 17 x 17 x 512 | 10 GB | 1 TFLOPs | 18 | | 672 x 672 | 20 x 20 x 512 | 15 GB | 2 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. 
The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![vgg-m-1024 profile](figs/vgg-m-1024.png) 23 | -------------------------------------------------------------------------------- /reports/vgg-m-128.md: -------------------------------------------------------------------------------- 1 | ### Report for vgg-m-128 2 | Model params 315 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 12 MB 7 | * Flops: 2 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 3 x 3 x 512 | 365 MB | 44 GFLOPs | 14 | | 224 x 224 | 6 x 6 x 512 | 2 GB | 204 GFLOPs | 15 | | 336 x 336 | 10 x 10 x 512 | 4 GB | 480 GFLOPs | 16 | | 448 x 448 | 13 x 13 x 512 | 6 GB | 874 GFLOPs | 17 | | 560 x 560 | 17 x 17 x 512 | 10 GB | 1 TFLOPs | 18 | | 672 x 672 | 20 x 20 x 512 | 15 GB | 2 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![vgg-m-128 profile](figs/vgg-m-128.png) 23 | -------------------------------------------------------------------------------- /reports/vgg-m-2048.md: -------------------------------------------------------------------------------- 1 | ### Report for vgg-m-2048 2 | Model params 353 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 12 MB 7 | * Flops: 2 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 3 x 3 x 512 | 365 MB | 44 GFLOPs | 14 | | 224 x 224 | 6 x 6 x 512 | 2 GB | 204 GFLOPs | 15 | | 336 x 336 | 10 x 10 x 512 | 4 GB | 480 GFLOPs | 16 | | 448 x 448 | 13 x 13 x 512 | 6 GB | 874 GFLOPs | 17 | | 560 x 560 | 17 x 17 x 512 | 10 GB | 1 TFLOPs | 18 | | 672 x 672 | 20 x 20 x 512 | 15 GB | 2 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. 
The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![vgg-m-2048 profile](figs/vgg-m-2048.png) 23 | -------------------------------------------------------------------------------- /reports/vgg-m.md: -------------------------------------------------------------------------------- 1 | ### Report for vgg-m 2 | Model params 393 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 12 MB 7 | * Flops: 2 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 3 x 3 x 512 | 365 MB | 44 GFLOPs | 14 | | 224 x 224 | 6 x 6 x 512 | 2 GB | 204 GFLOPs | 15 | | 336 x 336 | 10 x 10 x 512 | 4 GB | 480 GFLOPs | 16 | | 448 x 448 | 13 x 13 x 512 | 6 GB | 874 GFLOPs | 17 | | 560 x 560 | 17 x 17 x 512 | 10 GB | 1 TFLOPs | 18 | | 672 x 672 | 20 x 20 x 512 | 15 GB | 2 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![vgg-m profile](figs/vgg-m.png) 23 | -------------------------------------------------------------------------------- /reports/vgg-s.md: -------------------------------------------------------------------------------- 1 | ### Report for vgg-s 2 | Model params 393 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 12 MB 7 | * Flops: 3 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 3 x 3 x 512 | 348 MB | 58 GFLOPs | 14 | | 224 x 224 | 6 x 6 x 512 | 2 GB | 327 GFLOPs | 15 | | 336 x 336 | 9 x 9 x 512 | 4 GB | 768 GFLOPs | 16 | | 448 x 448 | 12 x 12 x 512 | 6 GB | 1 TFLOPs | 17 | | 560 x 560 | 15 x 15 x 512 | 10 GB | 2 TFLOPs | 18 | | 672 x 672 | 18 x 18 x 512 | 15 GB | 3 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. 
The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![vgg-s profile](figs/vgg-s.png) 23 | -------------------------------------------------------------------------------- /reports/vgg-vd-16-atrous.md: -------------------------------------------------------------------------------- 1 | ### Report for vgg-vd-16-atrous 2 | Model params 82 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 58 MB 7 | * Flops: 16 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 4 x 4 x 512 | 2 GB | 493 GFLOPs | 14 | | 224 x 224 | 7 x 7 x 512 | 7 GB | 2 TFLOPs | 15 | | 336 x 336 | 11 x 11 x 512 | 16 GB | 4 TFLOPs | 16 | | 448 x 448 | 14 x 14 x 512 | 29 GB | 8 TFLOPs | 17 | | 560 x 560 | 18 x 18 x 512 | 45 GB | 12 TFLOPs | 18 | | 672 x 672 | 21 x 21 x 512 | 65 GB | 18 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![vgg-vd-16-atrous profile](figs/vgg-vd-16-atrous.png) 23 | -------------------------------------------------------------------------------- /reports/vgg-vd-16.md: -------------------------------------------------------------------------------- 1 | ### Report for vgg-vd-16 2 | Model params 528 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 58 MB 7 | * Flops: 16 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 4 x 4 x 512 | 2 GB | 493 GFLOPs | 14 | | 224 x 224 | 7 x 7 x 512 | 7 GB | 2 TFLOPs | 15 | | 336 x 336 | 11 x 11 x 512 | 16 GB | 4 TFLOPs | 16 | | 448 x 448 | 14 x 14 x 512 | 29 GB | 8 TFLOPs | 17 | | 560 x 560 | 18 x 18 x 512 | 45 GB | 12 TFLOPs | 18 | | 672 x 672 | 21 x 21 x 512 | 65 GB | 18 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. 
The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![vgg-vd-16 profile](figs/vgg-vd-16.png) 23 | -------------------------------------------------------------------------------- /reports/vgg-vd-19.md: -------------------------------------------------------------------------------- 1 | ### Report for vgg-vd-19 2 | Model params 548 MB 3 | 4 | Estimates for a single full pass of model at input size 224 x 224: 5 | 6 | * Memory required for features: 63 MB 7 | * Flops: 20 GFLOPs 8 | 9 | Estimates are given below of the burden of computing the `pool5` features in the network for different input sizes using a batch size of 128: 10 | 11 | | input size | feature size | feature memory | flops | 12 | |------------|--------------|----------------|-------| 13 | | 112 x 112 | 4 x 4 x 512 | 2 GB | 626 GFLOPs | 14 | | 224 x 224 | 7 x 7 x 512 | 8 GB | 3 TFLOPs | 15 | | 336 x 336 | 11 x 11 x 512 | 18 GB | 6 TFLOPs | 16 | | 448 x 448 | 14 x 14 x 512 | 31 GB | 10 TFLOPs | 17 | | 560 x 560 | 18 x 18 x 512 | 49 GB | 16 TFLOPs | 18 | | 672 x 672 | 21 x 21 x 512 | 70 GB | 23 TFLOPs | 19 | 20 | A rough outline of where in the network memory is allocated to parameters and features and where the greatest computational cost lies is shown below. The x-axis does not show labels (it becomes hard to read for networks containing hundreds of layers) - it should be interpreted as depicting increasing depth from left to right. The goal is simply to give some idea of the overall profile of the model: 21 | 22 | ![vgg-vd-19 profile](figs/vgg-vd-19.png) 23 | -------------------------------------------------------------------------------- /setup_convnet_burden.m: -------------------------------------------------------------------------------- 1 | function setup_convnet_burden() 2 | %SETUP_CONVNET_BURDEN Sets up convnet-burden, by adding its folders 3 | % to the MATLAB path 4 | % 5 | % Copyright (C) 2017 Samuel Albanie 6 | % Licensed under The MIT License [see LICENSE.md for details] 7 | 8 | check_dependency('mcnExtraLayers') ; 9 | root = fileparts(mfilename('fullpath')) ; 10 | addpath(root, [root '/matlab'], [root '/core']) ; 11 | 12 | % ----------------------------------- 13 | function check_dependency(moduleName) 14 | % ----------------------------------- 15 | 16 | name2path = @(name) strrep(name, '-', '_') ; 17 | setupFunc = ['setup_', name2path(moduleName)] ; 18 | if exist(setupFunc, 'file') 19 | vl_contrib('setup', moduleName) ; 20 | else 21 | % try adding the module to the path, suppressing the warning 22 | warning('off', 'MATLAB:dispatcher:pathWarning') ; 23 | addpath(fullfile(vl_rootnn, 'contrib', moduleName)) ; 24 | warning('on', 'MATLAB:dispatcher:pathWarning') ; 25 | 26 | if exist(setupFunc, 'file') 27 | vl_contrib('setup', moduleName) ; 28 | else 29 | waiting = true ; 30 | msg = ['module %s was not found on the MATLAB path. Would you like ' ... 31 | 'to install it now? (y/n)\n'] ; 32 | prompt = sprintf(msg, moduleName) ; 33 | while waiting 34 | str = input(prompt,'s') ; 35 | switch str 36 | case 'y' 37 | vl_contrib('install', moduleName) ; 38 | vl_contrib('compile', moduleName) ; 39 | vl_contrib('setup', moduleName) ; 40 | return ; 41 | case 'n' 42 | error('convnetBurden:missingDependency', 'module %s is required by convnet-burden', moduleName) ; 43 | otherwise 44 | fprintf('input %s not recognised, please use `y` or `n`\n', str) ; 45 | end 46 | end 47 | end 48 | end 49 | 50 | --------------------------------------------------------------------------------
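The setup function above takes no arguments: it checks for the `mcnExtraLayers` dependency and then adds the repository root and its `matlab/` and `core/` folders to the MATLAB path. A minimal usage sketch, assuming a MatConvNet session in which `vl_setupnn` has already been run (the dependency check relies on `vl_contrib` and `vl_rootnn` being available):

```matlab
% One call per MATLAB session. If mcnExtraLayers cannot be found, an
% interactive prompt offers to install and compile it via vl_contrib.
setup_convnet_burden() ;
```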
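On reading the batch-size-128 tables in the reports: the `feature memory` column is, to a good approximation, the single-image feature memory quoted at the top of each report, scaled linearly with batch size and with input area. The sketch below is a back-of-envelope check of the resnet-50 table under that scaling assumption; the exact values come from the per-layer estimates in `core/burden.m`:

```matlab
% Approximate the resnet-50 feature-memory column from the single-image
% estimate (103 MB at 224 x 224, from the report header). Linear scaling
% with batch size and input area is an assumption of this check, not a
% guarantee of the reports.
singleImageFeatMB = 103 ;
batchSize = 128 ;
baseSide = 224 ;
for side = [112 224 336 448 560 672]
  areaRatio = (side / baseSide)^2 ;  % feature maps grow with input area
  featGB = singleImageFeatMB * areaRatio * batchSize / 1024 ;
  fprintf('%3d x %3d -> ~%.1f GB of features\n', side, side, featGB) ;
end
% prints ~3.2, 12.9, 29.0, 51.5, 80.5 and 115.9 GB, matching the table
% entries (3, 13, 29, 51, 80 and 115 GB) to within rounding
```

The flops column scales the same way: 4 GFLOPs per image at 224 x 224 gives roughly 500 GFLOPs at batch size 128, against 497 GFLOPs in the table. Parameter memory, by contrast, is independent of input size; vgg-vd-16's 528 MB, for example, is just its roughly 138 million single-precision weights at 4 bytes each.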