├── LICENSE.txt ├── README.md ├── demo.ipynb └── pytorch_modelsize.py /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright 2017 Jacob C. Kimmel 2 | 3 | MIT LICENSE 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyTorch Model Size Estimator 2 | 3 | This tool estimates the size of a [PyTorch](https://pytorch.org) model in memory for a given input size. 4 | Estimating the size of a model in memory is useful when trying to determine an appropriate batch size, or when making architectural decisions. 5 | 6 | **Note (1):** `SizeEstimator` is only valid for models where dimensionality changes are exclusively carried out by modules in `model.modules()`. 
7 | 8 | For example, use of `nn.functional.max_pool2d` in the `forward()` method of a model prevents `SizeEstimator` from functioning properly. There is no direct means to access dimensionality changes carried out by arbitrary functions in the `forward()` method, such that tracking the size of inputs and gradients to be stored is non-trivial for such models. 9 | 10 | **Note (2):** The size estimates provided by this tool are theoretical estimates only, and the total memory used will vary depending on implementation details. PyTorch utilizes a few hundred MB of memory for CUDA initialization, and the use of cuDNN alters memory usage in a manner that is difficult to predict. See [this discussion on the PyTorch Forums](https://discuss.pytorch.org/t/gpu-memory-estimation-given-a-network/1713) for more detail. 11 | 12 | [See this blog post](http://jacobkimmel.github.io/pytorch_estimating_model_size/) for an explanation of the size estimation logic. 13 | 14 | ## Usage 15 | 16 | To use the size estimator, simply import the `SizeEstimator` class, then provide a model and an input size for estimation. 
17 | 18 | ```python 19 | # Define a model 20 | import torch 21 | import torch.nn as nn 22 | from torch.autograd import Variable 23 | import numpy as np 24 | 25 | class Model(nn.Module): 26 | 27 | def __init__(self): 28 | super(Model,self).__init__() 29 | 30 | self.conv0 = nn.Conv2d(1, 16, kernel_size=3, padding=5) 31 | self.conv1 = nn.Conv2d(16, 32, kernel_size=3) 32 | 33 | def forward(self, x): 34 | h = self.conv0(x) 35 | h = self.conv1(h) 36 | return h 37 | 38 | model = Model() 39 | 40 | # Estimate Size 41 | from pytorch_modelsize import SizeEstimator 42 | 43 | se = SizeEstimator(model, input_size=(16,1,256,256)) 44 | print(se.estimate_size()) 45 | 46 | # Returns 47 | # (size in megabytes, size in bits) 48 | # (408.283935546875, 3424933888) 49 | 50 | print(se.param_bits) # bits taken up by parameters 51 | print(se.forward_backward_bits) # bits stored for forward and backward 52 | print(se.input_bits) # bits for input 53 | ``` 54 | 55 | ## Development 56 | 57 | This tool is a product of the [Laboratory of Cell Geometry](https://cellgeometry.ucsf.edu/) at the [University of California, San Francisco](https://ucsf.edu). 
58 | -------------------------------------------------------------------------------- /demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import torch\n", 12 | "import torch.nn as nn\n", 13 | "from torch.autograd import Variable\n", 14 | "import numpy as np" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "class Model(nn.Module):\n", 26 | " \n", 27 | " def __init__(self):\n", 28 | " super(Model,self).__init__()\n", 29 | " \n", 30 | " self.conv0 = nn.Conv2d(1, 16, kernel_size=3, padding=5)\n", 31 | " self.conv1 = nn.Conv2d(16, 32, kernel_size=3)\n", 32 | "\n", 33 | " def forward(self, x):\n", 34 | " h = self.conv0(x)\n", 35 | " h = self.conv1(h)\n", 36 | " return h\n", 37 | " \n", 38 | "model = Model()" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": { 45 | "collapsed": true 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "import torch\n", 50 | "import torch.nn as nn\n", 51 | "from torch.autograd import Variable\n", 52 | "import numpy as np\n", 53 | "\n", 54 | "class SizeEstimator(object):\n", 55 | "\n", 56 | " def __init__(self, model, input_size=(1,1,32,32), bits=32):\n", 57 | " '''\n", 58 | " Estimates the size of PyTorch models in memory\n", 59 | " for a given input size\n", 60 | " '''\n", 61 | " self.model = model\n", 62 | " self.input_size = input_size\n", 63 | " self.bits = 32\n", 64 | " return\n", 65 | "\n", 66 | " def get_parameter_sizes(self):\n", 67 | " '''Get sizes of all parameters in `model`'''\n", 68 | " mods = list(model.modules())\n", 69 | " for i in range(1,len(mods)):\n", 70 | " m = mods[i]\n", 71 | " p = list(m.parameters())\n", 72 | " sizes = []\n", 73 | " for j in 
range(len(p)):\n", 74 | " sizes.append(np.array(p[j].size()))\n", 75 | "\n", 76 | " self.param_sizes = sizes\n", 77 | " return\n", 78 | "\n", 79 | " def get_output_sizes(self):\n", 80 | " '''Run sample input through each layer to get output sizes'''\n", 81 | " input_ = Variable(torch.FloatTensor(*self.input_size), volatile=True)\n", 82 | " mods = list(model.modules())\n", 83 | " out_sizes = []\n", 84 | " for i in range(1, len(mods)):\n", 85 | " m = mods[i]\n", 86 | " out = m(input_)\n", 87 | " out_sizes.append(np.array(out.size()))\n", 88 | " input_ = out\n", 89 | "\n", 90 | " self.out_sizes = out_sizes\n", 91 | " return\n", 92 | "\n", 93 | " def calc_param_bits(self):\n", 94 | " '''Calculate total number of bits to store `model` parameters'''\n", 95 | " total_bits = 0\n", 96 | " for i in range(len(self.param_sizes)):\n", 97 | " s = self.param_sizes[i]\n", 98 | " bits = np.prod(np.array(s))*self.bits\n", 99 | " total_bits += bits\n", 100 | " self.param_bits = total_bits\n", 101 | " return\n", 102 | "\n", 103 | " def calc_forward_backward_bits(self):\n", 104 | " '''Calculate bits to store forward and backward pass'''\n", 105 | " total_bits = 0\n", 106 | " for i in range(len(self.out_sizes)):\n", 107 | " s = self.out_sizes[i]\n", 108 | " bits = np.prod(np.array(s))*self.bits\n", 109 | " total_bits += bits\n", 110 | " # multiply by 2 for both forward AND backward\n", 111 | " self.forward_backward_bits = (total_bits*2)\n", 112 | " return\n", 113 | "\n", 114 | " def calc_input_bits(self):\n", 115 | " '''Calculate bits to store input'''\n", 116 | " self.input_bits = np.prod(np.array(self.input_size))*self.bits\n", 117 | " return\n", 118 | "\n", 119 | " def estimate_size(self):\n", 120 | " '''Estimate model size in memory in megabytes and bits'''\n", 121 | " self.get_parameter_sizes()\n", 122 | " self.get_output_sizes()\n", 123 | " self.calc_param_bits()\n", 124 | " self.calc_forward_backward_bits()\n", 125 | " self.calc_input_bits()\n", 126 | " total = self.param_bits + 
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np


class SizeEstimator(object):
    '''
    Estimates the size of PyTorch models in memory
    for a given input size.

    The estimate is the sum of three terms, all expressed in bits:

    * the model parameters (`param_bits`),
    * the output of every leaf layer, counted twice to cover the
      forward activations and the backward gradients
      (`forward_backward_bits`),
    * the input tensor itself (`input_bits`).

    Layer outputs are obtained by feeding a dummy tensor through the leaf
    modules of `model` one after another, in the order `model.modules()`
    yields them. The model's own `forward()` is never called, so shape
    changes performed there outside of a module are not seen.
    '''

    def __init__(self, model, input_size=(1,1,32,32), bits=32):
        '''
        Parameters
        ----------
        model : torch.nn.Module
            Model to measure.
        input_size : sequence of int
            Shape of the input tensor, batch dimension included.
        bits : int
            Number of bits used to store a single value.
        '''
        self.model = model
        self.input_size = input_size
        self.bits = bits

    @staticmethod
    def _count_elements(shape):
        '''Number of elements in a tensor of shape `shape`, as a Python int.'''
        # An explicit 64-bit accumulator avoids overflow on platforms where
        # numpy's default integer is 32 bits wide; converting to a Python
        # int keeps every later sum and product exact.
        return int(np.prod(shape, dtype=np.int64))

    def _leaf_modules(self):
        '''Modules of `model` that have no children, in `modules()` order.'''
        # Containers (e.g. nn.Sequential) are skipped: calling them would
        # run their children a second time. A model that is itself a single
        # layer has no children and is therefore its own only leaf.
        return [m for m in self.model.modules() if not list(m.children())]

    def get_parameter_sizes(self):
        '''Get sizes of all parameters in `model`'''
        # `model.parameters()` yields each parameter exactly once. Calling
        # `parameters()` on every submodule instead would count the
        # parameters of nested containers repeatedly.
        self.param_sizes = [np.array(p.size())
                            for p in self.model.parameters()]

    def get_output_sizes(self):
        '''Run sample input through each layer to get output sizes'''
        # Zeros rather than uninitialised memory, so the probe is
        # deterministic.
        sample = torch.zeros(tuple(self.input_size))
        out_sizes = []
        # `volatile=True` is ignored by PyTorch >= 0.4, so autograd tracking
        # is switched off explicitly while probing.
        # NOTE: layers run in their current train/eval mode.
        with torch.no_grad():
            for layer in self._leaf_modules():
                sample = layer(sample)
                out_sizes.append(np.array(sample.size()))

        self.out_sizes = out_sizes

    def calc_param_bits(self):
        '''Calculate total number of bits to store `model` parameters'''
        n_values = sum(self._count_elements(s) for s in self.param_sizes)
        self.param_bits = n_values * self.bits

    def calc_forward_backward_bits(self):
        '''Calculate bits to store forward and backward pass'''
        n_values = sum(self._count_elements(s) for s in self.out_sizes)
        # multiply by 2 for both forward AND backward
        self.forward_backward_bits = n_values * self.bits * 2

    def calc_input_bits(self):
        '''Calculate bits to store input'''
        self.input_bits = self._count_elements(self.input_size) * self.bits

    def estimate_size(self):
        '''
        Estimate model size in memory in megabytes and bits

        Returns
        -------
        total_megabytes : float
            Estimated size in megabytes (1 MB = 1024**2 bytes).
        total : int
            Estimated size in bits.
        '''
        self.get_parameter_sizes()
        self.get_output_sizes()
        self.calc_param_bits()
        self.calc_forward_backward_bits()
        self.calc_input_bits()
        total = self.param_bits + self.forward_backward_bits + self.input_bits

        # Float literal forces true division on Python 2 as well.
        total_megabytes = (total / 8.0) / (1024 ** 2)
        return total_megabytes, total