├── LICENSE.txt
├── README.md
├── demo.ipynb
└── pytorch_modelsize.py


/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright 2017 Jacob C. Kimmel
 2 | 
 3 | MIT LICENSE
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 6 | 
 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 8 | 
 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
10 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # PyTorch Model Size Estimator
 2 | 
 3 | This tool estimates the size of a [PyTorch](https://pytorch.org) model in memory for a given input size.  
 4 | Estimating the size of a model in memory is useful when trying to determine an appropriate batch size, or when making architectural decisions.
 5 | 
 6 | **Note (1):** `SizeEstimator` is only valid for models where dimensionality changes are exclusively carried out by modules in `model.modules()`.
 7 | 
 8 | For example, use of `torch.nn.functional.max_pool2d` in the `forward()` method of a model prevents `SizeEstimator` from functioning properly. There is no direct means to access dimensionality changes carried out by arbitrary functions in the `forward()` method, such that tracking the size of inputs and gradients to be stored is non-trivial for such models.
 9 | 
10 | **Note (2):** The size estimates provided by this tool are theoretical estimates only, and the total memory used will vary depending on implementation details. PyTorch utilizes a few hundred MB of memory for CUDA initialization, and the use of cuDNN alters memory usage in a manner that is difficult to predict. See [this discussion on the PyTorch Forums](https://discuss.pytorch.org/t/gpu-memory-estimation-given-a-network/1713) for more detail.
11 | 
12 | [See this blog post](http://jacobkimmel.github.io/pytorch_estimating_model_size/) for an explanation of the size estimation logic.
13 | 
14 | ## Usage
15 | 
16 | To use the size estimator, simply import the `SizeEstimator` class, then provide a model and an input size for estimation.
17 | 
18 | ```python
19 | # Define a model
20 | import torch
21 | import torch.nn as nn
22 | from torch.autograd import Variable
23 | import numpy as np
24 | 
25 | class Model(nn.Module):
26 | 
27 |     def __init__(self):
28 |         super(Model,self).__init__()
29 | 
30 |         self.conv0 = nn.Conv2d(1, 16, kernel_size=3, padding=5)
31 |         self.conv1 = nn.Conv2d(16, 32, kernel_size=3)
32 | 
33 |     def forward(self, x):
34 |         h = self.conv0(x)
35 |         h = self.conv1(h)
36 |         return h
37 | 
38 | model = Model()
39 | 
40 | # Estimate Size
41 | from pytorch_modelsize import SizeEstimator
42 | 
43 | se = SizeEstimator(model, input_size=(16,1,256,256))
44 | print(se.estimate_size())
45 | 
46 | # Returns
47 | # (size in megabytes, size in bits)
48 | # (408.283935546875, 3424933888)
49 | 
50 | print(se.param_bits) # bits taken up by parameters
51 | print(se.forward_backward_bits) # bits stored for forward and backward
52 | print(se.input_bits) # bits for input
53 | ```
54 | 
55 | ## Development
56 | 
57 | This tool is a product of the [Laboratory of Cell Geometry](https://cellgeometry.ucsf.edu/) at the [University of California, San Francisco](https://ucsf.edu).
58 | 


--------------------------------------------------------------------------------
/demo.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": true
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "import torch\n",
 12 |     "import torch.nn as nn\n",
 13 |     "from torch.autograd import Variable\n",
 14 |     "import numpy as np"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 2,
 20 |    "metadata": {
 21 |     "collapsed": true
 22 |    },
 23 |    "outputs": [],
 24 |    "source": [
 25 |     "class Model(nn.Module):\n",
 26 |     "    \n",
 27 |     "    def __init__(self):\n",
 28 |     "        super(Model,self).__init__()\n",
 29 |     "        \n",
 30 |     "        self.conv0 = nn.Conv2d(1, 16, kernel_size=3, padding=5)\n",
 31 |     "        self.conv1 = nn.Conv2d(16, 32, kernel_size=3)\n",
 32 |     "\n",
 33 |     "    def forward(self, x):\n",
 34 |     "        h = self.conv0(x)\n",
 35 |     "        h = self.conv1(h)\n",
 36 |     "        return h\n",
 37 |     "    \n",
 38 |     "model = Model()"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": 3,
 44 |    "metadata": {
 45 |     "collapsed": true
 46 |    },
 47 |    "outputs": [],
 48 |    "source": [
 49 |     "import torch\n",
 50 |     "import torch.nn as nn\n",
 51 |     "from torch.autograd import Variable\n",
 52 |     "import numpy as np\n",
 53 |     "\n",
 54 |     "class SizeEstimator(object):\n",
 55 |     "\n",
 56 |     "    def __init__(self, model, input_size=(1,1,32,32), bits=32):\n",
 57 |     "        '''\n",
 58 |     "        Estimates the size of PyTorch models in memory\n",
 59 |     "        for a given input size\n",
 60 |     "        '''\n",
 61 |     "        self.model = model\n",
 62 |     "        self.input_size = input_size\n",
 63 |     "        self.bits = 32\n",
 64 |     "        return\n",
 65 |     "\n",
 66 |     "    def get_parameter_sizes(self):\n",
 67 |     "        '''Get sizes of all parameters in `model`'''\n",
 68 |     "        mods = list(model.modules())\n",
 69 |     "        for i in range(1,len(mods)):\n",
 70 |     "            m = mods[i]\n",
 71 |     "            p = list(m.parameters())\n",
 72 |     "            sizes = []\n",
 73 |     "            for j in range(len(p)):\n",
 74 |     "                sizes.append(np.array(p[j].size()))\n",
 75 |     "\n",
 76 |     "        self.param_sizes = sizes\n",
 77 |     "        return\n",
 78 |     "\n",
 79 |     "    def get_output_sizes(self):\n",
 80 |     "        '''Run sample input through each layer to get output sizes'''\n",
 81 |     "        input_ = Variable(torch.FloatTensor(*self.input_size), volatile=True)\n",
 82 |     "        mods = list(model.modules())\n",
 83 |     "        out_sizes = []\n",
 84 |     "        for i in range(1, len(mods)):\n",
 85 |     "            m = mods[i]\n",
 86 |     "            out = m(input_)\n",
 87 |     "            out_sizes.append(np.array(out.size()))\n",
 88 |     "            input_ = out\n",
 89 |     "\n",
 90 |     "        self.out_sizes = out_sizes\n",
 91 |     "        return\n",
 92 |     "\n",
 93 |     "    def calc_param_bits(self):\n",
 94 |     "        '''Calculate total number of bits to store `model` parameters'''\n",
 95 |     "        total_bits = 0\n",
 96 |     "        for i in range(len(self.param_sizes)):\n",
 97 |     "            s = self.param_sizes[i]\n",
 98 |     "            bits = np.prod(np.array(s))*self.bits\n",
 99 |     "            total_bits += bits\n",
100 |     "        self.param_bits = total_bits\n",
101 |     "        return\n",
102 |     "\n",
103 |     "    def calc_forward_backward_bits(self):\n",
104 |     "        '''Calculate bits to store forward and backward pass'''\n",
105 |     "        total_bits = 0\n",
106 |     "        for i in range(len(self.out_sizes)):\n",
107 |     "            s = self.out_sizes[i]\n",
108 |     "            bits = np.prod(np.array(s))*self.bits\n",
109 |     "            total_bits += bits\n",
110 |     "        # multiply by 2 for both forward AND backward\n",
111 |     "        self.forward_backward_bits = (total_bits*2)\n",
112 |     "        return\n",
113 |     "\n",
114 |     "    def calc_input_bits(self):\n",
115 |     "        '''Calculate bits to store input'''\n",
116 |     "        self.input_bits = np.prod(np.array(self.input_size))*self.bits\n",
117 |     "        return\n",
118 |     "\n",
119 |     "    def estimate_size(self):\n",
120 |     "        '''Estimate model size in memory in megabytes and bits'''\n",
121 |     "        self.get_parameter_sizes()\n",
122 |     "        self.get_output_sizes()\n",
123 |     "        self.calc_param_bits()\n",
124 |     "        self.calc_forward_backward_bits()\n",
125 |     "        self.calc_input_bits()\n",
126 |     "        total = self.param_bits + self.forward_backward_bits + self.input_bits\n",
127 |     "\n",
128 |     "        total_megabytes = (total/8)/(1024**2)\n",
129 |     "        return total_megabytes, total\n"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": 4,
135 |    "metadata": {
136 |     "scrolled": true
137 |    },
138 |    "outputs": [
139 |     {
140 |      "name": "stdout",
141 |      "output_type": "stream",
142 |      "text": [
143 |       "(0.5694580078125, 4776960)\n"
144 |      ]
145 |     }
146 |    ],
147 |    "source": [
148 |     "se = SizeEstimator(model)\n",
149 |     "print(se.estimate_size())"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "code",
154 |    "execution_count": 5,
155 |    "metadata": {},
156 |    "outputs": [
157 |     {
158 |      "name": "stdout",
159 |      "output_type": "stream",
160 |      "text": [
161 |       "(408.2833251953125, 3424928768)\n"
162 |      ]
163 |     }
164 |    ],
165 |    "source": [
166 |     "se = SizeEstimator(model, input_size=(16,1,256,256))\n",
167 |     "print(se.estimate_size())"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "code",
172 |    "execution_count": null,
173 |    "metadata": {
174 |     "collapsed": true
175 |    },
176 |    "outputs": [],
177 |    "source": []
178 |   }
179 |  ],
180 |  "metadata": {
181 |   "kernelspec": {
182 |    "display_name": "Python 3",
183 |    "language": "python",
184 |    "name": "python3"
185 |   },
186 |   "language_info": {
187 |    "codemirror_mode": {
188 |     "name": "ipython",
189 |     "version": 3
190 |    },
191 |    "file_extension": ".py",
192 |    "mimetype": "text/x-python",
193 |    "name": "python",
194 |    "nbconvert_exporter": "python",
195 |    "pygments_lexer": "ipython3",
196 |    "version": "3.6.2"
197 |   }
198 |  },
199 |  "nbformat": 4,
200 |  "nbformat_minor": 2
201 | }
202 | 


--------------------------------------------------------------------------------
/pytorch_modelsize.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from torch.autograd import Variable
 4 | import numpy as np
 5 | 
 6 | class SizeEstimator(object):
 7 | 
 8 |     def __init__(self, model, input_size=(1,1,32,32), bits=32):
 9 |         '''
10 |         Estimates the size of PyTorch models in memory
11 |         for a given input size
12 |         '''
13 |         self.model = model
14 |         self.input_size = input_size
15 |         self.bits = bits
16 |         return
17 | 
18 |     def get_parameter_sizes(self):
19 |         '''Get sizes of all parameters in `model`'''
20 |         mods = list(self.model.modules())
21 |         sizes = []
22 |         
23 |         for i in range(1,len(mods)):
24 |             m = mods[i]
25 |             p = list(m.parameters())
26 |             for j in range(len(p)):
27 |                 sizes.append(np.array(p[j].size()))
28 | 
29 |         self.param_sizes = sizes
30 |         return
31 | 
32 |     def get_output_sizes(self):
33 |         '''Run sample input through each layer to get output sizes'''
34 |         input_ = Variable(torch.FloatTensor(*self.input_size), volatile=True)
35 |         mods = list(self.model.modules())
36 |         out_sizes = []
37 |         for i in range(1, len(mods)):
38 |             m = mods[i]
39 |             out = m(input_)
40 |             out_sizes.append(np.array(out.size()))
41 |             input_ = out
42 | 
43 |         self.out_sizes = out_sizes
44 |         return
45 | 
46 |     def calc_param_bits(self):
47 |         '''Calculate total number of bits to store `model` parameters'''
48 |         total_bits = 0
49 |         for i in range(len(self.param_sizes)):
50 |             s = self.param_sizes[i]
51 |             bits = np.prod(np.array(s))*self.bits
52 |             total_bits += bits
53 |         self.param_bits = total_bits
54 |         return
55 | 
56 |     def calc_forward_backward_bits(self):
57 |         '''Calculate bits to store forward and backward pass'''
58 |         total_bits = 0
59 |         for i in range(len(self.out_sizes)):
60 |             s = self.out_sizes[i]
61 |             bits = np.prod(np.array(s))*self.bits
62 |             total_bits += bits
63 |         # multiply by 2 for both forward AND backward
64 |         self.forward_backward_bits = (total_bits*2)
65 |         return
66 | 
67 |     def calc_input_bits(self):
68 |         '''Calculate bits to store input'''
69 |         self.input_bits = np.prod(np.array(self.input_size))*self.bits
70 |         return
71 | 
72 |     def estimate_size(self):
73 |         '''Estimate model size in memory in megabytes and bits'''
74 |         self.get_parameter_sizes()
75 |         self.get_output_sizes()
76 |         self.calc_param_bits()
77 |         self.calc_forward_backward_bits()
78 |         self.calc_input_bits()
79 |         total = self.param_bits + self.forward_backward_bits + self.input_bits
80 | 
81 |         total_megabytes = (total/8)/(1024**2)
82 |         return total_megabytes, total
83 | 


--------------------------------------------------------------------------------