├── .gitignore
├── LICENSE
├── README.md
├── nnprof
│   ├── __init__.py
│   ├── info_table.py
│   ├── profiler.py
│   └── utils.py
├── setup.py
└── tests
    └── test_nnprofile.py

/.gitignore:
--------------------------------------------------------------------------------
1 | # unwanted data files
2 | *.jpg
3 | *.png
4 | *.txt
5 | 
6 | # compilation and distribution
7 | __pycache__
8 | _ext
9 | *.pyc
10 | *.so
11 | *.egg-info/
12 | build/
13 | dist/
14 | wheels/
15 | 
16 | # pytorch/python/numpy formats
17 | *.pth
18 | *.pkl
19 | *.npy
20 | 
21 | # ipython/jupyter notebooks
22 | *.ipynb
23 | **/.ipynb_checkpoints/
24 | 
25 | # Editor temporaries
26 | *.swn
27 | *.swo
28 | *.swp
29 | *~
30 | 
31 | # Pycharm editor settings
32 | .idea
33 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 | 
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 | 1. Definitions.
8 | 
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 | 
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 | 
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 | 
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 | 
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 | 
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 | 
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 | 
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 | 
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 | 
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 | 
176 | END OF TERMS AND CONDITIONS
177 | 
178 | APPENDIX: How to apply the Apache License to your work.
179 | 
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 | 
189 | Copyright [yyyy] [name of copyright owner]
190 | 
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 | 
195 | http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # nnprof
2 | 
3 | ## Introduction
4 | 
5 | nnprof is a profiling tool for PyTorch neural networks.
6 | 
7 | ### Features
8 | 
9 | * **multiple profile modes**: nnprof supports 4 profile modes: Layer level, Operation level, Mixed level, and Layer Tree level. Please check below for detailed usage.
10 | * **time and memory profiling**: nnprof supports both time and memory profiling. Since memory profiling was first supported in PyTorch 1.6, please use torch >= 1.6 for memory profiling.
11 | * **sorting by a given key with percentages**: users can print the table sorted by a given key and with percentages shown, which is really helpful for optimizing a neural network.
12 | 
13 | ## Requirements
14 | 
15 | * Python >= 3.6
16 | * PyTorch
17 | * Numpy
18 | 
19 | ## Get Started
20 | 
21 | ### install nnprof
22 | * pip install:
23 | ```shell
24 | pip install nnprof
25 | ```
26 | * from source:
27 | ```shell
28 | python -m pip install 'git+https://github.com/FateScript/nnprof.git'
29 | 
30 | # or install after cloning this repo
31 | git clone https://github.com/FateScript/nnprof.git
32 | pip install -e nnprof
33 | ```
34 | 
35 | ### use nnprof
36 | 
37 | ```python3
38 | from nnprof import profile, ProfileMode
39 | import torch
40 | import torchvision
41 | 
42 | model = torchvision.models.alexnet(pretrained=False)
43 | x = torch.rand([1, 3, 224, 224])
44 | 
45 | # mode could be any one of LAYER, OP, MIXED, LAYER_TREE
46 | mode = ProfileMode.LAYER
47 | 
48 | with profile(model, mode=mode) as prof:
49 |     y = model(x)
50 | 
51 | print(prof.table(average=False, sorted_by="cpu_time"))
52 | # the table can be sorted by any presented header.
53 | ```
54 | 
55 | Part of the presented table looks like the table below; note that rows are sorted by cpu_time.
56 | ```
57 | ╒══════════════════════╤═══════════════════╤═══════════════════╤════════╕
58 | │ name                 │ self_cpu_time     │ cpu_time          │   hits │
59 | ╞══════════════════════╪═══════════════════╪═══════════════════╪════════╡
60 | │ AlexNet.features.0   │ 19.114ms (34.77%) │ 76.383ms (45.65%) │      1 │
61 | ├──────────────────────┼───────────────────┼───────────────────┼────────┤
62 | │ AlexNet.features.3   │ 5.148ms (9.37%)   │ 20.576ms (12.30%) │      1 │
63 | ├──────────────────────┼───────────────────┼───────────────────┼────────┤
64 | │ AlexNet.features.8   │ 4.839ms (8.80%)   │ 19.336ms (11.56%) │      1 │
65 | ├──────────────────────┼───────────────────┼───────────────────┼────────┤
66 | │ AlexNet.features.6   │ 4.162ms (7.57%)   │ 16.632ms (9.94%)  │      1 │
67 | ├──────────────────────┼───────────────────┼───────────────────┼────────┤
68 | │ AlexNet.features.10  │ 2.705ms (4.92%)   │ 10.713ms (6.40%)  │      1 │
69 | ├──────────────────────┼───────────────────┼───────────────────┼────────┤
70 | ```
71 | 
72 | You are welcome to try different profile modes and more table formats; see the extra examples at the end of this README.
73 | 
74 | ## Contribution
75 | 
76 | Issues and pull requests are welcome.
77 | 
78 | ## Acknowledgement
79 | 
80 | Some ideas in nnprof are inspired by [torchprof](https://github.com/awwong1/torchprof) and [torch.autograd.profiler](https://github.com/pytorch/pytorch/blob/749f8b78508c43f9e6331f2395a4202785068442/torch/autograd/profiler.py).
81 | Many thanks to the authors.
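82 | 
83 | ## More examples
84 | 
85 | The snippets below are minimal sketches of the features listed above; the AlexNet model and random input are only placeholders, so swap in your own network and data.
86 | 
87 | LAYER_TREE mode presents layer-level results as a tree. The `max_depth` keyword of `table()` controls how many module levels are printed (sorting and row limits are not applied in this mode):
88 | 
89 | ```python3
90 | from nnprof import profile, ProfileMode
91 | import torch
92 | import torchvision
93 | 
94 | model = torchvision.models.alexnet(pretrained=False)
95 | x = torch.rand([1, 3, 224, 224])
96 | 
97 | with profile(model, mode=ProfileMode.LAYER_TREE) as prof:
98 |     y = model(x)
99 | 
100 | # print at most 2 module levels of the tree
101 | print(prof.table(max_depth=2))
102 | ```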
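103 | 
104 | Continuing the same sketch, memory profiling (torch >= 1.6) is enabled with `profile_memory=True`; memory columns such as `self_cpu_mem` and `cpu_mem` then appear in the table and can serve as sort keys:
105 | 
106 | ```python3
107 | # memory columns are filled only when profile_memory=True
108 | with profile(model, profile_memory=True) as prof:
109 |     y = model(x)
110 | 
111 | print(prof.table(sorted_by="self_cpu_mem"))
112 | ```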
113 | 
--------------------------------------------------------------------------------
/nnprof/__init__.py:
--------------------------------------------------------------------------------
1 | from .profiler import ProfileMode, profile
2 | from .info_table import InfoTable, TreeTable
3 | 
--------------------------------------------------------------------------------
/nnprof/info_table.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding:utf-8 -*-
3 | 
4 | import numpy as np
5 | import tabulate
6 | 
7 | from torch.autograd.profiler import format_time
8 | try:
9 |     from torch.autograd.profiler import format_memory
10 | except ImportError:
11 |     from .utils import format_memory
12 | 
13 | 
14 | class InfoTable:
15 | 
16 |     def __init__(self, headers, data, with_percent=False):
17 |         """
18 |         Args:
19 |             headers (Iterable[string]): headers of the info table.
20 |             data (Iterable[numpy.array]): data of the table.
21 |             with_percent (bool): whether data is presented with percentages or not.
22 |         """
23 |         assert len(headers) == len(data), "length of headers and data are not matched"
24 |         self.headers = headers
25 |         self.info = {key: value for key, value in zip(headers, data)}
26 |         self.with_percent = with_percent
27 | 
28 |     def insert(self, header, data, position=-1):
29 |         """
30 |         Insert a header and its data into the current table at the given position.
31 | 
32 |         Args:
33 |             header (string): header of the inserted column.
34 |             data (numpy.array): data of the inserted column.
35 |             position (int): insert position, used in the same way as a list index.
36 |         """
37 | 
38 |         self.info[header] = data
39 |         if header in self.headers:
40 |             index = self.headers.index(header)
41 |             # swap the two headers in place; a nested swap(a, b) helper that
42 |             # only rebinds its local names would have no effect
43 |             self.headers[index], self.headers[position] = \
44 |                 self.headers[position], self.headers[index]
45 |         else:
46 |             self.headers.insert(position, header)
47 | 
48 |     def sorted_by(self, keyname=None, descending=True):
49 |         """
50 |         Sort the table by keyname.
51 | 
52 |         Args:
53 |             keyname (string): name of the header to sort by.
54 |             descending (bool): whether to sort in descending order or not.
55 |         """
56 |         if keyname is None:
57 |             return self
58 |         if keyname not in self.info:
59 |             keyname += "_avg"
60 |         assert keyname in self.info
61 |         sort_index = np.argsort(self.info[keyname], axis=0).reshape(-1)
62 |         if descending:
63 |             sort_index = sort_index[::-1]
64 |         for header in self.headers:
65 |             self.info[header] = self.info[header][sort_index]
66 | 
67 |         return self
68 | 
69 |     def filter(self, filter_list=None):
70 |         """
71 |         Filter out headers (and their data) listed in filter_list.
72 | 
73 |         Args:
74 |             filter_list (Iterable[string]): list of headers that need to be filtered out.
75 |         """
76 |         self.headers = [header for header in self.headers if header not in (filter_list or [])]
77 | 
78 |     def filter_zeros(self):
79 |         """Filter out all-zeros data."""
80 |         filter_list = []
81 |         for header in self.headers:
82 |             data = self.info[header]
83 |             if "float" in data.dtype.name or "int" in data.dtype.name:
84 |                 if data.sum() == 0:
85 |                     filter_list.append(header)
86 |         self.filter(filter_list)
87 | 
88 |     def average(self, average_key="hits"):
89 |         """
90 |         Average time/memory data by the given key.
91 | 
92 |         Args:
93 |             average_key (string): header whose data is used as the divisor.
94 |         """
95 |         hits = self.info[average_key]
96 |         for i, header in enumerate(self.headers):
97 |             if header.endswith("time") or header.endswith("mem"):
98 |                 self.info[header + "_avg"] = self.info[header] / hits
99 |                 self.headers[i] += "_avg"
100 |                 del self.info[header]
101 | 
102 |     def row_limit(self, limit=None):
103 |         """
104 |         Return the table limited to the first `limit` rows.
105 | 
106 |         Args:
107 |             limit (int): maximum number of rows, None means no limit.
108 |         """
109 |         if limit is None:
110 |             return self
111 |         else:
112 |             data = [self.info[x][:limit] for x in self.headers]
113 |             return InfoTable(headers=self.headers, data=data, with_percent=self.with_percent)
114 | 
115 |     def __str__(self):
116 |         self.filter_zeros()
117 |         time_formatter = np.vectorize(format_time)
118 |         mem_formatter = np.vectorize(format_memory)
119 |         percent_formatter = np.vectorize(lambda x: " ({:.2%})".format(x))
120 | 
121 |         fmt_data = []
122 |         for header in self.headers:
123 |             data = self.info[header]
124 |             if "time" in header:
125 |                 time_array = time_formatter(data)
126 |                 if self.with_percent:
127 |                     percent = percent_formatter(data / data.sum())
128 |                     time_array = np.core.defchararray.add(time_array, percent)
129 |                 fmt_data.append(time_array)
130 |             elif "mem" in header:
131 |                 mem_array = mem_formatter(data)
132 |                 if self.with_percent:
133 |                     percent = percent_formatter(data / data.sum())
134 |                     mem_array = np.core.defchararray.add(mem_array, percent)
135 |                 fmt_data.append(mem_array)
136 |             else:
137 |                 fmt_data.append(data)
138 | 
139 |         concat_data = np.concatenate(fmt_data, axis=1)
140 |         table = tabulate.tabulate(concat_data, headers=self.headers, tablefmt="fancy_grid")
141 |         return table
142 | 
143 | 
144 | class TreeTable(InfoTable):
145 | 
146 |     def __init__(self, headers, data, with_percent=False, max_depth=3):
147 |         super().__init__(headers, data, with_percent)
148 |         self.max_depth = max_depth
149 | 
150 |     def __str__(self):
151 |         self.filter_zeros()
152 |         time_formatter = np.vectorize(format_time)
153 |         mem_formatter = np.vectorize(format_memory)
154 |         percent_formatter = np.vectorize(lambda x: " ({:.2%})".format(x))
155 | 
156 |         fmt_data = []
157 |         for header in self.headers:
158 |             data = self.info[header]
159 |             if "time" in header:
160 |                 time_array = time_formatter(data)
161 |                 if self.with_percent:
162 |                     percent = percent_formatter(data / data[0])  # data[0] is the sum value
163 |                     time_array = np.core.defchararray.add(time_array, percent)
164 |                 fmt_data.append(time_array)
165 |             elif "mem" in header:
166 |                 mem_array = mem_formatter(data)
167 |                 if self.with_percent:
168 |                     percent = percent_formatter(data / data[0])  # sum value, ditto
169 |                     mem_array = np.core.defchararray.add(mem_array, percent)
170 |                 fmt_data.append(mem_array)
171 |             else:
172 |                 fmt_data.append(data)
173 | 
174 |         concat_data = np.concatenate(fmt_data, axis=1)
175 | 
176 |         # leading whitespace encodes tree depth, so it must be preserved
177 |         old_ws = tabulate.PRESERVE_WHITESPACE
178 |         tabulate.PRESERVE_WHITESPACE = True
179 |         table = tabulate.tabulate(concat_data, headers=self.headers, tablefmt="fancy_grid")
180 |         tabulate.PRESERVE_WHITESPACE = old_ws
181 |         return table
182 | 
--------------------------------------------------------------------------------
/nnprof/profiler.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding:utf-8 -*-
3 | 
4 | import functools
5 | import itertools
6 | 
7 | from collections import OrderedDict, defaultdict, namedtuple
8 | from enum import IntEnum, unique
9 | 
10 | import numpy as np
11 | 
12 | import torch
13 | import torch.autograd.profiler as tprofiler
14 | 
15 | from .info_table import InfoTable, TreeTable
16 | from .utils import leaf_modules_generator
17 | 
18 | 
19 | # profile info in table
20 | COMMON_STAT = [
21 |     "self_cpu_time", "cpu_time", "self_cuda_time", "cuda_time",
22 |     "self_cpu_mem", "cpu_mem", "self_cuda_mem", "cuda_mem", "hits"
23 | ]
24 | STAT_TYPE = ["float32"] * (len(COMMON_STAT) - 1) + ["int32"]  # "hits" is int32 type
["float32"] * (len(COMMON_STAT) - 1) + ["int32"] # "hits" is int32 type 25 | LEADING_KEY = " " # leading string of module name LAYER_TREE profile mode 26 | 27 | ProfileInfo = namedtuple("ProfileInfo", COMMON_STAT) 28 | 29 | 30 | @unique 31 | class ProfileMode(IntEnum): 32 | 33 | def __new__(cls, value, doc=None): 34 | self = int.__new__(cls, value) 35 | self._value_ = value 36 | if doc is not None: 37 | self.__doc__ = doc 38 | return self 39 | 40 | LAYER = 1, "Layer by layer profile" 41 | OP = 2, "Operator level profile" 42 | MIXED = 3, "Operator of layer level profile" 43 | LAYER_TREE = 4, "Layer level profile, presented in tree format" 44 | 45 | 46 | class profile: 47 | """ 48 | Profile tool for Pytorch models, using torch.autograd.profile inside. 49 | """ 50 | 51 | def __init__( 52 | self, 53 | module, 54 | enabled=True, 55 | use_cuda=False, 56 | paths=None, 57 | profile_memory=False, 58 | mode=ProfileMode.LAYER 59 | ): 60 | """ 61 | Args: 62 | module (nn.Module): torch module to profile. 63 | enabled (bool): whether enable profile or not. 64 | use_cuda (bool): whether use cuda profiler or not. 65 | paths (Iterable[string]): profile paths for hooks. For example, if you want see 66 | profile info of modeule named classifier, use path=("classifier") 67 | profile_memory (bool): whether profile memory or not, require torch >= 1.6 68 | mode (IntEnum): ProfileMode enum. 69 | """ 70 | self._module = module 71 | self.enabled = enabled 72 | self.use_cuda = use_cuda 73 | self.paths = paths 74 | 75 | if profile_memory: 76 | torch_ver = [int(x) for x in torch.__version__.split(".", maxsplit=2)[:2]] 77 | assert torch_ver >= [1, 6], "profile_memory = True requires torch 1.6+" 78 | self.profile_memory = profile_memory 79 | assert mode in ProfileMode, "Profile mode {} not found".format(mode) 80 | self.mode = mode 81 | 82 | self.entered = False 83 | self.exited = False 84 | self.traces = () 85 | self.profile_events = defaultdict(list) 86 | 87 | def __enter__(self): 88 | if not self.enabled: 89 | return self 90 | if self.entered: 91 | raise RuntimeError("profile is not reentrant") 92 | self.entered = True 93 | self._forwards = {} # store the original forward functions 94 | self.traces = tuple(map(self._add_profile_hook, leaf_modules_generator(self._module))) 95 | return self 96 | 97 | def __exit__(self, exc_type, exc_val, exc_tb): 98 | if not self.enabled: 99 | return 100 | tuple(map(self._remove_profile_hook, self.traces)) 101 | del self._forwards # remove unnecessary forwards 102 | self.exited = True 103 | 104 | def _traces_to_table(self, **kwargs): 105 | """ 106 | human readable output of the profiler traces and events. 107 | """ 108 | tree_dict = build_info_tree(self.traces, self.profile_events, self.mode) 109 | 110 | if self.mode == ProfileMode.LAYER_TREE: 111 | max_depth = kwargs.pop("max_depth", 3) 112 | tree_dict = format_to_module_tree(tree_dict, max_depth) 113 | headers, data = generate_header_and_data(tree_dict) 114 | table = TreeTable(headers, data, max_depth=max_depth) 115 | else: 116 | headers, data = generate_header_and_data(tree_dict) 117 | table = InfoTable(headers, data) 118 | 119 | return table 120 | 121 | def table( 122 | self, sorted_by="cpu_time", row_limit=None, average=False, with_percent=True, **kwargs 123 | ): 124 | """ 125 | return profile info in table format 126 | 127 | Args: 128 | sorted_by (string): which data the table is sorted by. Default sorted by cpu time 129 | row_limit (int): row limit number of table, None means no limit. 
130 |             average (bool): whether to average profile data by hits or not.
131 |             with_percent (bool): whether profile data is presented with percentages or not.
132 |         kwargs:
133 |             max_depth (int): depth of the tree when using LAYER_TREE mode.
134 |         """
135 |         if not self.exited:
136 |             return ""
137 |         else:
138 |             table = self._traces_to_table(**kwargs)
139 |             if not self.use_cuda:
140 |                 table.filter([x for x in COMMON_STAT if "cuda" in x])
141 | 
142 |             if self.mode != ProfileMode.LAYER_TREE:
143 |                 # sorted_by/row_limit are not supported for the TreeTable type
144 |                 table = table.sorted_by(sorted_by).row_limit(row_limit)
145 | 
146 |             if average:
147 |                 table.average()
148 | 
149 |             table.with_percent = with_percent
150 |             return table
151 | 
152 |     def _add_profile_hook(self, leaf_module):
153 | 
154 |         def cond(path, paths):
155 |             return sum([key in path for key in paths])
156 | 
157 |         path, module = leaf_module
158 |         if (self.paths is not None and cond(path, self.paths)) or (self.paths is None):
159 |             _forward = module.forward
160 |             self._forwards[path] = _forward
161 | 
162 |             @functools.wraps(_forward)
163 |             def wrap_forward(*args, **kwargs):
164 |                 with tprofiler.profile(
165 |                     use_cuda=self.use_cuda, profile_memory=self.profile_memory,
166 |                 ) as prof:
167 |                     res = _forward(*args, **kwargs)
168 |                 event_list = prof.function_events
169 |                 event_list.populate_cpu_children()
170 |                 # each profile call should be contained in its own list
171 |                 self.profile_events[path].append(event_list)
172 |                 return res
173 | 
174 |             module.forward = wrap_forward
175 | 
176 |         return leaf_module
177 | 
178 |     def _remove_profile_hook(self, trace):
179 |         path, module = trace
180 |         if self.paths is None or any(key in path for key in self.paths):
181 |             module.forward = self._forwards[path]  # same matching rule as _add_profile_hook
182 | 
183 | 
184 | def generate_header_and_data(tree_dict):
185 |     headers = ["name"] + COMMON_STAT
186 | 
187 |     format_lines = [
188 |         (
189 |             name,
190 |             info.self_cpu_time,
191 |             info.cpu_time,
192 |             info.self_cuda_time,
193 |             info.cuda_time,
194 |             info.self_cpu_mem,
195 |             info.cpu_mem,
196 |             info.self_cuda_mem,
197 |             info.cuda_mem,
198 |             info.hits,
199 |         ) for name, info in tree_dict.items()
200 |     ]
201 |     data = np.array(format_lines)
202 |     data = np.hsplit(data, len(headers))
203 |     data[1:] = [x.astype(dtype) for x, dtype in zip(data[1:], STAT_TYPE)]
204 |     return headers, data
205 | 
206 | 
207 | def format_to_module_tree(profile_dict, max_depth=3):
208 | 
209 |     def merge_info(origin_info, update_info):
210 |         sum_result = tuple(a + b for a, b in zip(origin_info[:-1], update_info[:-1]))
211 |         hits = (max(origin_info[-1], update_info[-1]), )
212 |         return sum_result + hits
213 | 
214 |     tree_format_dict = OrderedDict()
215 |     for key, info in profile_dict.items():
216 |         path = key.split(".", maxsplit=max_depth)
217 |         path = list(itertools.accumulate(path, lambda x, y: x + "." + y))[:max_depth]
218 |         path_with_whitespace = [LEADING_KEY * i + k for i, k in enumerate(path)]
219 | 
220 |         for p in path_with_whitespace:
221 |             if p in tree_format_dict:
222 |                 tree_format_dict[p] = ProfileInfo(*merge_info(info, tree_format_dict[p]))
223 |             else:
224 |                 tree_format_dict[p] = info
225 | 
226 |     return tree_format_dict
227 | 
228 | 
229 | def get_profile_info(events, path_events):
230 |     if "self_cpu_memory_usage" in dir(events[0]):
231 |         self_cpu_mem = sum([e.self_cpu_memory_usage for e in events])
232 |         cpu_mem = sum([e.cpu_memory_usage for e in events])
233 |         self_cuda_mem = sum([e.self_cuda_memory_usage for e in events])
234 |         cuda_mem = sum([e.cuda_memory_usage for e in events])
235 |     else:
236 |         self_cpu_mem, cpu_mem, self_cuda_mem, cuda_mem = 0., 0., 0., 0.
237 | 
238 |     info = ProfileInfo(
239 |         # TIME
240 |         sum([e.self_cpu_time_total for e in events]),
241 |         sum([e.cpu_time_total for e in events]),
242 |         sum([e.self_cuda_time_total for e in events]),
243 |         sum([e.cuda_time_total for e in events]),
244 |         # Memory
245 |         self_cpu_mem,
246 |         cpu_mem,
247 |         self_cuda_mem,
248 |         cuda_mem,
249 |         # Hits
250 |         len(path_events)
251 |     )
252 |     return info
253 | 
254 | 
255 | def build_info_tree(traces, trace_events, mode=ProfileMode.LAYER):
256 |     """
257 |     Build the profile dict according to the profile mode.
258 |     """
259 |     assert mode in ProfileMode, "ProfileMode {} not found".format(mode)
260 |     tree = OrderedDict()
261 | 
262 |     for trace in traces:
263 |         path, module = trace
264 |         # unwrap all of the events, in case the model is called multiple times
265 |         events = [te for tevents in trace_events[path] for te in tevents]
266 |         if mode == ProfileMode.LAYER or mode == ProfileMode.LAYER_TREE:
267 |             tree[path] = get_profile_info(events, trace_events[path])
268 |         elif mode == ProfileMode.OP or mode == ProfileMode.MIXED:
269 |             for op in set(event.name for event in events):
270 |                 op_events = [e for e in events if e.name == op]
271 |                 stat = get_profile_info(op_events, op_events)
272 |                 if mode == ProfileMode.MIXED:
273 |                     tree[path + "." + op] = stat
274 |                 else:  # operator mode
275 |                     if op not in tree:  # init op in tree
276 |                         tree[op] = stat
277 |                     else:
278 |                         # add Op level profile info to the original value
279 |                         tree[op] = ProfileInfo(*(a + b for a, b in zip(tree[op], stat)))
280 | 
281 |     return tree
282 | 
--------------------------------------------------------------------------------
/nnprof/utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding:utf-8 -*-
3 | 
4 | from collections import namedtuple
5 | 
6 | LeafModule = namedtuple("LeafModule", ["path", "module"])
7 | 
8 | 
9 | def leaf_modules_generator(module, name=None, path=None):
10 |     """
11 |     Generate all leaf modules of a given pytorch module.
12 | 
13 |     Args:
14 |         module (nn.Module): a pytorch nn.Module.
15 |         name (string): name of the pytorch module.
16 |         path (Tuple[string]): path to the pytorch module.
17 | 
18 |     Returns:
19 |         A generator that yields LeafModule namedtuples.
20 | """ 21 | if path is None: 22 | path = () 23 | 24 | if not name: 25 | name = module.__class__.__name__ 26 | named_children = list(module.named_children()) 27 | path = path + (name,) 28 | if len(named_children) == 0: 29 | yield LeafModule(".".join(path), module) 30 | # recursively walk into all submodules 31 | for name, child_module in named_children: 32 | yield from leaf_modules_generator(child_module, name=name, path=path) 33 | 34 | 35 | def format_memory(nbytes): 36 | """Returns a formatted memory size string""" 37 | KB = 1024 38 | MB = 1024 * KB 39 | GB = 1024 * MB 40 | if (abs(nbytes) >= GB): 41 | return '{:.2f} Gb'.format(nbytes * 1.0 / GB) 42 | elif (abs(nbytes) >= MB): 43 | return '{:.2f} Mb'.format(nbytes * 1.0 / MB) 44 | elif (abs(nbytes) >= KB): 45 | return '{:.2f} Kb'.format(nbytes * 1.0 / KB) 46 | else: 47 | return str(nbytes) + ' b' 48 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import codecs 5 | import os 6 | import setuptools 7 | 8 | 9 | def read(*parts): 10 | cur_path = os.path.abspath(os.path.dirname(__file__)) 11 | with codecs.open(os.path.join(cur_path, *parts), "r") as fp: 12 | return fp.read() 13 | 14 | 15 | setuptools.setup( 16 | name="nnprof", 17 | version="0.1.1", 18 | author="Feng Wang", 19 | author_email="wffatescript@gmail.com", 20 | description="Profile tool for neural network(time, memory, etc.)", 21 | long_description=read("README.md"), 22 | long_description_content_type="text/markdown", 23 | url="https://github.com/FateScript/nnprof", 24 | packages=setuptools.find_packages(), 25 | license="Apache License 2.0", 26 | install_requires=[ 27 | "tabulate", 28 | "torch", 29 | "numpy", 30 | ], 31 | classifiers=[ 32 | "License :: OSI Approved :: Apache Software License", 33 | "Operating System :: OS Independent", 34 | "Programming Language :: Python :: 2", 35 | "Programming Language :: Python :: 3", 36 | "Programming Language :: Python :: 3.5", 37 | "Programming Language :: Python :: 3.6", 38 | "Programming Language :: Python :: 3.7", 39 | "Programming Language :: Python :: 3.8", 40 | "Programming Language :: Python :: 3.9", 41 | ], 42 | ) 43 | -------------------------------------------------------------------------------- /tests/test_nnprofile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | import unittest 4 | import torch 5 | import torchvision 6 | 7 | from nnprof import profile, ProfileMode, InfoTable, TreeTable 8 | 9 | 10 | class TestProfile(unittest.TestCase): 11 | 12 | model = torchvision.models.alexnet(pretrained=False) 13 | x = torch.rand([1, 3, 224, 224]) 14 | 15 | def test_table_args(self): 16 | with profile(self.model, profile_memory=True) as prof: 17 | _ = self.model(self.x) 18 | sorted_keys = ["cpu_time", "self_cpu_time"] 19 | for k in sorted_keys: 20 | table = prof.table(sorted_by=k) 21 | self.assertIsInstance(table, InfoTable) 22 | 23 | for average in [True, False]: 24 | table = prof.table(average=average) 25 | self.assertIsInstance(table, InfoTable) 26 | 27 | def test_profile_mode(self): 28 | for mode in ProfileMode: 29 | with profile(self.model, profile_memory=True, mode=mode) as prof: 30 | _ = self.model(self.x) 31 | table = prof.table() 32 | if mode != ProfileMode.LAYER_TREE: 33 | self.assertIsInstance(table, InfoTable) 34 | else: 35 | self.assertIsInstance(table, 
TreeTable) 36 | --------------------------------------------------------------------------------