├── .gitignore
├── LICENSE
├── README.md
├── nnprof
│   ├── __init__.py
│   ├── info_table.py
│   ├── profiler.py
│   └── utils.py
├── setup.py
└── tests
    └── test_nnprofile.py

/.gitignore:
--------------------------------------------------------------------------------
1 | # unwanted data files
2 | *.jpg
3 | *.png
4 | *.txt
5 | 
6 | # compilation and distribution
7 | __pycache__
8 | _ext
9 | *.pyc
10 | *.so
11 | *.egg-info/
12 | build/
13 | dist/
14 | wheels/
15 | 
16 | # pytorch/python/numpy formats
17 | *.pth
18 | *.pkl
19 | *.npy
20 | 
21 | # ipython/jupyter notebooks
22 | *.ipynb
23 | **/.ipynb_checkpoints/
24 | 
25 | # Editor temporaries
26 | *.swn
27 | *.swo
28 | *.swp
29 | *~
30 | 
31 | # Pycharm editor settings
32 | .idea
33 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 | 
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 | 1. Definitions.
8 | 
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 | 
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 | 
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 | 
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 | 
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 | 
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 | 
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 | 
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 | 
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 | 
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 | 
176 | END OF TERMS AND CONDITIONS
177 | 
178 | APPENDIX: How to apply the Apache License to your work.
179 | 
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 | 
189 | Copyright [yyyy] [name of copyright owner]
190 | 
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 | 
195 | http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # nnprof
2 | 
3 | ## Introduction
4 | 
5 | nnprof is a profiling tool for PyTorch neural networks.
6 | 
7 | ### Features
8 | 
9 | * **multiple profile modes**: nnprof supports 4 profile modes: Layer level, Operation level, Mixed level, and Layer Tree level. Please check below for detailed usage.
10 | * **time and memory profiling**: nnprof supports both time and memory profiling. Since memory profiling was first supported in PyTorch 1.6, please use torch >= 1.6 for memory profiling.
11 | * **sorting by a given key with percentages**: users can print the table sorted by a given key and with percentages shown, which is really helpful for optimizing a neural network.
12 | 
13 | ## Requirements
14 | 
15 | * Python >= 3.6
16 | * PyTorch
17 | * Numpy
18 | 
19 | ## Get Started
20 | 
21 | ### install nnprof
22 | * pip install:
23 | ```shell
24 | pip install nnprof
25 | ```
26 | * from source:
27 | ```shell
28 | python -m pip install 'git+https://github.com/FateScript/nnprof.git'
29 | 
30 | # or install after cloning this repo
31 | git clone https://github.com/FateScript/nnprof.git
32 | pip install -e nnprof
33 | ```
34 | 
35 | ### use nnprof
36 | 
37 | ```python3
38 | from nnprof import profile, ProfileMode
39 | import torch
40 | import torchvision
41 | 
42 | model = torchvision.models.alexnet(pretrained=False)
43 | x = torch.rand([1, 3, 224, 224])
44 | 
45 | # mode could be any one of LAYER, OP, MIXED, LAYER_TREE
46 | mode = ProfileMode.LAYER
47 | 
48 | with profile(model, mode=mode) as prof:
49 |     y = model(x)
50 | 
51 | print(prof.table(average=False, sorted_by="cpu_time"))
52 | # the table can be sorted by any presented header.
53 | ```
54 | 
55 | Part of the presented table looks like the table below; note that rows are sorted by cpu_time.
56 | ```
57 | ╒══════════════════════╤═══════════════════╤═══════════════════╤════════╕
58 | │ name                 │ self_cpu_time     │ cpu_time          │   hits │
59 | ╞══════════════════════╪═══════════════════╪═══════════════════╪════════╡
60 | │ AlexNet.features.0   │ 19.114ms (34.77%) │ 76.383ms (45.65%) │      1 │
61 | ├──────────────────────┼───────────────────┼───────────────────┼────────┤
62 | │ AlexNet.features.3   │ 5.148ms (9.37%)   │ 20.576ms (12.30%) │      1 │
63 | ├──────────────────────┼───────────────────┼───────────────────┼────────┤
64 | │ AlexNet.features.8   │ 4.839ms (8.80%)   │ 19.336ms (11.56%) │      1 │
65 | ├──────────────────────┼───────────────────┼───────────────────┼────────┤
66 | │ AlexNet.features.6   │ 4.162ms (7.57%)   │ 16.632ms (9.94%)  │      1 │
67 | ├──────────────────────┼───────────────────┼───────────────────┼────────┤
68 | │ AlexNet.features.10  │ 2.705ms (4.92%)   │ 10.713ms (6.40%)  │      1 │
69 | ├──────────────────────┼───────────────────┼───────────────────┼────────┤
70 | ```
71 | 
72 | You are welcome to try different profile modes and more table formats; see the extra examples at the end of this README.
73 | 
74 | ## Contribution
75 | 
76 | Issues and pull requests are welcome.
77 | 
78 | ## Acknowledgement
79 | 
80 | Some ideas in nnprof are inspired by [torchprof](https://github.com/awwong1/torchprof) and [torch.autograd.profiler](https://github.com/pytorch/pytorch/blob/749f8b78508c43f9e6331f2395a4202785068442/torch/autograd/profiler.py).
81 | Many thanks to the authors.
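82 | 
83 | ## More examples
84 | 
85 | The snippets below are minimal sketches of the features listed above; the AlexNet model and random input are only placeholders, so swap in your own network and data.
86 | 
87 | LAYER_TREE mode presents layer-level results as a tree. The `max_depth` keyword of `table()` controls how many module levels are printed (sorting and row limits are not applied in this mode):
88 | 
89 | ```python3
90 | from nnprof import profile, ProfileMode
91 | import torch
92 | import torchvision
93 | 
94 | model = torchvision.models.alexnet(pretrained=False)
95 | x = torch.rand([1, 3, 224, 224])
96 | 
97 | with profile(model, mode=ProfileMode.LAYER_TREE) as prof:
98 |     y = model(x)
99 | 
100 | # print at most 2 module levels of the tree
101 | print(prof.table(max_depth=2))
102 | ```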
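103 | 
104 | Continuing the same sketch, memory profiling (torch >= 1.6) is enabled with `profile_memory=True`; memory columns such as `self_cpu_mem` and `cpu_mem` then appear in the table and can serve as sort keys:
105 | 
106 | ```python3
107 | # memory columns are filled only when profile_memory=True
108 | with profile(model, profile_memory=True) as prof:
109 |     y = model(x)
110 | 
111 | print(prof.table(sorted_by="self_cpu_mem"))
112 | ```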
113 | 
--------------------------------------------------------------------------------
/nnprof/__init__.py:
--------------------------------------------------------------------------------
1 | from .profiler import ProfileMode, profile
2 | from .info_table import InfoTable, TreeTable
3 | 
--------------------------------------------------------------------------------
/nnprof/info_table.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding:utf-8 -*-
3 | 
4 | import numpy as np
5 | import tabulate
6 | 
7 | from torch.autograd.profiler import format_time
8 | try:
9 |     from torch.autograd.profiler import format_memory
10 | except ImportError:
11 |     from .utils import format_memory
12 | 
13 | 
14 | class InfoTable:
15 | 
16 |     def __init__(self, headers, data, with_percent=False):
17 |         """
18 |         Args:
19 |             headers (Iterable[string]): headers of the info table.
20 |             data (Iterable[numpy.array]): data of the table.
21 |             with_percent (bool): whether data is presented with percentages or not.
22 |         """
23 |         assert len(headers) == len(data), "length of headers and data are not matched"
24 |         self.headers = headers
25 |         self.info = {key: value for key, value in zip(headers, data)}
26 |         self.with_percent = with_percent
27 | 
28 |     def insert(self, header, data, position=-1):
29 |         """
30 |         Insert a header and its data into the current table at the given position.
31 | 
32 |         Args:
33 |             header (string): header of the inserted column.
34 |             data (numpy.array): data of the inserted column.
35 |             position (int): insert position, used in the same way as a list index.
36 |         """
37 | 
38 |         self.info[header] = data
39 |         if header in self.headers:
40 |             index = self.headers.index(header)
41 |             # swap the two headers in place; a nested swap(a, b) helper that
42 |             # only rebinds its local names would have no effect
43 |             self.headers[index], self.headers[position] = \
44 |                 self.headers[position], self.headers[index]
45 |         else:
46 |             self.headers.insert(position, header)
47 | 
48 |     def sorted_by(self, keyname=None, descending=True):
49 |         """
50 |         Sort the table by keyname.
51 | 
52 |         Args:
53 |             keyname (string): name of the header to sort by.
54 |             descending (bool): whether to sort in descending order or not.
55 |         """
56 |         if keyname is None:
57 |             return self
58 |         if keyname not in self.info:
59 |             keyname += "_avg"
60 |         assert keyname in self.info
61 |         sort_index = np.argsort(self.info[keyname], axis=0).reshape(-1)
62 |         if descending:
63 |             sort_index = sort_index[::-1]
64 |         for header in self.headers:
65 |             self.info[header] = self.info[header][sort_index]
66 | 
67 |         return self
68 | 
69 |     def filter(self, filter_list=None):
70 |         """
71 |         Filter out headers (and their data) listed in filter_list.
72 | 
73 |         Args:
74 |             filter_list (Iterable[string]): list of headers that need to be filtered out.
75 |         """
76 |         self.headers = [header for header in self.headers if header not in (filter_list or [])]
77 | 
78 |     def filter_zeros(self):
79 |         """Filter out all-zeros data."""
80 |         filter_list = []
81 |         for header in self.headers:
82 |             data = self.info[header]
83 |             if "float" in data.dtype.name or "int" in data.dtype.name:
84 |                 if data.sum() == 0:
85 |                     filter_list.append(header)
86 |         self.filter(filter_list)
87 | 
88 |     def average(self, average_key="hits"):
89 |         """
90 |         Average time/memory data by the given key.
91 | 
92 |         Args:
93 |             average_key (string): header whose data is used as the divisor.
94 |         """
95 |         hits = self.info[average_key]
96 |         for i, header in enumerate(self.headers):
97 |             if header.endswith("time") or header.endswith("mem"):
98 |                 self.info[header + "_avg"] = self.info[header] / hits
99 |                 self.headers[i] += "_avg"
100 |                 del self.info[header]
101 | 
102 |     def row_limit(self, limit=None):
103 |         """
104 |         Return the table limited to the first `limit` rows.
105 | 
106 |         Args:
107 |             limit (int): maximum number of rows, None means no limit.
108 |         """
109 |         if limit is None:
110 |             return self
111 |         else:
112 |             data = [self.info[x][:limit] for x in self.headers]
113 |             return InfoTable(headers=self.headers, data=data, with_percent=self.with_percent)
114 | 
115 |     def __str__(self):
116 |         self.filter_zeros()
117 |         time_formatter = np.vectorize(format_time)
118 |         mem_formatter = np.vectorize(format_memory)
119 |         percent_formatter = np.vectorize(lambda x: " ({:.2%})".format(x))
120 | 
121 |         fmt_data = []
122 |         for header in self.headers:
123 |             data = self.info[header]
124 |             if "time" in header:
125 |                 time_array = time_formatter(data)
126 |                 if self.with_percent:
127 |                     percent = percent_formatter(data / data.sum())
128 |                     time_array = np.core.defchararray.add(time_array, percent)
129 |                 fmt_data.append(time_array)
130 |             elif "mem" in header:
131 |                 mem_array = mem_formatter(data)
132 |                 if self.with_percent:
133 |                     percent = percent_formatter(data / data.sum())
134 |                     mem_array = np.core.defchararray.add(mem_array, percent)
135 |                 fmt_data.append(mem_array)
136 |             else:
137 |                 fmt_data.append(data)
138 | 
139 |         concat_data = np.concatenate(fmt_data, axis=1)
140 |         table = tabulate.tabulate(concat_data, headers=self.headers, tablefmt="fancy_grid")
141 |         return table
142 | 
143 | 
144 | class TreeTable(InfoTable):
145 | 
146 |     def __init__(self, headers, data, with_percent=False, max_depth=3):
147 |         super().__init__(headers, data, with_percent)
148 |         self.max_depth = max_depth
149 | 
150 |     def __str__(self):
151 |         self.filter_zeros()
152 |         time_formatter = np.vectorize(format_time)
153 |         mem_formatter = np.vectorize(format_memory)
154 |         percent_formatter = np.vectorize(lambda x: " ({:.2%})".format(x))
155 | 
156 |         fmt_data = []
157 |         for header in self.headers:
158 |             data = self.info[header]
159 |             if "time" in header:
160 |                 time_array = time_formatter(data)
161 |                 if self.with_percent:
162 |                     percent = percent_formatter(data / data[0])  # data[0] is the sum value
163 |                     time_array = np.core.defchararray.add(time_array, percent)
164 |                 fmt_data.append(time_array)
165 |             elif "mem" in header:
166 |                 mem_array = mem_formatter(data)
167 |                 if self.with_percent:
168 |                     percent = percent_formatter(data / data[0])  # sum value, ditto
169 |                     mem_array = np.core.defchararray.add(mem_array, percent)
170 |                 fmt_data.append(mem_array)
171 |             else:
172 |                 fmt_data.append(data)
173 | 
174 |         concat_data = np.concatenate(fmt_data, axis=1)
175 | 
176 |         # leading whitespace encodes tree depth, so it must be preserved
177 |         old_ws = tabulate.PRESERVE_WHITESPACE
178 |         tabulate.PRESERVE_WHITESPACE = True
179 |         table = tabulate.tabulate(concat_data, headers=self.headers, tablefmt="fancy_grid")
180 |         tabulate.PRESERVE_WHITESPACE = old_ws
181 |         return table
182 | 
--------------------------------------------------------------------------------
/nnprof/profiler.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding:utf-8 -*-
3 | 
4 | import functools
5 | import itertools
6 | 
7 | from collections import OrderedDict, defaultdict, namedtuple
8 | from enum import IntEnum, unique
9 | 
10 | import numpy as np
11 | 
12 | import torch
13 | import torch.autograd.profiler as tprofiler
14 | 
15 | from .info_table import InfoTable, TreeTable
16 | from .utils import leaf_modules_generator
17 | 
18 | 
19 | # profile info in table
20 | COMMON_STAT = [
21 |     "self_cpu_time", "cpu_time", "self_cuda_time", "cuda_time",
22 |     "self_cpu_mem", "cpu_mem", "self_cuda_mem", "cuda_mem", "hits"
23 | ]
24 | STAT_TYPE = ["float32"] * (len(COMMON_STAT) - 1) + ["int32"]  # "hits" is int32 type
["float32"] * (len(COMMON_STAT) - 1) + ["int32"] # "hits" is int32 type 25 | LEADING_KEY = " " # leading string of module name LAYER_TREE profile mode 26 | 27 | ProfileInfo = namedtuple("ProfileInfo", COMMON_STAT) 28 | 29 | 30 | @unique 31 | class ProfileMode(IntEnum): 32 | 33 | def __new__(cls, value, doc=None): 34 | self = int.__new__(cls, value) 35 | self._value_ = value 36 | if doc is not None: 37 | self.__doc__ = doc 38 | return self 39 | 40 | LAYER = 1, "Layer by layer profile" 41 | OP = 2, "Operator level profile" 42 | MIXED = 3, "Operator of layer level profile" 43 | LAYER_TREE = 4, "Layer level profile, presented in tree format" 44 | 45 | 46 | class profile: 47 | """ 48 | Profile tool for Pytorch models, using torch.autograd.profile inside. 49 | """ 50 | 51 | def __init__( 52 | self, 53 | module, 54 | enabled=True, 55 | use_cuda=False, 56 | paths=None, 57 | profile_memory=False, 58 | mode=ProfileMode.LAYER 59 | ): 60 | """ 61 | Args: 62 | module (nn.Module): torch module to profile. 63 | enabled (bool): whether enable profile or not. 64 | use_cuda (bool): whether use cuda profiler or not. 65 | paths (Iterable[string]): profile paths for hooks. For example, if you want see 66 | profile info of modeule named classifier, use path=("classifier") 67 | profile_memory (bool): whether profile memory or not, require torch >= 1.6 68 | mode (IntEnum): ProfileMode enum. 69 | """ 70 | self._module = module 71 | self.enabled = enabled 72 | self.use_cuda = use_cuda 73 | self.paths = paths 74 | 75 | if profile_memory: 76 | torch_ver = [int(x) for x in torch.__version__.split(".", maxsplit=2)[:2]] 77 | assert torch_ver >= [1, 6], "profile_memory = True requires torch 1.6+" 78 | self.profile_memory = profile_memory 79 | assert mode in ProfileMode, "Profile mode {} not found".format(mode) 80 | self.mode = mode 81 | 82 | self.entered = False 83 | self.exited = False 84 | self.traces = () 85 | self.profile_events = defaultdict(list) 86 | 87 | def __enter__(self): 88 | if not self.enabled: 89 | return self 90 | if self.entered: 91 | raise RuntimeError("profile is not reentrant") 92 | self.entered = True 93 | self._forwards = {} # store the original forward functions 94 | self.traces = tuple(map(self._add_profile_hook, leaf_modules_generator(self._module))) 95 | return self 96 | 97 | def __exit__(self, exc_type, exc_val, exc_tb): 98 | if not self.enabled: 99 | return 100 | tuple(map(self._remove_profile_hook, self.traces)) 101 | del self._forwards # remove unnecessary forwards 102 | self.exited = True 103 | 104 | def _traces_to_table(self, **kwargs): 105 | """ 106 | human readable output of the profiler traces and events. 107 | """ 108 | tree_dict = build_info_tree(self.traces, self.profile_events, self.mode) 109 | 110 | if self.mode == ProfileMode.LAYER_TREE: 111 | max_depth = kwargs.pop("max_depth", 3) 112 | tree_dict = format_to_module_tree(tree_dict, max_depth) 113 | headers, data = generate_header_and_data(tree_dict) 114 | table = TreeTable(headers, data, max_depth=max_depth) 115 | else: 116 | headers, data = generate_header_and_data(tree_dict) 117 | table = InfoTable(headers, data) 118 | 119 | return table 120 | 121 | def table( 122 | self, sorted_by="cpu_time", row_limit=None, average=False, with_percent=True, **kwargs 123 | ): 124 | """ 125 | return profile info in table format 126 | 127 | Args: 128 | sorted_by (string): which data the table is sorted by. Default sorted by cpu time 129 | row_limit (int): row limit number of table, None means no limit. 
130 |             average (bool): whether to average profile data by hits or not.
131 |             with_percent (bool): whether profile data is presented with percentages or not.
132 |         kwargs:
133 |             max_depth (int): depth of the tree when using LAYER_TREE mode.
134 |         """
135 |         if not self.exited:
136 |             return ""
137 |         else:
138 |             table = self._traces_to_table(**kwargs)
139 |             if not self.use_cuda:
140 |                 table.filter([x for x in COMMON_STAT if "cuda" in x])
141 | 
142 |             if self.mode != ProfileMode.LAYER_TREE:
143 |                 # sorted_by/row_limit are not supported for the TreeTable type
144 |                 table = table.sorted_by(sorted_by).row_limit(row_limit)
145 | 
146 |             if average:
147 |                 table.average()
148 | 
149 |             table.with_percent = with_percent
150 |             return table
151 | 
152 |     def _add_profile_hook(self, leaf_module):
153 | 
154 |         def cond(path, paths):
155 |             return sum([key in path for key in paths])
156 | 
157 |         path, module = leaf_module
158 |         if (self.paths is not None and cond(path, self.paths)) or (self.paths is None):
159 |             _forward = module.forward
160 |             self._forwards[path] = _forward
161 | 
162 |             @functools.wraps(_forward)
163 |             def wrap_forward(*args, **kwargs):
164 |                 with tprofiler.profile(
165 |                     use_cuda=self.use_cuda, profile_memory=self.profile_memory,
166 |                 ) as prof:
167 |                     res = _forward(*args, **kwargs)
168 |                 event_list = prof.function_events
169 |                 event_list.populate_cpu_children()
170 |                 # each profile call should be contained in its own list
171 |                 self.profile_events[path].append(event_list)
172 |                 return res
173 | 
174 |             module.forward = wrap_forward
175 | 
176 |         return leaf_module
177 | 
178 |     def _remove_profile_hook(self, trace):
179 |         path, module = trace
180 |         if self.paths is None or any(key in path for key in self.paths):
181 |             module.forward = self._forwards[path]  # same matching rule as _add_profile_hook
182 | 
183 | 
184 | def generate_header_and_data(tree_dict):
185 |     headers = ["name"] + COMMON_STAT
186 | 
187 |     format_lines = [
188 |         (
189 |             name,
190 |             info.self_cpu_time,
191 |             info.cpu_time,
192 |             info.self_cuda_time,
193 |             info.cuda_time,
194 |             info.self_cpu_mem,
195 |             info.cpu_mem,
196 |             info.self_cuda_mem,
197 |             info.cuda_mem,
198 |             info.hits,
199 |         ) for name, info in tree_dict.items()
200 |     ]
201 |     data = np.array(format_lines)
202 |     data = np.hsplit(data, len(headers))
203 |     data[1:] = [x.astype(dtype) for x, dtype in zip(data[1:], STAT_TYPE)]
204 |     return headers, data
205 | 
206 | 
207 | def format_to_module_tree(profile_dict, max_depth=3):
208 | 
209 |     def merge_info(origin_info, update_info):
210 |         sum_result = tuple(a + b for a, b in zip(origin_info[:-1], update_info[:-1]))
211 |         hits = (max(origin_info[-1], update_info[-1]), )
212 |         return sum_result + hits
213 | 
214 |     tree_format_dict = OrderedDict()
215 |     for key, info in profile_dict.items():
216 |         path = key.split(".", maxsplit=max_depth)
217 |         path = list(itertools.accumulate(path, lambda x, y: x + "." + y))[:max_depth]
218 |         path_with_whitespace = [LEADING_KEY * i + k for i, k in enumerate(path)]
219 | 
220 |         for p in path_with_whitespace:
221 |             if p in tree_format_dict:
222 |                 tree_format_dict[p] = ProfileInfo(*merge_info(info, tree_format_dict[p]))
223 |             else:
224 |                 tree_format_dict[p] = info
225 | 
226 |     return tree_format_dict
227 | 
228 | 
229 | def get_profile_info(events, path_events):
230 |     if "self_cpu_memory_usage" in dir(events[0]):
231 |         self_cpu_mem = sum([e.self_cpu_memory_usage for e in events])
232 |         cpu_mem = sum([e.cpu_memory_usage for e in events])
233 |         self_cuda_mem = sum([e.self_cuda_memory_usage for e in events])
234 |         cuda_mem = sum([e.cuda_memory_usage for e in events])
235 |     else:
236 |         self_cpu_mem, cpu_mem, self_cuda_mem, cuda_mem = 0., 0., 0., 0.
237 | 
238 |     info = ProfileInfo(
239 |         # TIME
240 |         sum([e.self_cpu_time_total for e in events]),
241 |         sum([e.cpu_time_total for e in events]),
242 |         sum([e.self_cuda_time_total for e in events]),
243 |         sum([e.cuda_time_total for e in events]),
244 |         # Memory
245 |         self_cpu_mem,
246 |         cpu_mem,
247 |         self_cuda_mem,
248 |         cuda_mem,
249 |         # Hits
250 |         len(path_events)
251 |     )
252 |     return info
253 | 
254 | 
255 | def build_info_tree(traces, trace_events, mode=ProfileMode.LAYER):
256 |     """
257 |     Build the profile dict according to the profile mode.
258 |     """
259 |     assert mode in ProfileMode, "ProfileMode {} not found".format(mode)
260 |     tree = OrderedDict()
261 | 
262 |     for trace in traces:
263 |         path, module = trace
264 |         # unwrap all of the events, in case the model is called multiple times
265 |         events = [te for tevents in trace_events[path] for te in tevents]
266 |         if mode == ProfileMode.LAYER or mode == ProfileMode.LAYER_TREE:
267 |             tree[path] = get_profile_info(events, trace_events[path])
268 |         elif mode == ProfileMode.OP or mode == ProfileMode.MIXED:
269 |             for op in set(event.name for event in events):
270 |                 op_events = [e for e in events if e.name == op]
271 |                 stat = get_profile_info(op_events, op_events)
272 |                 if mode == ProfileMode.MIXED:
273 |                     tree[path + "." + op] = stat
274 |                 else:  # operator mode
275 |                     if op not in tree:  # init op in tree
276 |                         tree[op] = stat
277 |                     else:
278 |                         # add Op level profile info to the original value
279 |                         tree[op] = ProfileInfo(*(a + b for a, b in zip(tree[op], stat)))
280 | 
281 |     return tree
282 | 
--------------------------------------------------------------------------------
/nnprof/utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding:utf-8 -*-
3 | 
4 | from collections import namedtuple
5 | 
6 | LeafModule = namedtuple("LeafModule", ["path", "module"])
7 | 
8 | 
9 | def leaf_modules_generator(module, name=None, path=None):
10 |     """
11 |     Generate all leaf modules of a given pytorch module.
12 | 
13 |     Args:
14 |         module (nn.Module): a pytorch nn.Module.
15 |         name (string): name of the pytorch module.
16 |         path (Tuple[string]): path to the pytorch module.
17 | 
18 |     Returns:
19 |         A generator that yields LeafModule namedtuples.
20 | """ 21 | if path is None: 22 | path = () 23 | 24 | if not name: 25 | name = module.__class__.__name__ 26 | named_children = list(module.named_children()) 27 | path = path + (name,) 28 | if len(named_children) == 0: 29 | yield LeafModule(".".join(path), module) 30 | # recursively walk into all submodules 31 | for name, child_module in named_children: 32 | yield from leaf_modules_generator(child_module, name=name, path=path) 33 | 34 | 35 | def format_memory(nbytes): 36 | """Returns a formatted memory size string""" 37 | KB = 1024 38 | MB = 1024 * KB 39 | GB = 1024 * MB 40 | if (abs(nbytes) >= GB): 41 | return '{:.2f} Gb'.format(nbytes * 1.0 / GB) 42 | elif (abs(nbytes) >= MB): 43 | return '{:.2f} Mb'.format(nbytes * 1.0 / MB) 44 | elif (abs(nbytes) >= KB): 45 | return '{:.2f} Kb'.format(nbytes * 1.0 / KB) 46 | else: 47 | return str(nbytes) + ' b' 48 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import codecs 5 | import os 6 | import setuptools 7 | 8 | 9 | def read(*parts): 10 | cur_path = os.path.abspath(os.path.dirname(__file__)) 11 | with codecs.open(os.path.join(cur_path, *parts), "r") as fp: 12 | return fp.read() 13 | 14 | 15 | setuptools.setup( 16 | name="nnprof", 17 | version="0.1.1", 18 | author="Feng Wang", 19 | author_email="wffatescript@gmail.com", 20 | description="Profile tool for neural network(time, memory, etc.)", 21 | long_description=read("README.md"), 22 | long_description_content_type="text/markdown", 23 | url="https://github.com/FateScript/nnprof", 24 | packages=setuptools.find_packages(), 25 | license="Apache License 2.0", 26 | install_requires=[ 27 | "tabulate", 28 | "torch", 29 | "numpy", 30 | ], 31 | classifiers=[ 32 | "License :: OSI Approved :: Apache Software License", 33 | "Operating System :: OS Independent", 34 | "Programming Language :: Python :: 2", 35 | "Programming Language :: Python :: 3", 36 | "Programming Language :: Python :: 3.5", 37 | "Programming Language :: Python :: 3.6", 38 | "Programming Language :: Python :: 3.7", 39 | "Programming Language :: Python :: 3.8", 40 | "Programming Language :: Python :: 3.9", 41 | ], 42 | ) 43 | -------------------------------------------------------------------------------- /tests/test_nnprofile.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | import unittest 4 | import torch 5 | import torchvision 6 | 7 | from nnprof import profile, ProfileMode, InfoTable, TreeTable 8 | 9 | 10 | class TestProfile(unittest.TestCase): 11 | 12 | model = torchvision.models.alexnet(pretrained=False) 13 | x = torch.rand([1, 3, 224, 224]) 14 | 15 | def test_table_args(self): 16 | with profile(self.model, profile_memory=True) as prof: 17 | _ = self.model(self.x) 18 | sorted_keys = ["cpu_time", "self_cpu_time"] 19 | for k in sorted_keys: 20 | table = prof.table(sorted_by=k) 21 | self.assertIsInstance(table, InfoTable) 22 | 23 | for average in [True, False]: 24 | table = prof.table(average=average) 25 | self.assertIsInstance(table, InfoTable) 26 | 27 | def test_profile_mode(self): 28 | for mode in ProfileMode: 29 | with profile(self.model, profile_memory=True, mode=mode) as prof: 30 | _ = self.model(self.x) 31 | table = prof.table() 32 | if mode != ProfileMode.LAYER_TREE: 33 | self.assertIsInstance(table, InfoTable) 34 | else: 35 | self.assertIsInstance(table, 
TreeTable) 36 | --------------------------------------------------------------------------------