├── .dockerignore ├── .gitignore ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.rst ├── VERSION ├── docs ├── Makefile ├── _templates │ └── layout.html ├── advanced.rst ├── conf.py ├── examples.rst ├── faqs.rst ├── index.rst ├── install.rst ├── profile.rst └── quickstart.rst ├── pyprof ├── __init__.py ├── examples │ ├── .gitignore │ ├── custom_func_module │ │ ├── README.md │ │ ├── custom_function.py │ │ ├── custom_module.py │ │ └── test.sh │ ├── imagenet │ │ ├── imagenet.py │ │ └── test.sh │ ├── jit │ │ ├── README.md │ │ ├── jit_script_function.py │ │ ├── jit_script_method.py │ │ ├── jit_trace_function.py │ │ ├── jit_trace_method.py │ │ └── test.sh │ ├── lenet.py │ ├── operators.py │ ├── simple.py │ └── user_annotation │ │ ├── README.md │ │ ├── resnet.py │ │ └── test.sh ├── nvtx │ ├── __init__.py │ └── nvmarker.py ├── parse │ ├── __init__.py │ ├── __main__.py │ ├── db.py │ ├── kernel.py │ ├── nsight.py │ ├── nvvp.py │ └── parse.py └── prof │ ├── __init__.py │ ├── __main__.py │ ├── activation.py │ ├── base.py │ ├── blas.py │ ├── conv.py │ ├── convert.py │ ├── data.py │ ├── dropout.py │ ├── dtype.py │ ├── embedding.py │ ├── index_slice_join_mutate.py │ ├── linear.py │ ├── loss.py │ ├── memory.py │ ├── misc.py │ ├── normalization.py │ ├── optim.py │ ├── output.py │ ├── pointwise.py │ ├── pooling.py │ ├── prof.py │ ├── randomSample.py │ ├── recurrentCell.py │ ├── reduction.py │ ├── softmax.py │ ├── tc.py │ ├── tensor.py │ ├── usage.py │ └── utility.py ├── qa ├── L0_docs │ └── test.sh ├── L0_lenet │ ├── test.sh │ └── test_lenet.py ├── L0_nvtx │ ├── __init__.py │ ├── test.sh │ └── test_pyprof_nvtx.py ├── L0_pyprof_data │ ├── __init__.py │ ├── test.sh │ └── test_pyprof_data.py └── common │ ├── check_copyright.py │ └── run_test.py ├── requirements ├── requirements.txt └── requirements_nsys.txt ├── setup.cfg └── setup.py /.dockerignore: -------------------------------------------------------------------------------- 1 | .git* -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.sql 3 | *.sqlite 4 | *.qdrep 5 | *.dict 6 | *.csv 7 | *.log 8 | *.pyc 9 | 10 | build/ 11 | dist/ 12 | nvidia_pyprof.egg-info/ 13 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Contribution Rules 18 | 19 | - The code style convention is enforced by clang-format. See the 20 | Developer Guide for instructions on how to ensure your contributions 21 | conform. In general please follow the existing conventions in the 22 | relevant file, submodule, module, and project when you add new code 23 | or when you extend/fix existing functionality. 24 | 25 | - Avoid introducing unnecessary complexity into existing code so that 26 | maintainability and readability are preserved. 27 | 28 | - Try to keep pull requests (PRs) as concise as possible: 29 | 30 | - Avoid committing commented-out code. 31 | 32 | - Wherever possible, each PR should address a single concern. If 33 | there are several otherwise-unrelated things that should be fixed 34 | to reach a desired endpoint, it is perfectly fine to open several 35 | PRs and state in the description which PR depends on another 36 | PR. The more complex the changes are in a single PR, the more time 37 | it will take to review those changes. 
38 | 39 | - Make sure that the build log is clean, meaning no warnings or 40 | errors should be present. 41 | 42 | - Make sure all `L0_*` tests pass: 43 | 44 | - In the `qa/` directory, there are basic sanity tests scripted in 45 | directories named `L0_...`. See the Testing section in the 46 | Developer Guide for instructions on running these tests. 47 | 48 | - PyProf's default build assumes recent versions of 49 | dependencies (CUDA, PyTorch, Nsight Systems, etc.). Contributions 50 | that add compatibility with older versions of those dependencies 51 | will be considered, but NVIDIA cannot guarantee that all possible 52 | build configurations work, are not broken by future contributions, 53 | and retain highest performance. 54 | 55 | - Make sure that you can contribute your work to open source (no 56 | license and/or patent conflict is introduced by your code). You need 57 | to [`sign`](#Sign) your commit. 58 | 59 | - Thanks in advance for your patience as we review your contributions; 60 | we do appreciate them! 61 | 62 | Sign Your Work 63 | -------------- 64 | 65 | We require that all contributors "sign-off" on their commits. This 66 | certifies that the contribution is your original work, or you have 67 | rights to submit it under the same license, or a compatible license. 68 | 69 | Any contribution which contains commits that are not Signed-Off will 70 | not be accepted. 71 | 72 | To sign off on a commit you simply use the `--signoff` (or `-s`) 73 | option when committing your changes: 74 | 75 | $ git commit -s -m "Add cool feature." 76 | 77 | This will append the following to your commit message: 78 | 79 | Signed-off-by: Your Name 80 | 81 | By doing this you certify the below: 82 | 83 | Developer Certificate of Origin 84 | Version 1.1 85 | 86 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 87 | 1 Letterman Drive 88 | Suite D4700 89 | San Francisco, CA, 94129 90 | 91 | Everyone is permitted to copy and distribute verbatim copies of 92 | this license document, but changing it is not allowed. 93 | 94 | 95 | Developer's Certificate of Origin 1.1 96 | 97 | By making a contribution to this project, I certify that: 98 | 99 | (a) The contribution was created in whole or in part by me and I 100 | have the right to submit it under the open source license 101 | indicated in the file; or 102 | 103 | (b) The contribution is based upon previous work that, to the best 104 | of my knowledge, is covered under an appropriate open source 105 | license and I have the right under that license to submit that 106 | work with modifications, whether created in whole or in part by 107 | me, under the same open source license (unless I am permitted to 108 | submit under a different license), as indicated in the file; or 109 | 110 | (c) The contribution was provided directly to me by some other 111 | person who certified (a), (b) or (c) and I have not modified it. 112 | 113 | (d) I understand and agree that this project and the contribution 114 | are public and that a record of the contribution (including all 115 | personal information I submit with it, including my sign-off) is 116 | maintained indefinitely and may be redistributed consistent with 117 | this project or the open source license(s) involved. -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:21.04-py3 16 | 17 | ############################################################################ 18 | ## Install PyProf 19 | ############################################################################ 20 | FROM $BASE_IMAGE 21 | 22 | ARG PYPROF_VERSION=3.11.0dev 23 | ARG PYPROF_CONTAINER_VERSION=21.06dev 24 | 25 | # Copy entire repo into container even though some is not needed for the 26 | # build itself... because we want to be able to copyright check on 27 | # files that aren't directly needed for build. 28 | WORKDIR /opt/pytorch/pyprof 29 | RUN rm -fr * 30 | COPY . . 31 | 32 | RUN pip uninstall -y pyprof 33 | RUN pip install --no-cache-dir . 34 | 35 | # Generating the docs requires the docs source so copy that into the L0_docs so 36 | # that it is available when the test runs. 37 | RUN cp VERSION qa/L0_docs/. && \ 38 | cp README.rst qa/L0_docs/. && \ 39 | cp -r docs qa/L0_docs/. 40 | 41 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.py 2 | recursive-include pyprof * 3 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. 2 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | |License| 17 | 18 | PyProf - PyTorch Profiling tool 19 | =============================== 20 | 21 | **ANNOUNCEMENT:** 22 | On June 30th 2021, NVIDIA will no longer make contributions to the PyProf repository. 23 | 24 | To profile models in PyTorch, please use `NVIDIA Deep Learning Profiler (DLProf) `_ 25 | 26 | DLProf can help data scientists, engineers, and researchers understand and improve performance of their models by analyzing text reports or visualizing the reports in a web browser with the DLProf Viewer 27 | 28 | DLProf is available on NGC or as a python PIP wheel installation. 29 | 30 | To look for continued development on PyProf, please use https://github.com/adityaiitb/PyProf 31 | 32 | .. overview-begin-marker-do-not-remove 33 | 34 | PyProf is a tool that profiles and analyzes the GPU performance of PyTorch 35 | models. 
PyProf aggregates kernel performance from `Nsight Systems 36 | `_ or `NvProf 37 | `_ and provides the 38 | following additional features: 39 | 40 | * Identifies the layer that launched a kernel: e.g. the association of 41 | `ComputeOffsetsKernel` with a concrete PyTorch layer or API is not obvious. 42 | 43 | * Identifies the tensor dimensions and precision: without knowing the tensor 44 | dimensions and precision, it's impossible to reason about whether the actual 45 | (silicon) kernel time is close to maximum performance of such a kernel on 46 | the GPU. Knowing the tensor dimensions and precision, we can figure out the 47 | FLOPs and bandwidth required by a layer, and then determine how close to 48 | maximum performance the kernel is for that operation. 49 | 50 | * Forward-backward correlation: PyProf determines what the forward pass step 51 | is that resulted in the particular weight and data gradients (wgrad, dgrad), 52 | which makes it possible to determine the tensor dimensions required by these 53 | backprop steps to assess their performance. 54 | 55 | * Determines Tensor Core usage: PyProf can highlight the kernels that use 56 | `Tensor Cores `_. 57 | 58 | * Correlate the line in the user's code that launched a particular kernel (program trace). 59 | 60 | .. overview-end-marker-do-not-remove 61 | 62 | The current release of PyProf is 3.10.0 and is available in the 21.04 release of 63 | the PyTorch container on `NVIDIA GPU Cloud (NGC) `_. The 64 | branch for this release is `r21.04 65 | `_. 66 | 67 | Quick Installation Instructions 68 | ------------------------------- 69 | 70 | .. quick-install-start-marker-do-not-remove 71 | 72 | * Clone the git repository :: 73 | 74 | $ git clone https://github.com/NVIDIA/PyProf.git 75 | 76 | * Navigate to the top level PyProf directory 77 | 78 | * Install PyProf :: 79 | 80 | $ pip install . 81 | 82 | * Verify installation is complete with pip list :: 83 | 84 | $ pip list | grep pyprof 85 | 86 | * Should display :: 87 | 88 | pyprof 3.11.0.dev0 89 | 90 | .. quick-install-end-marker-do-not-remove 91 | 92 | Quick Start Instructions 93 | ------------------------ 94 | 95 | .. quick-start-start-marker-do-not-remove 96 | 97 | * Add the following lines to the PyTorch network you want to profile: :: 98 | 99 | import torch.cuda.profiler as profiler 100 | import pyprof 101 | pyprof.init() 102 | 103 | * Profile with NVProf or Nsight Systems to generate a SQL file. :: 104 | 105 | $ nsys profile -f true -o net --export sqlite python net.py 106 | 107 | * Run the parse.py script to generate the dictionary. :: 108 | 109 | $ python -m pyprof.parse net.sqlite > net.dict 110 | 111 | * Run the prof.py script to generate the reports. :: 112 | 113 | $ python -m pyprof.prof --csv net.dict 114 | 115 | .. quick-start-end-marker-do-not-remove 116 | 117 | Documentation 118 | ------------- 119 | 120 | The User Guide can be found in the 121 | `documentation for current release 122 | `_, and 123 | provides instructions on how to install and profile with PyProf. 124 | 125 | A complete `Quick Start Guide `_ 126 | provides step-by-step instructions to get you quickly started using PyProf. 127 | 128 | An `FAQ `_ provides 129 | answers for frequently asked questions. 130 | 131 | The `Release Notes 132 | `_ 133 | indicate the required versions of the NVIDIA Driver and CUDA, and also describe 134 | which GPUs are supported by PyProf 135 | 136 | Presentation and Papers 137 | ^^^^^^^^^^^^^^^^^^^^^^^ 138 | 139 | * `Automating End-toEnd PyTorch Profiling `_. 
140 | * `Presentation slides `_. 141 | 142 | Contributing 143 | ------------ 144 | 145 | Contributions to PyProf are more than welcome. To 146 | contribute make a pull request and follow the guidelines outlined in 147 | the `Contributing `_ document. 148 | 149 | Reporting problems, asking questions 150 | ------------------------------------ 151 | 152 | We appreciate any feedback, questions or bug reporting regarding this 153 | project. When help with code is needed, follow the process outlined in 154 | the Stack Overflow (https://stackoverflow.com/help/mcve) 155 | document. Ensure posted examples are: 156 | 157 | * minimal – use as little code as possible that still produces the 158 | same problem 159 | 160 | * complete – provide all parts needed to reproduce the problem. Check 161 | if you can strip external dependency and still show the problem. The 162 | less time we spend on reproducing problems the more time we have to 163 | fix it 164 | 165 | * verifiable – test the code you're about to provide to make sure it 166 | reproduces the problem. Remove all other problems that are not 167 | related to your request/question. 168 | 169 | .. |License| image:: https://img.shields.io/badge/License-Apache2-green.svg 170 | :target: http://www.apache.org/licenses/LICENSE-2.0 171 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 3.11.0dev 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Makefile for Sphinx documentation 16 | # 17 | 18 | # You can set these variables from the command line. 19 | SPHINXOPTS = 20 | SPHINXBUILD = sphinx-build 21 | SPHINXPROJ = PyProf 22 | SOURCEDIR = . 23 | BUILDDIR = build 24 | 25 | # Put it first so that "make" without argument is like "make help". 26 | help: 27 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(O) 28 | 29 | clean: 30 | @rm -fr $(BUILDDIR) 31 | 32 | # Catch-all target: route all unknown targets to Sphinx using the new 33 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
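# For example, with the variables above, `make html` runs `sphinx-build -M html . build`
# and writes the rendered pages under build/html (assuming Sphinx is installed);
# any other Sphinx target name is forwarded to sphinx-build the same way.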
34 | %: Makefile 35 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 36 | 37 | .PHONY: help clean Makefile 38 | -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | 16 | {% extends "!layout.html" %} 17 | {% block sidebartitle %} {{ super() }} 18 | 19 | 47 | {% endblock %} 48 | 49 | {% block footer %} {{ super() }} 50 | 51 | 66 | {% endblock %} 67 | -------------------------------------------------------------------------------- /docs/advanced.rst: -------------------------------------------------------------------------------- 1 | .. 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | Advanced PyProf Usage 17 | ===================== 18 | 19 | This section demonstrates some advanced techniques to get even more from your 20 | PyProf profiles. 21 | 22 | .. _section-layer-annotation: 23 | 24 | Layer Annotation 25 | ---------------- 26 | 27 | Adding custom NVTX ranges to the model layers will allow PyProf to aggregate 28 | profile results based on the ranges. :: 29 | 30 | # examples/user_annotation/resnet.py 31 | # Use the “layer:” prefix 32 | 33 | class Bottleneck(nn.Module): 34 | def forward(self, x): 35 | nvtx.range_push("layer:Bottleneck_{}".format(self.id)) # NVTX push marker 36 | 37 | nvtx.range_push("layer:Conv1") # Nested NVTX push/pop markers 38 | out = self.conv1(x) 39 | nvtx.range_pop() 40 | 41 | nvtx.range_push("layer:BN1") # Use the “layer:” prefix 42 | out = self.bn1(out) 43 | nvtx.range_pop() 44 | 45 | nvtx.range_push("layer:ReLU") 46 | out = self.relu(out) 47 | nvtx.range_pop() 48 | 49 | ... 50 | 51 | nvtx.range_pop(); return out # NVTX pop marker, then return the output 52 | 53 | .. _section-custom-function: 54 | 55 | Custom Function 56 | --------------- 57 | 58 | The following is an example of how to enable Torch Autograd to profile a custom 59 | function. :: 60 | 61 | # examples/custom_func_module/custom_function.py 62 | 63 | import torch 64 | import pyprof 65 | pyprof.init() 66 | 67 | class Foo(torch.autograd.Function): 68 | @staticmethod 69 | def forward(ctx, in1, in2): 70 | out = in1 + in2; return out # This could be a custom C++ function 71 | @staticmethod 72 | def backward(ctx, grad): 73 | in1_grad, in2_grad = grad, grad # This could be a custom C++ function 74 | return in1_grad, in2_grad 75 | 76 | # Hook the forward and backward functions to pyprof 77 | pyprof.wrap(Foo, 'forward') 78 | pyprof.wrap(Foo, 'backward') 79 | 80 | .. _section-custom-module: 81 | 82 | Custom Module 83 | --------------- 84 | 85 | The following is an example of how to enable Torch Autograd to profile a custom 86 | module. 
:: 87 | 88 | # examples/custom_func_module/custom_module.py 89 | 90 | import torch 91 | import pyprof 92 | pyprof.init() 93 | 94 | class Foo(torch.nn.Module): 95 | def __init__(self, size): 96 | super(Foo, self).__init__() 97 | self.n = torch.nn.Parameter(torch.ones(size)) 98 | self.m = torch.nn.Parameter(torch.ones(size)) 99 | 100 | def forward(self, input): 101 | return self.n*input + self.m # This could be a custom C++ function. 102 | 103 | # Hook the forward function to pyprof 104 | pyprof.wrap(Foo, 'forward') 105 | 106 | Extensibility 107 | ------------- 108 | 109 | * For custom functions and modules, users can add flops and bytes calculations 110 | 111 | * Python code is easy to extend - no need to recompile, no need to change the 112 | PyTorch backend and resolve merge conflicts on every version upgrade 113 | 114 | Actionable Items 115 | ---------------- 116 | 117 | The following list provides some common actionable items to consider when 118 | analyzing profile results and deciding on how best to improve the performance. 119 | For more customized and directed actionable items, consider using the `NVIDIA 120 | Deep Learning Profiler `_ 121 | that provides direct *Expert Systems* feedback based on the profile. 122 | 123 | * NvProf/Nsight Systems tell us what the hotspots are, but not if we can act on 124 | them. 125 | 126 | * If a kernel runs close to max perf based on FLOPs and bytes (and maximum FLOPs 127 | and bandwidth of the GPU), then there’s no point in optimizing it even if it’s 128 | a hotspot. 129 | 130 | * If the ideal timing based on FLOPs and bytes (max(compute_time, 131 | bandwidth_time)) is much shorter than the silicon time, there’s scope for 132 | improvement. 133 | 134 | * Tensor Core usage (conv): for Volta, convolutions should have the input 135 | channel count (C) and the output channel count (K) divisible by 8, in order to 136 | use tensor cores. For Turing, it’s optimal for C and K to be divisible by 16. 137 | 138 | * Tensor core usage (GEMM): M, N and K divisible by 8 (Volta) or 16 (Turing) (https://docs.nvidia.com/deeplearning/sdk/dl-performance-guide/index.html) 139 | -------------------------------------------------------------------------------- /docs/examples.rst: -------------------------------------------------------------------------------- 1 | .. 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | .. _section-examples: 17 | 18 | Examples 19 | ======== 20 | 21 | This section provides several real examples of how to profile with PyProf. 22 | 23 | Profile Lenet 24 | ------------- 25 | 26 | Navigate to the lenet example. :: 27 | 28 | $ cd pyprof/examples 29 | 30 | Run Nsight Systems to profile the network. :: 31 | 32 | $ nsys profile -f true -o lenet --export sqlite python lenet.py 33 | 34 | Parse the resulting lenet.sqlite database. 
:: 35 | 36 | $ python -m pyprof.parse lenet.sqlite > lenet.dict 37 | 38 | Run the prof script on the resulting dictionary. :: 39 | 40 | $ python -m pyprof.prof --csv lenet.dict > lenet.csv 41 | -------------------------------------------------------------------------------- /docs/faqs.rst: -------------------------------------------------------------------------------- 1 | .. 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | .. _section-faqs: 17 | 18 | PyProf FAQs 19 | =========== 20 | 21 | **How do I intercept the Adam optimizer in APEX?** :: 22 | 23 | import pyprof 24 | import fused_adam_cuda 25 | pyprof.nvtx.wrap(fused_adam_cuda, 'adam') 26 | 27 | **What is the correct initialization if you are using JIT and/or AMP?** 28 | 29 | #. Let any JIT to finish. 30 | #. Initlialize pyprof ``pyprof.init()``. 31 | #. Initialize AMP. 32 | 33 | **How do I profile with ``torch.distributed.launch``?** :: 34 | 35 | nvprof -f -o net%p.sql --profile-from-start off --profile-child-processes \ 36 | python -m torch.distributed.launch net.py 37 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | NVIDIA PyProf - Pytorch Profiler 17 | ================================ 18 | 19 | .. include:: ../README.rst 20 | :start-after: overview-begin-marker-do-not-remove 21 | :end-before: overview-end-marker-do-not-remove 22 | 23 | .. toctree:: 24 | :hidden: 25 | 26 | Documentation home 27 | 28 | .. toctree:: 29 | :maxdepth: 2 30 | :caption: User Guide 31 | 32 | quickstart 33 | install 34 | profile 35 | advanced 36 | examples 37 | faqs -------------------------------------------------------------------------------- /docs/install.rst: -------------------------------------------------------------------------------- 1 | .. 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | .. _section-install: 17 | 18 | Installing PyProf 19 | ================= 20 | 21 | PyProf is available from GitHub. 22 | 23 | .. _section-installing-from-github: 24 | 25 | Installing from GitHub 26 | ---------------------- 27 | 28 | .. include:: ../README.rst 29 | :start-after: quick-install-start-marker-do-not-remove 30 | :end-before: quick-install-end-marker-do-not-remove 31 | 32 | .. _section-installing-from-ngc: 33 | 34 | Install from NGC Container 35 | -------------------------- 36 | 37 | PyProf is available in the PyTorch container on the `NVIDIA GPU Cloud (NGC) 38 | `_. 39 | 40 | Before you can pull a container from the NGC container registry, you 41 | must have Docker and nvidia-docker installed. For DGX users, this is 42 | explained in `Preparing to use NVIDIA Containers Getting Started Guide 43 | `_. 44 | For users other than DGX, follow the `nvidia-docker installation 45 | documentation `_ to install 46 | the most recent version of CUDA, Docker, and nvidia-docker. 47 | 48 | After performing the above setup, you can pull the PyProf container 49 | using the following command:: 50 | 51 | docker pull nvcr.io/nvidia/pytorch:20.12-py3 52 | 53 | Replace *20.12* with the version of PyTorch container that you want to pull. 54 | -------------------------------------------------------------------------------- /docs/quickstart.rst: -------------------------------------------------------------------------------- 1 | .. 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | .. _section-quickstart: 17 | 18 | Quickstart 19 | ========== 20 | 21 | PyProf is available in the following ways: 22 | 23 | * As :ref:`installable python code located in GitHub `. 24 | 25 | * As a pre-built Docker container available from the `NVIDIA GPU Cloud (NGC) 26 | `_. For more information, see :ref:`section-installing-from-ngc`. 27 | 28 | * As a buildable docker container. You can :ref:`build your 29 | own container using Docker ` 30 | 31 | .. _section-quickstart-prerequisites: 32 | 33 | Prerequisites 34 | ------------- 35 | 36 | * If you are installing directly from GitHub or building your own docker 37 | container, you will need to clone the PyProf GitHub repo. Go to 38 | https://github.com/NVIDIA/PyProf and then select the *clone* or *download* 39 | drop down button. 
After cloning the repo be sure to select the r 40 | release branch that corresponds to the version of PyProf want to use:: 41 | 42 | $ git checkout r20.12 43 | 44 | * If you are starting with a pre-built NGC container, you will need to install 45 | Docker and nvidia-docker. For DGX users, see `Preparing to use NVIDIA Containers 46 | `_. 47 | For users other than DGX, see the `nvidia-docker installation documentation 48 | `_. 49 | 50 | .. _section-quickstart-using-a-prebuilt-docker-container: 51 | 52 | Using a Prebuilt Docker Containers 53 | ---------------------------------- 54 | 55 | Use docker pull to get the PyTorch container from NGC:: 56 | 57 | $ docker pull nvcr.io/nvidia/pytorch:-py3 58 | 59 | Where is the version of PyProf that you want to pull. Once you have the 60 | container, you can run the container with the following command:: 61 | 62 | $ docker run --gpus=1 --rm --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -v/full/path/to/example/model/repository:/models 63 | 64 | Where is *nvcr.io/nvidia/pytorch:-py3*. 65 | 66 | .. _section-quickstart-building-with-docker: 67 | 68 | Building With Docker 69 | -------------------- 70 | 71 | Make sure you complete the step in 72 | :ref:`section-quickstart-prerequisites` before attempting to build the PyProf 73 | container. To build PyProf from source, change to the root directory of 74 | the GitHub repo and checkout the release version of the branch that 75 | you want to build (or the `main` branch if you want to build the 76 | under-development version):: 77 | 78 | $ git checkout r20.12 79 | 80 | Then use docker to build:: 81 | 82 | $ docker build --pull -t pyprof . 83 | 84 | After the build completes you can run the container with the following command:: 85 | 86 | $ docker run --gpus=1 --rm --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -v/full/path/to/example/model/repository:/models 87 | 88 | Where is *pyprof*. 89 | 90 | .. _section-quickstart-profile-with-pyprof: 91 | 92 | Profile with PyProf 93 | ------------------- 94 | 95 | .. include:: ../README.rst 96 | :start-after: quick-start-start-marker-do-not-remove 97 | :end-before: quick-start-end-marker-do-not-remove 98 | -------------------------------------------------------------------------------- /pyprof/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import warnings 19 | 20 | from .nvtx.nvmarker import init 21 | from .nvtx.nvmarker import add_wrapper as wrap 22 | -------------------------------------------------------------------------------- /pyprof/examples/.gitignore: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | __pycache__ 16 | *.sql 17 | *.dict 18 | *.csv 19 | -------------------------------------------------------------------------------- /pyprof/examples/custom_func_module/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | This directory has examples which show how to intercept (monkey patch) custom 18 | functions and modules with `pyprof`. No changes are required in `pyprof/parse`, 19 | however, users can add support for bytes and flops calculation for custom 20 | functions and modules in `pyprof/prof` by extending the `OperatorLayerBase` 21 | class. 22 | -------------------------------------------------------------------------------- /pyprof/examples/custom_func_module/custom_function.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import torch 19 | import torch.cuda.profiler as profiler 20 | import pyprof 21 | #Initialize pyprof 22 | pyprof.init() 23 | 24 | 25 | class Foo(torch.autograd.Function): 26 | 27 | @staticmethod 28 | def forward(ctx, in1, in2): 29 | out = in1 + in2 #This could be a custom C/C++ function. 30 | return out 31 | 32 | @staticmethod 33 | def backward(ctx, grad): 34 | in1_grad = grad #This could be a custom C/C++ function. 35 | in2_grad = grad #This could be a custom C/C++ function. 36 | return in1_grad, in2_grad 37 | 38 | 39 | #Hook the forward and backward functions to pyprof 40 | pyprof.nvtx.wrap(Foo, 'forward') 41 | pyprof.nvtx.wrap(Foo, 'backward') 42 | 43 | foo = Foo.apply 44 | 45 | x = torch.ones(4, 4).cuda() 46 | y = torch.ones(4, 4).cuda() 47 | 48 | with torch.autograd.profiler.emit_nvtx(): 49 | profiler.start() 50 | z = foo(x, y) 51 | profiler.stop() 52 | -------------------------------------------------------------------------------- /pyprof/examples/custom_func_module/custom_module.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import torch 19 | import torch.cuda.profiler as profiler 20 | import pyprof 21 | pyprof.init() 22 | 23 | 24 | class Foo(torch.nn.Module): 25 | 26 | def __init__(self, size): 27 | super(Foo, self).__init__() 28 | self.n = torch.nn.Parameter(torch.ones(size)) 29 | self.m = torch.nn.Parameter(torch.ones(size)) 30 | 31 | def forward(self, input): 32 | return self.n * input + self.m 33 | 34 | 35 | # Hook the forward function to pyprof 36 | pyprof.nvtx.wrap(Foo, 'forward') 37 | 38 | foo = Foo(4) 39 | foo.cuda() 40 | x = torch.ones(4).cuda() 41 | 42 | with torch.autograd.profiler.emit_nvtx(): 43 | profiler.start() 44 | z = foo(x) 45 | profiler.stop() 46 | -------------------------------------------------------------------------------- /pyprof/examples/custom_func_module/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -e 17 | 18 | SCRIPT=`realpath $0` 19 | SCRIPTPATH=`dirname $SCRIPT` 20 | PYPROF="$SCRIPTPATH/../.." 21 | 22 | parse="python $PYPROF/parse/parse.py" 23 | prof="python $PYPROF/prof/prof.py" 24 | 25 | for f in *.py 26 | do 27 | base=`basename $f .py` 28 | sql=$base.sql 29 | dict=$base.dict 30 | 31 | #NVprof 32 | echo "nvprof -fo $sql python $f" 33 | nvprof -fo $sql python $f 34 | 35 | #Parse 36 | echo $parse $sql 37 | $parse $sql > $dict 38 | 39 | #Prof 40 | echo $prof $dict 41 | $prof -w 130 $dict 42 | \rm $sql $dict 43 | done 44 | -------------------------------------------------------------------------------- /pyprof/examples/imagenet/imagenet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | """ 18 | Example to run pyprof with imagenet models. 
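A typical profiling flow for this script (a sketch mirroring test.sh in this
directory; output file names are placeholders, and nvprof plus apex's FusedAdam
are assumed to be available):

    nvprof -fo net.sql --profile-from-start off python imagenet.py -m resnet50 -o adam -b 32
    python -m pyprof.parse net.sql > net.dict
    python -m pyprof.prof -w 130 net.dict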
19 | """ 20 | 21 | import sys 22 | import torch 23 | import torch.nn as nn 24 | import torchvision.models as models 25 | import torch.cuda.profiler as profiler 26 | import argparse 27 | 28 | import pyprof 29 | from apex.optimizers import FusedAdam 30 | 31 | 32 | def parseArgs(): 33 | parser = argparse.ArgumentParser(prog=sys.argv[0], description="Run popular imagenet models.") 34 | 35 | parser.add_argument( 36 | "-m", type=str, default="resnet50", choices=[ 37 | "alexnet", "densenet121", "densenet161", "densenet169", "densenet201", "googlenet", "mnasnet0_5", 38 | "mnasnet0_75", "mnasnet1_0", "mnasnet1_3", "mobilenet_v2", "resnet18", "resnet34", "resnet50", "resnet101", 39 | "resnet152", "resnext50_32x4d", "resnext101_32x8d", "wide_resnet50_2", "wide_resnet101_2", 40 | "shufflenet_v2_x0_5", "shufflenet_v2_x1_0", "shufflenet_v2_x1_5", "shufflenet_v2_x2_0", "squeezenet1_0", 41 | "squeezenet1_1", "vgg11", "vgg11_bn", "vgg13", "vgg13_bn", "vgg16", "vgg16_bn", "vgg19", "vgg19_bn", 42 | "inception_v3" 43 | ], help="Model." 44 | ) 45 | 46 | parser.add_argument("-b", type=int, default=32, help="Batch size.") 47 | 48 | parser.add_argument("-o", type=str, default="adam", choices=["adam", "sgd"], help="Optimizer.") 49 | 50 | args = parser.parse_args() 51 | return args 52 | 53 | 54 | d = { 55 | "alexnet": { 56 | 'H': 224, 57 | 'W': 224, 58 | 'opts': {} 59 | }, 60 | "densenet121": { 61 | 'H': 224, 62 | 'W': 224, 63 | 'opts': {} 64 | }, 65 | "densenet161": { 66 | 'H': 224, 67 | 'W': 224, 68 | 'opts': {} 69 | }, 70 | "densenet169": { 71 | 'H': 224, 72 | 'W': 224, 73 | 'opts': {} 74 | }, 75 | "densenet201": { 76 | 'H': 224, 77 | 'W': 224, 78 | 'opts': {} 79 | }, 80 | "googlenet": { 81 | 'H': 224, 82 | 'W': 224, 83 | 'opts': { 84 | 'aux_logits': False 85 | } 86 | }, 87 | "mnasnet0_5": { 88 | 'H': 224, 89 | 'W': 224, 90 | 'opts': {} 91 | }, 92 | "mnasnet0_75": { 93 | 'H': 224, 94 | 'W': 224, 95 | 'opts': {} 96 | }, 97 | "mnasnet1_0": { 98 | 'H': 224, 99 | 'W': 224, 100 | 'opts': {} 101 | }, 102 | "mnasnet1_3": { 103 | 'H': 224, 104 | 'W': 224, 105 | 'opts': {} 106 | }, 107 | "mobilenet_v2": { 108 | 'H': 224, 109 | 'W': 224, 110 | 'opts': {} 111 | }, 112 | "resnet18": { 113 | 'H': 224, 114 | 'W': 224, 115 | 'opts': {} 116 | }, 117 | "resnet34": { 118 | 'H': 224, 119 | 'W': 224, 120 | 'opts': {} 121 | }, 122 | "resnet50": { 123 | 'H': 224, 124 | 'W': 224, 125 | 'opts': {} 126 | }, 127 | "resnet101": { 128 | 'H': 224, 129 | 'W': 224, 130 | 'opts': {} 131 | }, 132 | "resnet152": { 133 | 'H': 224, 134 | 'W': 224, 135 | 'opts': {} 136 | }, 137 | "resnext50_32x4d": { 138 | 'H': 224, 139 | 'W': 224, 140 | 'opts': {} 141 | }, 142 | "resnext101_32x8d": { 143 | 'H': 224, 144 | 'W': 224, 145 | 'opts': {} 146 | }, 147 | "wide_resnet50_2": { 148 | 'H': 224, 149 | 'W': 224, 150 | 'opts': {} 151 | }, 152 | "wide_resnet101_2": { 153 | 'H': 224, 154 | 'W': 224, 155 | 'opts': {} 156 | }, 157 | "shufflenet_v2_x0_5": { 158 | 'H': 224, 159 | 'W': 224, 160 | 'opts': {} 161 | }, 162 | "shufflenet_v2_x1_0": { 163 | 'H': 224, 164 | 'W': 224, 165 | 'opts': {} 166 | }, 167 | "shufflenet_v2_x1_5": { 168 | 'H': 224, 169 | 'W': 224, 170 | 'opts': {} 171 | }, 172 | "shufflenet_v2_x2_0": { 173 | 'H': 224, 174 | 'W': 224, 175 | 'opts': {} 176 | }, 177 | "squeezenet1_0": { 178 | 'H': 224, 179 | 'W': 224, 180 | 'opts': {} 181 | }, 182 | "squeezenet1_1": { 183 | 'H': 224, 184 | 'W': 224, 185 | 'opts': {} 186 | }, 187 | "vgg11": { 188 | 'H': 224, 189 | 'W': 224, 190 | 'opts': {} 191 | }, 192 | "vgg11_bn": { 193 | 'H': 224, 194 | 'W': 224, 195 | 
'opts': {} 196 | }, 197 | "vgg13": { 198 | 'H': 224, 199 | 'W': 224, 200 | 'opts': {} 201 | }, 202 | "vgg13_bn": { 203 | 'H': 224, 204 | 'W': 224, 205 | 'opts': {} 206 | }, 207 | "vgg16": { 208 | 'H': 224, 209 | 'W': 224, 210 | 'opts': {} 211 | }, 212 | "vgg16_bn": { 213 | 'H': 224, 214 | 'W': 224, 215 | 'opts': {} 216 | }, 217 | "vgg19": { 218 | 'H': 224, 219 | 'W': 224, 220 | 'opts': {} 221 | }, 222 | "vgg19_bn": { 223 | 'H': 224, 224 | 'W': 224, 225 | 'opts': {} 226 | }, 227 | "inception_v3": { 228 | 'H': 299, 229 | 'W': 299, 230 | 'opts': { 231 | 'aux_logits': False 232 | } 233 | }, 234 | } 235 | 236 | 237 | def main(): 238 | args = parseArgs() 239 | 240 | pyprof.init() 241 | 242 | N = args.b 243 | C = 3 244 | H = d[args.m]['H'] 245 | W = d[args.m]['W'] 246 | opts = d[args.m]['opts'] 247 | classes = 1000 248 | 249 | net = getattr(models, args.m) 250 | net = net(**opts).cuda().half() 251 | net.train() 252 | 253 | x = torch.rand(N, C, H, W).cuda().half() 254 | target = torch.empty(N, dtype=torch.long).random_(classes).cuda() 255 | 256 | criterion = nn.CrossEntropyLoss().cuda() 257 | if (args.o == "sgd"): 258 | optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9) 259 | elif (args.o == "adam"): 260 | optimizer = FusedAdam(net.parameters()) 261 | else: 262 | assert False 263 | 264 | #Warm up without profiler 265 | for i in range(2): 266 | output = net(x) 267 | loss = criterion(output, target) 268 | optimizer.zero_grad() 269 | loss.backward() 270 | optimizer.step() 271 | 272 | with torch.autograd.profiler.emit_nvtx(): 273 | profiler.start() 274 | output = net(x) 275 | loss = criterion(output, target) 276 | optimizer.zero_grad() 277 | loss.backward() 278 | optimizer.step() 279 | profiler.stop() 280 | 281 | 282 | if __name__ == "__main__": 283 | main() 284 | -------------------------------------------------------------------------------- /pyprof/examples/imagenet/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -e 17 | 18 | SCRIPT=`realpath $0` 19 | SCRIPTPATH=`dirname $SCRIPT` 20 | PYPROF="$SCRIPTPATH/../.." 
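# The loop below profiles each (model, optimizer, batch size) combination with
# nvprof, converts the resulting .sql file into a .dict with parse.py, and
# summarizes it with prof.py. A rough Nsight Systems equivalent of the capture
# step (a sketch only, not exercised by this script) would be:
#   nsys profile -f true -o $base --export sqlite python imagenet.py -m $net -o $optim -b $batch
# with the generated .sqlite file then passed to the parse step.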
21 | 22 | parse="python $PYPROF/parse/parse.py" 23 | prof="python $PYPROF/prof/prof.py" 24 | 25 | for net in "resnet50" 26 | do 27 | for optim in adam sgd 28 | do 29 | for batch in 32 64 30 | do 31 | base="torchvision".$net.$optim.$batch 32 | sql=$base.sql 33 | dict=$base.dict 34 | 35 | #NVprof 36 | echo "nvprof -fo $sql --profile-from-start off python imagenet.py -m ${net} -o $optim -b $batch" 37 | sudo env "PATH=$PATH" nvprof -fo $sql --profile-from-start off python imagenet.py -m ${net} -o $optim -b $batch 38 | 39 | #Parse 40 | echo $parse $sql 41 | $parse $sql > $dict 42 | 43 | #Prof 44 | echo $prof $dict 45 | $prof -w 130 $dict 46 | # \rm $sql $dict 47 | done 48 | done 49 | done 50 | -------------------------------------------------------------------------------- /pyprof/examples/jit/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | *As of this writing, these examples do not work 18 | because of changes being proposed in PyTorch.* 19 | 20 | There are two ways to use PyTorch JIT 21 | - Scripting 22 | - Tracing 23 | 24 | In addition, we can JIT a 25 | - Stand alone function 26 | - Class / class method 27 | 28 | This directory has an example for each of the 4 cases. 29 | Intercepting (monkey patching) JITted code has a few extra steps, 30 | which are explained through comments. 31 | -------------------------------------------------------------------------------- /pyprof/examples/jit/jit_script_function.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import torch 19 | import torch.cuda.profiler as profiler 20 | import pyprof 21 | 22 | #The following creates an object "foo" of type ScriptModule 23 | #The new object has a function called "forward" 24 | 25 | 26 | @torch.jit.script 27 | def foo(x, y): 28 | return torch.sigmoid(x) + y 29 | 30 | 31 | #Initialize pyprof after the JIT step 32 | pyprof.init() 33 | 34 | #Assign a name to the object "foo" 35 | foo.__name__ = "foo" 36 | 37 | #Hook up the forward function to pyprof 38 | pyprof.nvtx.wrap(foo, 'forward') 39 | 40 | x = torch.zeros(4, 4).cuda() 41 | y = torch.ones(4, 4).cuda() 42 | 43 | with torch.autograd.profiler.emit_nvtx(): 44 | profiler.start() 45 | z = foo(x, y) 46 | profiler.stop() 47 | print(z) 48 | -------------------------------------------------------------------------------- /pyprof/examples/jit/jit_script_method.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import torch 19 | import torch.cuda.profiler as profiler 20 | import pyprof 21 | 22 | 23 | class Foo(torch.jit.ScriptModule): 24 | 25 | def __init__(self, size): 26 | super(Foo, self).__init__() 27 | self.n = torch.nn.Parameter(torch.ones(size)) 28 | self.m = torch.nn.Parameter(torch.ones(size)) 29 | 30 | @torch.jit.script_method 31 | def forward(self, input): 32 | return self.n * input + self.m 33 | 34 | 35 | #Initialize pyprof after the JIT step 36 | pyprof.init() 37 | 38 | #Hook up the forward function to pyprof 39 | pyprof.nvtx.wrap(Foo, 'forward') 40 | 41 | foo = Foo(4) 42 | foo.cuda() 43 | x = torch.ones(4).cuda() 44 | 45 | with torch.autograd.profiler.emit_nvtx(): 46 | profiler.start() 47 | z = foo(x) 48 | profiler.stop() 49 | print(z) 50 | -------------------------------------------------------------------------------- /pyprof/examples/jit/jit_trace_function.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import torch 19 | import torch.cuda.profiler as profiler 20 | import pyprof 21 | 22 | 23 | def foo(x, y): 24 | return torch.sigmoid(x) + y 25 | 26 | 27 | x = torch.zeros(4, 4).cuda() 28 | y = torch.ones(4, 4).cuda() 29 | 30 | #JIT the function using tracing 31 | #This returns an object of type ScriptModule with a forward method. 32 | traced_foo = torch.jit.trace(foo, (x, y)) 33 | 34 | #Initialize pyprof after the JIT step 35 | pyprof.init() 36 | 37 | #Assign a name to the object "traced_foo" 38 | traced_foo.__dict__['__name__'] = "foo" 39 | 40 | #Hook up the forward function to pyprof 41 | pyprof.nvtx.wrap(traced_foo, 'forward') 42 | 43 | with torch.autograd.profiler.emit_nvtx(): 44 | profiler.start() 45 | z = traced_foo(x, y) 46 | profiler.stop() 47 | print(z) 48 | -------------------------------------------------------------------------------- /pyprof/examples/jit/jit_trace_method.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import torch 19 | import torch.cuda.profiler as profiler 20 | import pyprof 21 | 22 | 23 | class Foo(torch.nn.Module): 24 | 25 | def __init__(self, size): 26 | super(Foo, self).__init__() 27 | self.n = torch.nn.Parameter(torch.ones(size)) 28 | self.m = torch.nn.Parameter(torch.ones(size)) 29 | 30 | def forward(self, input): 31 | return self.n * input + self.m 32 | 33 | 34 | foo = Foo(4) 35 | foo.cuda() 36 | x = torch.ones(4).cuda() 37 | 38 | #JIT the class using tracing 39 | traced_foo = torch.jit.trace(foo, x) 40 | 41 | #Initialize pyprof after the JIT step 42 | pyprof.init() 43 | 44 | #Assign a name to the object "traced_foo" 45 | traced_foo.__dict__['__name__'] = "foo" 46 | 47 | #Hook up the forward function to pyprof 48 | pyprof.nvtx.wrap(traced_foo, 'forward') 49 | 50 | with torch.autograd.profiler.emit_nvtx(): 51 | profiler.start() 52 | z = traced_foo(x) 53 | profiler.stop() 54 | print(z) 55 | -------------------------------------------------------------------------------- /pyprof/examples/jit/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -e 17 | 18 | SCRIPT=`realpath $0` 19 | SCRIPTPATH=`dirname $SCRIPT` 20 | PYPROF="$SCRIPTPATH/../.." 21 | 22 | parse="python $PYPROF/parse/parse.py" 23 | prof="python $PYPROF/prof/prof.py" 24 | 25 | for f in *.py 26 | do 27 | base=`basename $f .py` 28 | sql=$base.sql 29 | dict=$base.dict 30 | 31 | #NVprof 32 | echo "nvprof -fo $sql python $f" 33 | nvprof -fo $sql python $f 34 | 35 | #Parse 36 | echo $parse $sql 37 | $parse $sql > $dict 38 | 39 | #Prof 40 | echo $prof $dict 41 | $prof -w 130 $dict 42 | \rm $sql $dict 43 | done 44 | -------------------------------------------------------------------------------- /pyprof/examples/lenet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import torch 19 | import torch.nn as nn 20 | import torch.nn.functional as F 21 | import torch.cuda.profiler as profiler 22 | import torch.optim as optim 23 | 24 | import pyprof 25 | pyprof.init() 26 | 27 | 28 | class LeNet5(nn.Module): 29 | 30 | def __init__(self): 31 | super(LeNet5, self).__init__() 32 | # 1 input image channel, 6 output channels, 5x5 square convolution 33 | # kernel 34 | self.conv1 = nn.Conv2d(1, 6, 5) 35 | self.conv2 = nn.Conv2d(6, 16, 5) 36 | # an affine operation: y = Wx + b 37 | self.fc1 = nn.Linear(16 * 5 * 5, 120) 38 | self.fc2 = nn.Linear(120, 84) 39 | self.fc3 = nn.Linear(84, 10) 40 | 41 | def forward(self, x): 42 | # Max pooling over a (2, 2) window 43 | x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2)) 44 | # If the size is a square you can only specify a single number 45 | x = F.max_pool2d(F.relu(self.conv2(x)), 2) 46 | x = x.view(-1, self.num_flat_features(x)) 47 | x = F.relu(self.fc1(x)) 48 | x = F.relu(self.fc2(x)) 49 | x = self.fc3(x) 50 | return x 51 | 52 | def num_flat_features(self, x): 53 | size = x.size()[1:] # all dimensions except the batch dimension 54 | num_features = 1 55 | for s in size: 56 | num_features *= s 57 | return num_features 58 | 59 | 60 | with torch.autograd.profiler.emit_nvtx(): 61 | 62 | net = LeNet5().cuda() 63 | 64 | input = torch.randn(1, 1, 32, 32).cuda() 65 | out = net(input) 66 | 67 | target = torch.randn(10) # a dummy target, for example 68 | target = target.view(1, -1).cuda() # make it the same shape as output 69 | criterion = nn.MSELoss() 70 | 71 | # create your optimizer 72 | optimizer = optim.SGD(net.parameters(), lr=0.01) 73 | 74 | # in your training loop: 75 | optimizer.zero_grad() # zero the gradient buffers 76 | 77 | profiler.start() 78 | output = net(input) 79 | loss = criterion(output, target) 80 | loss.backward() 81 | optimizer.step() # Does the update 82 | profiler.stop() 83 | -------------------------------------------------------------------------------- /pyprof/examples/operators.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | """ 18 | This file checks all Python operators. 
19 | """ 20 | 21 | import sys 22 | import torch 23 | import torch.cuda.profiler as profiler 24 | import operator 25 | import inspect 26 | 27 | #Import and initialize pyprof 28 | import pyprof 29 | pyprof.init() 30 | 31 | X = 1024 32 | Y = 1024 33 | 34 | fa = torch.rand(X, Y).cuda() 35 | fb = torch.rand(X, Y).cuda() 36 | fc = torch.rand(X, Y).cuda() 37 | 38 | ia = torch.randint(0, 100, (X, Y)).cuda() 39 | ib = torch.randint(0, 100, (X, Y)).cuda() 40 | 41 | sa = torch.ones(1, 1).cuda() 42 | sb = torch.ones(1, 1).cuda() 43 | 44 | ba = fa.byte() 45 | 46 | unaryOps = [ 47 | "abs", 48 | "__abs__", 49 | "neg", 50 | "__neg__", 51 | ] 52 | invertOps = [ 53 | "inv", 54 | "invert", 55 | "__inv__", 56 | "__invert__", 57 | ] #imlemented only for byte tensors 58 | #pos, __pos__ is not implemented for tensors 59 | 60 | binaryOps = [] 61 | binaryOps += ["lt", "__lt__", "le", "__le__", "eq", "__eq__", "ne", "__ne__", "ge", "__ge__", "gt", "__gt__"] 62 | binaryOps += [ 63 | "add", "__add__", "sub", "__sub__", "mul", "__mul__", "floordiv", "__floordiv__", "truediv", "__truediv__", "pow", 64 | "__pow__", "mod", "__mod__" 65 | ] 66 | binaryOps += ["and_", "__and__", "or_", "__or__", "xor", "__xor__", "lshift", "__lshift__", "rshift", "__rshift__"] 67 | 68 | inplaceOps = [] 69 | inplaceOps += [ 70 | "iadd", 71 | "__iadd__", 72 | "isub", 73 | "__isub__", 74 | "imul", 75 | "__imul__", 76 | "ifloordiv", 77 | "__ifloordiv__", 78 | "itruediv", 79 | "__itruediv__", 80 | "imod", 81 | "__imod__", 82 | ] 83 | #ipow, __ipow__ is not implemented in pytorch 84 | inplaceOps += [ 85 | "iand", 86 | "__iand__", 87 | "ior", 88 | "__ior__", 89 | "ixor", 90 | "__ixor__", 91 | "ilshift", 92 | "__ilshift__", 93 | "irshift", 94 | "__irshift__", 95 | ] 96 | 97 | matmulOps = ["matmul", "__matmul__"] 98 | inplacematmulOps = ["imatmul", "__imatmul__"] 99 | 100 | reverseIntBinaryOps = [ 101 | "__radd__", 102 | "__rsub__", 103 | "__rmul__", 104 | "__rfloordiv__", 105 | "__rpow__", 106 | ] 107 | reverseFloatBinaryOps = [ 108 | "__radd__", 109 | "__rsub__", 110 | "__rmul__", 111 | "__rdiv__", 112 | "__rtruediv__", 113 | "__rfloordiv__", 114 | "__rpow__", 115 | ] 116 | ''' 117 | TODO 118 | .concat(a, b) 119 | .__concat__(a, b) 120 | .contains(a, b) 121 | .__contains__(a, b) 122 | .countOf(a, b) 123 | .delitem(a, b) 124 | .__delitem__(a, b) 125 | .getitem(a, b) 126 | .__getitem__(a, b) 127 | .indexOf(a, b) 128 | .setitem(a, b, c) 129 | .__setitem__(a, b, c) 130 | .length_hint(obj, default=0) 131 | .iconcat(a, b) 132 | .__iconcat__(a, b) 133 | .index(a) 134 | .__index__(a) 135 | ''' 136 | 137 | #Context manager 138 | with torch.autograd.profiler.emit_nvtx(): 139 | 140 | #Start profiler 141 | profiler.start() 142 | 143 | for op in unaryOps: 144 | assert hasattr(operator, op) 145 | f = getattr(operator, op) 146 | assert inspect.isbuiltin(f) 147 | c = f(ia) 148 | 149 | for op in invertOps: 150 | assert hasattr(operator, op) 151 | f = getattr(operator, op) 152 | assert inspect.isbuiltin(f) 153 | c = f(ba) 154 | 155 | for op in binaryOps: 156 | assert hasattr(operator, op) 157 | f = getattr(operator, op) 158 | assert inspect.isbuiltin(f) 159 | c = f(ia, ib) 160 | c = f(ia, 2) 161 | 162 | for op in inplaceOps: 163 | assert hasattr(operator, op) 164 | f = getattr(operator, op) 165 | assert inspect.isbuiltin(f) 166 | ia = f(ia, ib) 167 | ia = f(ia, 2) 168 | 169 | for op in matmulOps: 170 | assert hasattr(operator, op) 171 | f = getattr(operator, op) 172 | assert inspect.isbuiltin(f) 173 | c = f(fa, fb) 174 | 175 | for op in inplacematmulOps: 176 | 
assert hasattr(operator, op) 177 | f = getattr(operator, op) 178 | assert inspect.isbuiltin(f) 179 | fa = f(fa, fb) 180 | 181 | for op in reverseIntBinaryOps: 182 | assert hasattr(torch.Tensor, op) 183 | f = getattr(torch.Tensor, op) 184 | ia = f(ia, ib) 185 | 186 | for op in reverseFloatBinaryOps: 187 | assert hasattr(torch.Tensor, op) 188 | f = getattr(torch.Tensor, op) 189 | fa = f(fa, fb) 190 | ''' 191 | #c = fa[3] 192 | #c = fa[3][3] 193 | #c = torch.min(fa, 3) 194 | c = torch.sum(fa) 195 | c = torch.max(fa) 196 | c = -fa 197 | #fc[2][2] = fa[2][2] 198 | 199 | c = a_scalar and b_scalar 200 | c = a_scalar or b_scalar 201 | c = not a_scalar 202 | 203 | c = a is b 204 | c = a is not b 205 | ''' 206 | 207 | #Stop profiler 208 | profiler.stop() 209 | -------------------------------------------------------------------------------- /pyprof/examples/simple.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | """ 18 | This simple file provides an example of how to 19 | - import the pyprof library and initialize it 20 | - use the emit_nvtx context manager 21 | - start and stop the profiler 22 | 23 | Only kernels within profiler.start and profiler.stop calls are profiled. 24 | To profile 25 | $ nvprof -f -o simple.sql --profile-from-start off ./simple.py 26 | """ 27 | 28 | import sys 29 | import torch 30 | import torch.cuda.profiler as profiler 31 | 32 | #Import and initialize pyprof 33 | import pyprof 34 | pyprof.init() 35 | 36 | a = torch.randn(5, 5).cuda() 37 | b = torch.randn(5, 5).cuda() 38 | 39 | #Context manager 40 | with torch.autograd.profiler.emit_nvtx(): 41 | 42 | #Start profiler 43 | profiler.start() 44 | 45 | c = a + b 46 | c = torch.mul(a, b) 47 | c = torch.matmul(a, b) 48 | c = torch.argmax(a, dim=1) 49 | c = torch.nn.functional.pad(a, (1, 1)) 50 | 51 | #Stop profiler 52 | profiler.stop() 53 | -------------------------------------------------------------------------------- /pyprof/examples/user_annotation/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | Nvidia NVTX range markers (https://docs.nvidia.com/gameworks/content/gameworkslibrary/nvtx/nvidia_tools_extension_library_nvtx.htm) 18 | are a useful tool to capture and observe events and code ranges etc. 19 | Using PyTorch APIs e.g, `torch.cuda.nvtx.range_push("xxx")` and `torch.cuda.nvtx.range_pop()` users can easily add their own NVTX range markers. These markers can then be observed in the Nvidia Visual Profiler (NVVP). 20 | 21 | While inserting NVTX markers (strings), if the users follow a specific string pattern `"layer:your_string_here"` e.g. 
`"layer:conv1"` or `"layer:encoder_layer_3_self_attention`, then `pyprof` will display the strings `conv1` and `encoder_layer_3_self_attention` next to the associated kernels in the output of `prof.py` when used with the `-c layer` option. 22 | 23 | NVTX range markers can be nested and if users follow the above string pattern, the output of `prof.py` will show all the markers associated with a kernel. 24 | 25 | The file `resnet.py` (a simplified version of the torchvision model) shows an example of how users can add (nested) NVTX markers with information which can greatly aid in understanding and analysis of networks. 26 | 27 | Note that the pattern `"layer:your_string_here"` was chosen to aid information extraction by `pyprof`. The tool will work seamlessly even if there are other markers or no markers at all. 28 | 29 | ### To run 30 | 31 | ```sh 32 | nvprof -fo resnet.sql --profile-from-start off python resnet.py 33 | parse.py resnet.sql > resnet.dict 34 | prof.py --csv -c idx,layer,dir,mod,op,kernel,params,sil resnet.dict 35 | ``` 36 | 37 | The file `resnet.sql` can also be opened with NVVP as usual. 38 | -------------------------------------------------------------------------------- /pyprof/examples/user_annotation/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -e 17 | 18 | SCRIPT=`realpath $0` 19 | SCRIPTPATH=`dirname $SCRIPT` 20 | PYPROF="$SCRIPTPATH/../.." 21 | 22 | parse="python $PYPROF/parse/parse.py" 23 | prof="python $PYPROF/prof/prof.py" 24 | 25 | for f in *.py 26 | do 27 | base=`basename $f .py` 28 | sql=$base.sql 29 | dict=$base.dict 30 | 31 | #NVprof 32 | echo "nvprof -fo --profile-from-start off $sql python $f" 33 | nvprof -fo $sql --profile-from-start off python $f 34 | 35 | #Parse 36 | echo $parse $sql 37 | $parse $sql > $dict 38 | 39 | #Prof 40 | echo $prof $dict 41 | #$prof -w 130 $dict 42 | $prof --csv -c idx,layer,dir,mod,op,kernel,params,sil $dict 43 | \rm $sql $dict 44 | done 45 | -------------------------------------------------------------------------------- /pyprof/nvtx/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from .nvmarker import init 19 | from .nvmarker import add_wrapper as wrap 20 | -------------------------------------------------------------------------------- /pyprof/parse/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | -------------------------------------------------------------------------------- /pyprof/parse/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from .parse import main 19 | 20 | if __name__ == '__main__': 21 | main() 22 | -------------------------------------------------------------------------------- /pyprof/parse/db.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import sys, sqlite3 19 | 20 | 21 | class DB(object): 22 | """ 23 | This class provides functions for DB operations 24 | with exception handling. 
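    A minimal usage sketch (the database file name and the query are
    illustrative only):

        db = DB("net.sql")
        rows = db.select("SELECT * FROM sqlite_master")
        db.close()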
25 | """ 26 | 27 | def __init__(self, dbFile): 28 | try: 29 | conn = sqlite3.connect(dbFile) 30 | conn.row_factory = sqlite3.Row 31 | c = conn.cursor() 32 | except: 33 | print("Error opening {}".format(dbFile)) 34 | sys.exit(1) 35 | 36 | self.conn = conn 37 | self.c = c 38 | 39 | def select(self, cmd): 40 | try: 41 | self.c.execute(cmd) 42 | #rows = self.c.fetchall() 43 | rows = [dict(row) for row in self.c.fetchall()] 44 | except sqlite3.Error as e: 45 | print(e) 46 | sys.exit(1) 47 | except: 48 | print("Uncaught error in SQLite access while executing {}".format(cmd)) 49 | sys.exit(1) 50 | 51 | #print(rows) 52 | return rows 53 | 54 | def insert(self, cmd, data): 55 | try: 56 | self.c.execute(cmd, data) 57 | except sqlite3.Error as e: 58 | print(e) 59 | sys.exit(1) 60 | except: 61 | print("Uncaught error in SQLite access while executing {}".format(cmd)) 62 | sys.exit(1) 63 | 64 | def execute(self, cmd): 65 | try: 66 | self.c.execute(cmd) 67 | except sqlite3.Error as e: 68 | print(e) 69 | sys.exit(1) 70 | except: 71 | print("Uncaught error in SQLite access while executing {}".format(cmd)) 72 | sys.exit(1) 73 | 74 | def commit(self): 75 | self.conn.commit() 76 | 77 | def close(self): 78 | self.c.close() 79 | self.conn.close() 80 | -------------------------------------------------------------------------------- /pyprof/parse/kernel.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import cxxfilt, struct, binascii 19 | 20 | #Helper functions 21 | 22 | 23 | def demangle(name): 24 | """ 25 | Demangle a C++ string 26 | """ 27 | result = name 28 | try: 29 | result = cxxfilt.demangle(name) 30 | except: 31 | pass 32 | return result 33 | 34 | 35 | def getShortName(name): 36 | """ 37 | Returns a shorter kernel name 38 | """ 39 | sname = name.split("<")[0] \ 40 | .replace("void ", "") \ 41 | .replace("at::","") \ 42 | .replace("cuda::", "") \ 43 | .replace("native::","") \ 44 | .replace("(anonymous namespace)::", "") 45 | sname = sname.split("(")[0] 46 | return sname 47 | 48 | 49 | class Kernel(object): 50 | """ 51 | This class stores information about a kernel. 
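    The fields below are populated by setKernelInfo(), setMarkerInfo(),
    setDirection() and setOp(); print() then emits them as a plain
    dictionary, which is what prof.py consumes.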
52 | """ 53 | 54 | kernels = [] 55 | profStart = 0 56 | 57 | def __init__(self): 58 | self.kNameId = None 59 | self.kShortName = None 60 | self.kLongName = None 61 | self.kStartTime = None #GPU start time 62 | self.kEndTime = None #GPU end time 63 | self.kDuration = None 64 | self.device = None 65 | self.stream = None 66 | self.grid = () 67 | self.block = () 68 | self.corrId = None 69 | self.rStartTime = None #CPU start time 70 | self.rEndTime = None #CPU end time 71 | self.rDuration = None 72 | self.tid = None 73 | self.pid = None 74 | self.objId = None 75 | self.timeOffset = None 76 | 77 | self.layerMarkers = [] 78 | self.traceMarkers = [] 79 | self.reprMarkers = [] 80 | self.pyprofMarkers = [] 81 | self.seqMarkers = [] 82 | self.otherMarkers = [] 83 | self.altMarkers = [] 84 | self.seqId = [] 85 | self.altSeqId = [] 86 | self.layer = [] 87 | 88 | self.subSeqId = None 89 | self.dir = None 90 | self.mod = [] 91 | self.op = [] 92 | 93 | def setKernelInfo(self, info): 94 | self.kNameId = info['kNameId'] 95 | self.corrId = int(info['correlationId']) 96 | start = int(info['start']) 97 | end = int(info['end']) 98 | assert end > start, "This assertion can fail for very large profiles. It usually fails when start = end = 0." 99 | self.kStartTime = start 100 | self.kEndTime = end 101 | self.kDuration = end - start 102 | assert (start > Kernel.profStart) 103 | self.device = int(info['deviceId']) 104 | self.stream = int(info['streamId']) 105 | self.grid = (info['gridX'], info['gridY'], info['gridZ']) 106 | self.block = (info['blockX'], info['blockY'], info['blockZ']) 107 | self.timeOffset = Kernel.profStart 108 | self.setKernelName(info['name']) 109 | self.setRunTimeInfo(info) 110 | 111 | def setKernelName(self, name): 112 | cadena = demangle(name) 113 | self.kLongName = cadena 114 | self.kShortName = getShortName(cadena) 115 | 116 | def setRunTimeInfo(self, info): 117 | self.rStartTime = info['rStart'] 118 | self.rEndTime = info['rEnd'] 119 | self.rDuration = info['rEnd'] - info['rStart'] 120 | self.pid = info['pid'] 121 | self.tid = info['tid'] 122 | self.objId = info['objId'] 123 | assert (self.rStartTime < self.rEndTime) 124 | assert (self.rStartTime < self.kStartTime) 125 | 126 | def setMarkerInfo(self, info): 127 | self.layerMarkers, self.traceMarkers, self.reprMarkers, self.pyprofMarkers, self.seqMarkers, self.otherMarkers, self.altMarkers, self.seqId, self.altSeqId, self.layer = info 128 | self.subSeqId = 0 129 | 130 | def setDirection(self): 131 | """ 132 | Set direction (fprop, bprop) based on PyTorch sequence markers. 133 | It is a heuristic and not a foolproof method. 134 | """ 135 | if any("Backward, seq = " in x for x in self.seqMarkers) or \ 136 | any("backward, seq = " in x for x in self.seqMarkers) or \ 137 | any("Backward0, seq = " in x for x in self.seqMarkers): 138 | self.dir = "bprop" 139 | else: 140 | self.dir = "fprop" 141 | 142 | def setOp(self): 143 | """ 144 | Detect and set the class/module (mod) and operation (op) 145 | of the kernel e.g. torch.nn.functional / linear, torch / sigmoid. 146 | The lookup sequence we use is 147 | NVTX markers inserted by pyprof 148 | NVTX markers inserted by PyTorch in bprop 149 | NVTX markers inserted by PyTorch in fprop 150 | It is a heuristic and not a foolproof method. 
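    Note that mod and op are lists: a kernel covered by several pyprof
    markers records one (mod, op) pair per marker.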
151 | """ 152 | 153 | def sanitize(name): 154 | name = name.replace("torch","") \ 155 | .replace("autograd","") \ 156 | .replace("_backward","") \ 157 | .replace("::","") \ 158 | .replace("jit","") \ 159 | .replace("(anonymous namespace)","") 160 | head, sep, tail = name.partition("Backward") 161 | return head 162 | 163 | #Check pyprof markers 164 | for m in self.pyprofMarkers: 165 | assert ("mod" in m) and ("op" in m) and ("args" in m) 166 | t = eval(m) 167 | self.op.append(t['op']) 168 | self.mod.append(t['mod']) 169 | 170 | if len(self.op): 171 | return 172 | 173 | #Check bprop kernel markers 174 | for m in self.seqMarkers: 175 | if ("backward, seq = " in m) or ("Backward, seq = " in m): 176 | op = m.split(",")[0] 177 | op = sanitize(op) 178 | self.op.append(op) 179 | self.mod.append('na') 180 | 181 | if len(self.op): 182 | return 183 | 184 | #Check markers with "seq = " 185 | for m in self.seqMarkers: 186 | if ", seq = " in m: 187 | op = m.split(",")[0] 188 | self.op.append(op) 189 | self.mod.append('na') 190 | 191 | if len(self.op): 192 | return 193 | 194 | #If nothing else 195 | if len(self.otherMarkers): 196 | self.op.append(self.otherMarkers[0]) 197 | self.mod.append('na') 198 | 199 | def print(self): 200 | """ 201 | Print kernel information. This is used by prof.py. 202 | """ 203 | 204 | a = lambda: None 205 | a.kShortName = self.kShortName 206 | a.kDuration = self.kDuration 207 | #a.layerMarkers = self.layerMarkers 208 | a.layer = self.layer 209 | a.trace = self.traceMarkers 210 | a.reprMarkers = self.reprMarkers 211 | a.marker = self.pyprofMarkers 212 | a.seqMarker = self.seqMarkers 213 | 214 | a.seqId = self.seqId 215 | a.subSeqId = self.subSeqId 216 | a.altSeqId = self.altSeqId 217 | 218 | a.dir = self.dir 219 | a.mod = self.mod 220 | a.op = self.op 221 | 222 | a.tid = self.tid 223 | a.device = self.device 224 | a.stream = self.stream 225 | a.grid = self.grid 226 | a.block = self.block 227 | a.kLongName = self.kLongName 228 | 229 | print(a.__dict__) 230 | -------------------------------------------------------------------------------- /pyprof/parse/parse.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | """ 18 | Parse the SQLite3 database from NVprof or Nsight and print a dictionary for every kernel. 
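
Typical usage (the database name is illustrative):

    python -m pyprof.parse net.sql > net.dict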
19 | """ 20 | 21 | import sys 22 | import os 23 | import argparse 24 | from tqdm import tqdm 25 | 26 | from .db import DB 27 | from .kernel import Kernel 28 | from .nvvp import NVVP 29 | from .nsight import Nsight 30 | 31 | 32 | def parseArgs(): 33 | parser = argparse.ArgumentParser(prog=sys.argv[0], description="Parse SQLite3 DB from NVprof or Nsight.") 34 | parser.add_argument("file", type=str, default=None, help="SQLite3 database.") 35 | 36 | args = parser.parse_args() 37 | 38 | if not os.path.isfile(args.file): 39 | raise parser.error("No such file '{}'.".format(args.file)) 40 | 41 | return args 42 | 43 | 44 | def dbIsNvvp(db): 45 | cmd = "SELECT * FROM sqlite_master where type='table' AND name='StringTable'" 46 | result = db.select(cmd) 47 | return True if len(result) == 1 else False 48 | 49 | 50 | def main(): 51 | args = parseArgs() 52 | 53 | db = DB(args.file) 54 | nvvp = None 55 | if dbIsNvvp(db): 56 | nvvp = NVVP(db) 57 | else: 58 | nvvp = Nsight(db) 59 | 60 | kInfo = nvvp.getKernelInfo() 61 | if len(kInfo) == 0: 62 | print("Found 0 kernels. Exiting.", file=sys.stderr) 63 | db.close() 64 | sys.exit(0) 65 | else: 66 | print("Found {} kernels. Getting info for each kernel.".format(len(kInfo)), file=sys.stderr) 67 | 68 | nvvp.createMarkerTable() 69 | 70 | prevSeqId = -1 71 | prevSubSeqId = -1 72 | prevOp = "na" 73 | 74 | Kernel.profStart = nvvp.getProfileStart() 75 | 76 | for i in tqdm(range(len(kInfo)), ascii=True): 77 | info = kInfo[i] 78 | k = Kernel() 79 | 80 | #Calculate/encode object ID 81 | nvvp.encode_object_id(info) 82 | 83 | #Set kernel info 84 | k.setKernelInfo(info) 85 | 86 | #Get and set marker and seqid info 87 | info = nvvp.getMarkerInfo(k.objId, k.rStartTime, k.rEndTime) 88 | k.setMarkerInfo(info) 89 | 90 | #If the seqId contains both 0 and non zero integers, remove 0. 91 | if any(seq != 0 for seq in k.seqId) and (0 in k.seqId): 92 | k.seqId.remove(0) 93 | 94 | #Set direction (it uses seq id) 95 | k.setDirection() 96 | 97 | #Set op 98 | k.setOp() 99 | 100 | #The following code is based on heuristics. 101 | #TODO: Refactor. 102 | #Assign subSeqId, adjust seqId and altSeqId 103 | #seqId can be 0. 104 | #A kernel can have multiple seqIds both in fprop and bprop. 105 | #In bprop, seqIds might not decrease monotonically. I have observed a few blips. 106 | if len(k.seqId): 107 | assert (k.dir in ["fprop", "bprop"]) 108 | if (k.dir == "fprop"): 109 | #Check if there is a sequence id larger than the previous 110 | inc = (k.seqId[-1] > prevSeqId) 111 | if inc: 112 | currSeqId = [x for x in k.seqId if x > prevSeqId][0] 113 | else: 114 | currSeqId = prevSeqId 115 | else: 116 | currSeqId = k.seqId[0] 117 | 118 | #if ((currSeqId == prevSeqId) and (k.op == prevOp)): 119 | if ((currSeqId == prevSeqId) and (k.op == prevOp)) or ((k.op[0] == "forward") and (k.op == prevOp) and 120 | (k.mod[0] in ["LSTMCell", "GRUCell", "RNNCell"])): 121 | #The second condition is to trap cases when pytorch does not use cudnn for a LSTMCell. 
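                    #Consecutive kernels that share the same seq id and op are
                    #disambiguated by bumping subSeqId; prevSeqId, prevSubSeqId and
                    #prevOp carry that state from one kernel to the next.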
122 | k.subSeqId = prevSubSeqId + 1 123 | 124 | prevSeqId = currSeqId 125 | prevSubSeqId = k.subSeqId 126 | prevOp = k.op 127 | 128 | #Keep currSeqId in k.seqId, move everything else to k.altSeqId 129 | for s in k.seqId: 130 | if s != currSeqId: 131 | k.seqId.remove(s) 132 | k.altSeqId.append(s) 133 | 134 | for s in k.altSeqId: 135 | if s == currSeqId: 136 | k.altSeqId.remove(s) 137 | 138 | k.altSeqId = list(set(k.altSeqId)) 139 | if (len(k.altSeqId)): 140 | (k.altSeqId).sort() 141 | 142 | k.print() 143 | 144 | db.close() 145 | 146 | 147 | if __name__ == '__main__': 148 | main() 149 | -------------------------------------------------------------------------------- /pyprof/prof/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | -------------------------------------------------------------------------------- /pyprof/prof/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from .prof import main 19 | 20 | if __name__ == '__main__': 21 | main() 22 | -------------------------------------------------------------------------------- /pyprof/prof/activation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | 18 | from .base import OperatorLayerBase 19 | from .tensor import Tensor 20 | 21 | 22 | class Activation(OperatorLayerBase): 23 | """ 24 | This class handles the various activation functions. 25 | """ 26 | 27 | ops = [ 28 | "celu", "elu", "elu_", "hardshrink", "hardtanh", "hardtanh_", "leaky_relu", "leaky_relu_", "logsigmoid", 29 | "prelu", "relu", "relu_", "relu6", "rrelu", "rrelu_", "selu", "sigmoid", "softplus", "softshrink", "softsign", 30 | "tanh", "tanhshrink", "threshold", "threshold_" 31 | ] 32 | 33 | def __init__(self, d): 34 | marker = eval(d.argMarker[0]) 35 | mod = marker['mod'] 36 | op = marker['op'] 37 | args = marker['args'] 38 | 39 | self.mod_ = mod 40 | self.op_ = op 41 | 42 | assert (mod in ["torch.nn.functional", "torch", "Tensor"]) 43 | 44 | #Filter out named parameters 45 | args = list(filter(lambda x: x['name'] == '', args)) 46 | 47 | assert (len(args) >= 1) 48 | arg = args[0] 49 | assert (arg['type'] == "tensor") 50 | 51 | self.input = Tensor(arg['shape'], arg['dtype']) 52 | self.dir = d.dir 53 | 54 | def params(self): 55 | return str(self.input) 56 | 57 | def flops(self): 58 | # TODO: revise based on op 59 | return self.input.size 60 | 61 | def bytes(self): 62 | # TODO: revise based on op 63 | direction = self.dir 64 | b = self.input.bytes 65 | # fprop is 1 read, 1 write 66 | # bprop is 2 reads, 1 write 67 | b *= 2 if direction == "fprop" else 3 68 | return b 69 | 70 | def tc(self): 71 | return "-" 72 | 73 | def op(self): 74 | return self.op_ 75 | 76 | def mod(self): 77 | return self.mod_ 78 | -------------------------------------------------------------------------------- /pyprof/prof/base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from abc import ABC, abstractmethod 19 | 20 | 21 | class OperatorLayerBase(ABC): 22 | """ 23 | Base class for all layers and operators. 24 | Every derived class should have the following functions. 25 | """ 26 | 27 | @abstractmethod 28 | def tc(self): 29 | """ 30 | Tensor core usage by the kernel. 31 | Return "1" (yes), "0" (no, but possible), "-" (not applicable) 32 | """ 33 | pass 34 | 35 | @abstractmethod 36 | def params(self): 37 | """ 38 | Kernel parameters to be printed. 39 | """ 40 | pass 41 | 42 | @abstractmethod 43 | def flops(self): 44 | """ 45 | Note that 1 FMA = 2 flops. 46 | """ 47 | pass 48 | 49 | @abstractmethod 50 | def bytes(self): 51 | pass 52 | 53 | @abstractmethod 54 | def mod(self): 55 | """ 56 | Name of the module/class e.g. torch.nn.functional. 57 | """ 58 | pass 59 | 60 | @abstractmethod 61 | def op(self): 62 | """ 63 | Name of the operator e.g. sigmoid. 
64 | """ 65 | pass 66 | -------------------------------------------------------------------------------- /pyprof/prof/convert.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from .base import OperatorLayerBase 19 | from .tensor import Tensor 20 | 21 | 22 | class Convert(OperatorLayerBase): 23 | """ 24 | Class to handle convert operations. 25 | """ 26 | ops = ["byte", "char", "double", "float", "half", "int", "long", "short", "to"] 27 | 28 | def __init__(self, d): 29 | marker = eval(d.argMarker[0]) 30 | mod = marker['mod'] 31 | op = marker['op'] 32 | args = marker['args'] 33 | 34 | self.mod_ = mod 35 | self.op_ = op 36 | 37 | assert (mod == "Tensor") 38 | assert (op in Convert.ops) 39 | assert (len(args) == 1) 40 | 41 | t = args[0] 42 | if t['type'] == "tensor": 43 | self.input = Tensor(t['shape'], t['dtype']) 44 | else: # scalar 45 | self.input = Tensor([], t['type']) 46 | 47 | if op == "to": 48 | # the output dtype is unknown 49 | self.output = self.input 50 | else: 51 | self.output = Tensor(self.input.shape, op) 52 | 53 | def params(self): 54 | return str(self.input) 55 | 56 | def op(self): 57 | return self.op_ 58 | 59 | def mod(self): 60 | return self.mod_ 61 | 62 | def tc(self): 63 | return "-" 64 | 65 | def flops(self): 66 | return 0 67 | 68 | def bytes(self): 69 | return self.input.bytes + self.output.bytes 70 | -------------------------------------------------------------------------------- /pyprof/prof/data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from .utility import Utility 19 | 20 | 21 | class Data(object): 22 | """ 23 | Class to store all the data for every kernel e.g. name, bytes, flops, device, stream etc. 
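    Each instance is built from one kernel dictionary emitted by pyprof.parse;
    mod, op, params, tc, flops and bytes start out as placeholders and are
    filled in once the operator has been identified (e.g. via setParams below).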
24 | """ 25 | 26 | def __init__(self, kernel): 27 | #Available from NVprof 28 | self.tid = kernel['tid'] 29 | self.device = kernel['device'] 30 | self.stream = kernel['stream'] 31 | self.grid = str(kernel['grid']).replace(" ", "").replace("(", "").replace(")", "") 32 | self.block = str(kernel['block']).replace(" ", "").replace("(", "").replace(")", "") 33 | self.name = kernel['kShortName'].replace(" ", "_") 34 | self.lName = kernel['kLongName'] 35 | self.sil = kernel['kDuration'] #units ns 36 | 37 | self.index = None 38 | 39 | #Markers 40 | self.argMarker = kernel['marker'] 41 | self.modMarker = kernel['reprMarkers'] 42 | self.seqMarker = kernel['seqMarker'] 43 | 44 | self.layer = kernel['layer'] 45 | self.trace = kernel['trace'] 46 | 47 | self.seqId = kernel['seqId'] 48 | self.altSeqId = kernel['altSeqId'] 49 | 50 | self.dir = kernel['dir'] 51 | self.sub = kernel['subSeqId'] 52 | 53 | self.mod = "na" 54 | self.op = "na" 55 | self.params = {"na": "na"} 56 | self.tc = "na" 57 | self.flops = 0 58 | self.bytes = 0 59 | 60 | def setParams(self, params): 61 | # TODO: Remove the else block after refactoring. 62 | if type(params) == str: 63 | self.params = params 64 | else: 65 | #Remove space from params 66 | qaz = "" 67 | for key, value in params.items(): 68 | if "type" not in key: 69 | qaz += "{}={},".format(key, value) 70 | else: 71 | if type(value) is str: 72 | qaz += "{},".format(Utility.typeToString(value)) 73 | else: 74 | qaz += "{}".format(value) 75 | 76 | self.params = qaz.replace(" ", "") 77 | -------------------------------------------------------------------------------- /pyprof/prof/dropout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | 18 | from .base import OperatorLayerBase 19 | from .tensor import Tensor 20 | 21 | class Dropout(OperatorLayerBase): 22 | 23 | def __init__(self, d): 24 | marker = eval(d.argMarker[0]) 25 | mod = marker['mod'] 26 | op = marker['op'] 27 | args = marker['args'] 28 | 29 | self.marker = marker 30 | self.mod_ = mod 31 | self.op_ = op 32 | self.args = args 33 | 34 | assert (mod == "torch.nn.functional") 35 | assert (op == "dropout") 36 | 37 | self.inp = Tensor(args[0]['shape'], args[0]['dtype']) 38 | self.dir = d.dir 39 | 40 | return 41 | 42 | def params(self): 43 | return str(self.inp) 44 | 45 | def op(self): 46 | return self.op_ 47 | 48 | def mod(self): 49 | return self.mod_ 50 | 51 | def tc(self): 52 | return "-" 53 | 54 | def bytes(self): 55 | #Ignoring the cost of writing and reading the mask 56 | return self.inp.bytes * 2 57 | 58 | def flops(self): 59 | # Note: This is approximate and depends on the RNG 60 | return 5 * self.inp.size 61 | -------------------------------------------------------------------------------- /pyprof/prof/dtype.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2020, Aditya Agrawal. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | class Dtype(object): 18 | 19 | _types = { 20 | "uint8" : (1, "uint8"), 21 | "int8" : (1, "int8"), 22 | "byte" : (1, "byte"), 23 | "char" : (1, "char"), 24 | "bool" : (1, "bool"), 25 | 26 | "float16" : (2, "fp16"), 27 | "half" : (2, "fp16"), 28 | "int16" : (2, "int16"), 29 | "short" : (2, "int16"), 30 | 31 | "float32" : (4, "fp32"), 32 | "float" : (4, "fp32"), 33 | "int32" : (4, "int32"), 34 | "int" : (4, "int32"), 35 | 36 | "int64" : (8, "int64"), 37 | "long" : (8, "int64"), 38 | "float64" : (8, "fp64"), 39 | "double" : (8, "fp64"), 40 | } 41 | 42 | @staticmethod 43 | def types(): 44 | t = Dtype._types.keys() 45 | return list(t) 46 | 47 | def __init__(self, dtype): 48 | assert dtype in Dtype.types() 49 | size, name = Dtype._types[dtype] 50 | self._itemsize = size 51 | self._name = name 52 | 53 | def __str__(self): 54 | return self._name 55 | 56 | @property 57 | def itemsize(self): 58 | return self._itemsize 59 | 60 | def main(): 61 | print(Dtype.types()) 62 | for i in Dtype.types(): 63 | dt = Dtype(i) 64 | print(i, dt, dt.itemsize) 65 | 66 | if __name__ == '__main__': 67 | main() 68 | -------------------------------------------------------------------------------- /pyprof/prof/embedding.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from .base import OperatorLayerBase 19 | from .tensor import Tensor 20 | 21 | 22 | class Embedding(OperatorLayerBase): 23 | 24 | def __init__(self, d): 25 | marker = eval(d.argMarker[0]) 26 | mod = marker['mod'] 27 | op = marker['op'] 28 | args = marker['args'] 29 | 30 | self.mod_ = mod 31 | self.op_ = op 32 | 33 | assert (mod == "torch.nn.functional") 34 | assert (op == "embedding") 35 | 36 | input = args[0] 37 | embedding = args[1] 38 | 39 | self.input = Tensor(input['shape'], input['dtype']) 40 | self.embedding = Tensor(embedding['shape'], embedding['dtype']) 41 | 42 | assert (len(self.embedding.shape) == 2) 43 | 44 | self.dir = d.dir 45 | self.sub = d.sub 46 | return 47 | 48 | def params(self): 49 | return str(self.input) + ";" + str(self.embedding) 50 | 51 | def op(self): 52 | return self.op_ 53 | 54 | def mod(self): 55 | return self.mod_ 56 | 57 | def tc(self): 58 | return "-" 59 | 60 | def bytes(self): 61 | b = 0 62 | if self.dir == "fprop": 63 | # read indices 64 | b += self.input.bytes 65 | # read and write the embedding values 66 | b += 2 * self.input.size * self.embedding.shape[1] * self.embedding.itemsize 67 | else: 68 | # 3 times the size of the incoming gradient 69 | b = 3 * self.input.size * self.embedding.shape[1] * self.embedding.itemsize 70 | 71 | if self.sub > 0: 72 | b = 0 73 | 74 | return b 75 | 76 | def flops(self): 77 | # Note: not implemented yet 78 | return 0 79 | -------------------------------------------------------------------------------- /pyprof/prof/linear.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from collections import OrderedDict 19 | from .tc import TC_Whitelist 20 | from .utility import Utility 21 | from .base import OperatorLayerBase 22 | 23 | 24 | class Linear(OperatorLayerBase): 25 | ''' 26 | Notes: 27 | If the bias occurs before the GEMM, then its 1 write (bias expansion). 28 | If the bias occurs after, then its 1 read and 1 write. 29 | bias in bprop is a reduction and hence is 1 read. 
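    In GEMM terms (see setXWBMNK below): M is the weight's output dimension,
    K is the last dimension of the input, and N collects the remaining
    input dimensions.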
30 | ''' 31 | 32 | gemmKernels = [ 33 | "gemm", "gemv", "dot_kernel", "splitKreduce_kernel", 34 | "reduce_1Block_kernel", "cutlass" 35 | ] 36 | 37 | biasKernels = [ 38 | "kernelReduceContigDim", "kernelReduceNoncontigDim_shared", 39 | "elementwise_kernel", "reduce_kernel", "kernelPointwiseApply2", 40 | "2d_grouped_direct_kernel" 41 | ] 42 | 43 | def setXWBMNK(self, args): 44 | x = None 45 | w = None 46 | b = None 47 | if (len(args) == 2): 48 | x, w = args 49 | elif (len(args) == 3): 50 | x, w, b = args 51 | assert (x['type'] == w['type'] == "tensor") 52 | if (b['type'] == "tensor"): 53 | assert (len(b['shape']) == 1) 54 | elif (b['type'] == "NoneType"): 55 | assert b['value'] is None 56 | b = None 57 | else: 58 | assert False 59 | else: 60 | assert False 61 | 62 | assert (len(w['shape']) == 2) 63 | k1 = x['shape'][-1] 64 | n, k2 = w['shape'] 65 | assert (k1 == k2) 66 | if b is not None: 67 | assert (b['shape'][0] == n) 68 | t1 = x['dtype'] 69 | t2 = w['dtype'] 70 | assert (t1 == t2) 71 | 72 | # X, W, B 73 | self.x = x['shape'] 74 | self.w = w['shape'] 75 | self.b = b['shape'] if b is not None else None 76 | self.type = t1 77 | 78 | # M, N, K 79 | #n = Utility.numElems(x[0:-1]) 80 | n = self.x[0:-1] 81 | k = self.x[-1] 82 | m, k1 = self.w 83 | assert (k == k1) 84 | 85 | self.m = m 86 | self.n = n 87 | self.k = k 88 | 89 | def tc(self): 90 | if self.op() == "linear": 91 | if self.name in TC_Whitelist(): 92 | return 1 93 | return 0 94 | else: 95 | return "-" 96 | 97 | def __init__(self, d): 98 | self.name = d.name 99 | self.dir = d.dir 100 | self.sub = d.sub 101 | 102 | marker = eval(d.argMarker[0]) 103 | mod = marker['mod'] 104 | op = marker['op'] 105 | args = marker['args'] 106 | 107 | assert (mod == "torch.nn.functional") 108 | assert (op == "linear") 109 | 110 | self.setXWBMNK(args) 111 | 112 | if any(x in d.name for x in Linear.gemmKernels): 113 | self.op_ = "linear" 114 | else: 115 | assert any(x in d.name for x in Linear.biasKernels), f"Kernel name: {d.name}" 116 | self.op_ = "bias" 117 | ''' 118 | elif (("kernelPointwiseApply2" in d.name) or ("kernelReduceContigDim" in d.name) or ("kernelReduceNoncontigDim_shared" in d.name)): 119 | #bias expansion was before the gemm 120 | self.op_ = "bias" 121 | 122 | elif ("elementwise_kernel" in d.name): 123 | #Bias addition happens later with a broadcast tensor 124 | self.op_ = "bias" 125 | assert (len(d.argMarker) == 2) 126 | marker = eval(d.argMarker[1]) 127 | mod = marker['mod'] 128 | op = marker['op'] 129 | args = marker['args'] 130 | 131 | assert (mod == "Tensor") 132 | assert (op == "__iadd__") 133 | assert (len(args) == 2) 134 | mn = args[0]['shape'] 135 | b = args[1]['shape'] 136 | assert (len(b) == 1) 137 | 138 | assert (mn == (self.n + (self.m,))) 139 | assert (b == self.b) 140 | 141 | else: 142 | assert False 143 | ''' 144 | 145 | def params(self): 146 | #p = OrderedDict([('X', self.x), ('W', self.w), ('B', self.b), ('type', self.type)]) 147 | 148 | m, n, k, x, w, t = self.m, self.n, self.k, self.x, self.w, self.type 149 | if len(n) == 1: 150 | n = n[0] 151 | 152 | if self.op_ == "linear": 153 | if self.dir == "fprop": 154 | p = OrderedDict([('M', m), ('N', n), ('K', k), ('type', t)]) 155 | elif self.dir == "bprop": 156 | if self.sub == 0: #dgrad (most likely) 157 | p = OrderedDict([('M', k), ('N', n), ('K', m), ('type', t)]) 158 | elif self.sub == 1: #wgrad (most likely) 159 | p = OrderedDict([('M', k), ('N', m), ('K', n), ('type', t)]) 160 | else: 161 | #This happens when there are additional kernels for reduction 162 | p = 
OrderedDict([('X', x), ('W', w), ('type', t)]) 163 | else: 164 | assert False 165 | 166 | elif self.op_ == "bias": 167 | p = OrderedDict([('M', m), ('N', n), ('type', t)]) 168 | else: 169 | assert False 170 | return p 171 | 172 | def op(self): 173 | return self.op_ 174 | 175 | def bytesFlops(self): 176 | 177 | m = self.m 178 | n = Utility.numElems(self.n) 179 | k = self.k 180 | 181 | if self.op_ == "linear": 182 | if self.dir == "fprop": 183 | f = m * n * k * 2 184 | b = m * n + m * k + n * k * Utility.typeToBytes(self.type) 185 | elif self.dir == "bprop": 186 | if self.sub == 0: #dgrad (most likely) 187 | f = m * n * k * 2 188 | b = m * n + m * k + n * k * Utility.typeToBytes(self.type) 189 | elif self.sub == 1: #wgrad (most likely) 190 | f = m * n * k * 2 191 | b = m * n + m * k + n * k * Utility.typeToBytes(self.type) 192 | else: 193 | #This happens when there are additional kernels for reduction 194 | f = 0 195 | b = 0 196 | else: 197 | assert False 198 | 199 | elif self.op_ == "bias": 200 | f = m * n 201 | b = 2 * m * n * Utility.typeToBytes(self.type) 202 | else: 203 | assert False 204 | return b, f 205 | 206 | # TODO: Fix bytes and flops with CUTLASS kernels. 207 | def bytes(self): 208 | b, f = self.bytesFlops() 209 | return b 210 | 211 | def flops(self): 212 | b, f = self.bytesFlops() 213 | return f 214 | 215 | def mod(self): 216 | return self.mod_ 217 | -------------------------------------------------------------------------------- /pyprof/prof/loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from collections import OrderedDict 19 | from .utility import Utility 20 | from .base import OperatorLayerBase 21 | 22 | #TODO: Add support for additional loss functions. 
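#Only mse_loss is handled in this file; other losses are presumably picked up
#by the generic Foo handler for unsupported operators (see misc.py).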
23 | 24 | 25 | class MSELoss(OperatorLayerBase): 26 | 27 | def __init__(self, d): 28 | marker = eval(d.argMarker[0]) 29 | mod = marker['mod'] 30 | op = marker['op'] 31 | args = marker['args'] 32 | 33 | self.marker = marker 34 | self.mod_ = mod 35 | self.op_ = op 36 | self.args = args 37 | 38 | assert (mod == "torch.nn.functional") 39 | assert (op == "mse_loss") 40 | assert (len(args) == 3) 41 | 42 | #Get input, target and reduction 43 | if (args[0]['name'] == ""): 44 | x = args[0] 45 | else: 46 | x = list(filter(lambda x: x['name'] == "input", args))[0] 47 | 48 | if (args[1]['name'] == ""): 49 | y = args[1] 50 | else: 51 | y = list(filter(lambda x: x['name'] == "target", args))[0] 52 | 53 | if (args[2]['name'] == ""): 54 | r = args[2] 55 | else: 56 | r = list(filter(lambda x: x['name'] == "reduction", args))[0] 57 | 58 | assert (x['type'] == y['type'] == "tensor") 59 | assert (x['shape'] == y['shape']) 60 | assert (x['dtype'] == y['dtype']) 61 | assert (r['type'] == "str") 62 | assert (r['value'] in ["none", "mean", "sum"]) 63 | 64 | self.shape = x['shape'] 65 | self.type = x['dtype'] 66 | self.red = r['value'] 67 | self.dir = d.dir 68 | 69 | def params(self): 70 | p = OrderedDict([('T', self.shape), ('type', self.type), ('red', self.red)]) 71 | return p 72 | 73 | def elems(self): 74 | red = self.red 75 | e = Utility.numElems(self.shape) 76 | 77 | if self.dir == "fprop": 78 | if red == "none": 79 | e *= 3 80 | else: 81 | e *= 2 82 | else: 83 | if red == "none": 84 | e *= 4 85 | else: 86 | e *= 3 87 | return e 88 | 89 | def bytes(self): 90 | return self.elems() * Utility.typeToBytes(self.type) 91 | 92 | def flops(self): 93 | return self.elems() * 2 + 1 94 | 95 | def tc(self): 96 | return "-" 97 | 98 | def op(self): 99 | return self.op_ 100 | 101 | def mod(self): 102 | return self.mod_ 103 | -------------------------------------------------------------------------------- /pyprof/prof/memory.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2020, Aditya Agrawal. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | from .base import OperatorLayerBase 18 | from .tensor import Tensor 19 | 20 | def readMarker(d): 21 | marker = eval(d.argMarker[0]) 22 | return marker['mod'], marker['op'], marker['args'] 23 | 24 | class OneZero(OperatorLayerBase): 25 | """ 26 | Support for torch.ones, torch.zeros etc. 27 | Fill a tensor with ones or zeros. 28 | """ 29 | 30 | ops = ["ones", "ones_like", "zero_", "zeros", "zeros_like"] 31 | 32 | def __init__(self, d): 33 | mod, op, args = readMarker(d) 34 | assert mod in ["torch", "Tensor"] 35 | assert op in OneZero.ops 36 | 37 | self.mod_ = mod 38 | self.op_ = op 39 | 40 | # For ones_like, zero_, zeros_like, the input is a tensor. 
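        # e.g. torch.zeros_like(x) records a single tensor argument whose
        # shape and dtype become self.input below.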
41 | if op in ["ones_like", "zero_", "zeros_like"]: 42 | assert(len(args) == 1) 43 | arg = args[0] 44 | self.input = Tensor(arg['shape'], arg['dtype']) 45 | 46 | # For ones and zeros, the input can be a list, tuple, sequence of integers. 47 | # E.g. torch.ones((3,5,6)) or torch.ones([3,5,6]) or torch.ones(3,5,6) 48 | else: 49 | assert op in ["ones", "zeros"] 50 | # TODO: Assume the output dtype is float 51 | if args[0]['type'] in ['list', 'tuple']: 52 | assert(len(args) == 1) 53 | self.input = Tensor(args[0]['value'], "float") 54 | elif args[0]['type'] == "int": 55 | # Get all unnamed arguments of type int 56 | args = list(filter(lambda x: x['name'] == "" and x['type'] == "int", args)) 57 | shape = [x['value'] for x in args] 58 | self.input = Tensor(shape, "float") 59 | else: 60 | assert False 61 | 62 | def params(self): 63 | return str(self.input) 64 | 65 | def tc(self): 66 | return "-" 67 | 68 | def op(self): 69 | return self.op_ 70 | 71 | def mod(self): 72 | return self.mod_ 73 | 74 | def bytes(self): 75 | return self.input.bytes 76 | 77 | def flops(self): 78 | return 0 79 | 80 | class Fill(OperatorLayerBase): 81 | """ 82 | Support for torch.fill_. 83 | Fill a tensor with a specific value. 84 | """ 85 | 86 | def __init__(self, d): 87 | mod, op, args = readMarker(d) 88 | assert mod == "Tensor" 89 | assert op == "fill_" 90 | 91 | self.mod_ = mod 92 | self.op_ = op 93 | 94 | assert(len(args) == 2) 95 | arg = args[0] 96 | self.input = Tensor(arg['shape'], arg['dtype']) 97 | 98 | def params(self): 99 | return str(self.input) 100 | 101 | def tc(self): 102 | return "-" 103 | 104 | def op(self): 105 | return self.op_ 106 | 107 | def mod(self): 108 | return self.mod_ 109 | 110 | def bytes(self): 111 | return self.input.bytes 112 | 113 | def flops(self): 114 | return 0 115 | 116 | class Full(OperatorLayerBase): 117 | """ 118 | Support for torch.full. 119 | Create a tensor of specified size and filled with a specified value. 120 | """ 121 | 122 | def __init__(self, d): 123 | mod, op, args = readMarker(d) 124 | assert mod == "torch" 125 | assert op == "full" 126 | 127 | self.mod_ = mod 128 | self.op_ = op 129 | 130 | assert(len(args) == 2) 131 | arg1, arg2 = args 132 | assert arg1['type'] in ['list', 'tuple'] 133 | # TODO: Add more types for arg2 134 | assert arg2['type'] in ['float', 'int'] 135 | self.output = Tensor(arg1['value'], arg2['type']) 136 | 137 | def params(self): 138 | return str(self.output) 139 | 140 | def tc(self): 141 | return "-" 142 | 143 | def op(self): 144 | return self.op_ 145 | 146 | def mod(self): 147 | return self.mod_ 148 | 149 | def bytes(self): 150 | return self.output.bytes 151 | 152 | def flops(self): 153 | return 0 154 | -------------------------------------------------------------------------------- /pyprof/prof/misc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from collections import OrderedDict 19 | from .utility import Utility 20 | from .base import OperatorLayerBase 21 | 22 | 23 | class Foo(OperatorLayerBase): 24 | """ 25 | An object of Foo is instantiated when we detect an unsupported operator. 26 | """ 27 | 28 | def __init__(self, d): 29 | marker = eval(d.argMarker[0]) 30 | mod = marker['mod'] 31 | op = marker['op'] 32 | args = marker['args'] 33 | 34 | self.marker = marker 35 | self.mod_ = mod 36 | self.op_ = op 37 | self.args = args 38 | 39 | shapes = [] 40 | types = [] 41 | 42 | for arg in args: 43 | if arg['type'] == "tensor": 44 | shapes.append(arg['shape']) 45 | types.append(arg['dtype']) 46 | 47 | self.shape = shapes 48 | self.type = types 49 | 50 | def params(self): 51 | p = OrderedDict([('T', self.shape), ('type', self.type)]) 52 | return p 53 | 54 | def tc(self): 55 | return "-" 56 | 57 | def op(self): 58 | return self.op_ 59 | 60 | def mod(self): 61 | return self.mod_ 62 | 63 | def flops(self): 64 | return 0 65 | 66 | def bytes(self): 67 | return 0 68 | 69 | 70 | class Copy(OperatorLayerBase): 71 | 72 | def __init__(self, d): 73 | marker = eval(d.argMarker[0]) 74 | mod = marker['mod'] 75 | op = marker['op'] 76 | args = marker['args'] 77 | 78 | self.marker = marker 79 | self.mod_ = mod 80 | self.op_ = op 81 | self.args = args 82 | 83 | assert (mod == "Tensor") 84 | assert (op == "copy_") 85 | assert (len(args) == 2) 86 | 87 | dst, src = args 88 | assert (src['type'] == dst['type']) 89 | assert (src['shape'] == dst['shape']) 90 | 91 | self.shape = src['shape'] 92 | self.stype = src['dtype'] 93 | self.dtype = dst['dtype'] 94 | 95 | def params(self): 96 | #The data type might be different 97 | p = OrderedDict([('T', self.shape), ('stype', self.stype), ('dtype', self.dtype)]) 98 | return p 99 | 100 | def tc(self): 101 | return "-" 102 | 103 | def op(self): 104 | return self.op_ 105 | 106 | def mod(self): 107 | return self.mod_ 108 | 109 | def flops(self): 110 | return 0 111 | 112 | def elems(self): 113 | return Utility.numElems(self.shape) 114 | 115 | def bytes(self): 116 | return self.elems() * (Utility.typeToBytes(self.stype) + Utility.typeToBytes(self.dtype)) 117 | 118 | 119 | class Clone(OperatorLayerBase): 120 | 121 | def __init__(self, d): 122 | marker = eval(d.argMarker[0]) 123 | mod = marker['mod'] 124 | op = marker['op'] 125 | args = marker['args'] 126 | 127 | self.marker = marker 128 | self.mod_ = mod 129 | self.op_ = op 130 | self.args = args 131 | 132 | assert (mod == "Tensor") 133 | assert (op == "clone") 134 | assert (len(args) == 1) 135 | t = args[0] 136 | self.shape = t['shape'] 137 | self.type = t['dtype'] 138 | 139 | def params(self): 140 | p = OrderedDict([('T', self.shape), ('type', self.type)]) 141 | return p 142 | 143 | def flops(self): 144 | return 0 145 | 146 | def tc(self): 147 | return "-" 148 | 149 | def op(self): 150 | return self.op_ 151 | 152 | def mod(self): 153 | return self.mod_ 154 | 155 | def elems(self): 156 | return Utility.numElems(self.shape) 157 | 158 | def bytes(self): 159 | return 2 * self.elems() * Utility.typeToBytes(self.type) 160 | 161 | 162 | class Contiguous(OperatorLayerBase): 163 | 164 | def __init__(self, d): 165 | marker = eval(d.argMarker[0]) 166 | mod = marker['mod'] 167 | op = marker['op'] 168 | args = marker['args'] 169 | 170 | self.marker = marker 171 | self.mod_ = mod 172 | self.op_ = op 173 | self.args = args 174 | 175 | assert (mod == "Tensor") 176 | 
assert (op == "contiguous") 177 | assert (len(args) == 1) 178 | t = args[0] 179 | self.shape = t['shape'] 180 | self.type = t['dtype'] 181 | 182 | def params(self): 183 | p = OrderedDict([('T', self.shape), ('type', self.type)]) 184 | return p 185 | 186 | def flops(self): 187 | return 0 188 | 189 | def bytes(self): 190 | return 2 * Utility.numElems(self.shape) * Utility.typeToBytes(self.type) 191 | 192 | def tc(self): 193 | return "-" 194 | 195 | def op(self): 196 | return self.op_ 197 | 198 | def mod(self): 199 | return self.mod_ 200 | 201 | 202 | class Any(OperatorLayerBase): 203 | 204 | def __init__(self, d): 205 | marker = eval(d.argMarker[0]) 206 | mod = marker['mod'] 207 | op = marker['op'] 208 | args = marker['args'] 209 | 210 | self.marker = marker 211 | self.mod_ = mod 212 | self.op_ = op 213 | self.args = args 214 | 215 | assert (mod == "Tensor") 216 | assert (op == "any") 217 | assert (len(args) in [1,2]) 218 | t = args[0] 219 | # The input can be a tensor or scalar 220 | assert (t['type'] in ["tensor", "bool"]) 221 | 222 | if t['type'] == "tensor": 223 | self.shape = t['shape'] 224 | self.type = t['dtype'] 225 | else: 226 | self.shape = (1,) 227 | self.type = t['type'] 228 | 229 | self.sub = d.sub 230 | return 231 | 232 | def params(self): 233 | p = OrderedDict([('T', self.shape), ('type', self.type)]) 234 | return p 235 | 236 | def op(self): 237 | return self.op_ 238 | 239 | def mod(self): 240 | return self.mod_ 241 | 242 | def tc(self): 243 | return "-" 244 | 245 | def flops(self): 246 | return 0 247 | 248 | def bytes(self): 249 | return Utility.numElems(self.shape) * Utility.typeToBytes(self.type) 250 | -------------------------------------------------------------------------------- /pyprof/prof/normalization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from .base import OperatorLayerBase 19 | from .tensor import Tensor 20 | 21 | 22 | class BatchNorm(OperatorLayerBase): 23 | 24 | def __init__(self, d): 25 | marker = eval(d.argMarker[0]) 26 | mod = marker['mod'] 27 | op = marker['op'] 28 | args = marker['args'] 29 | 30 | self.mod_ = mod 31 | self.op_ = op 32 | 33 | assert (op == "batch_norm") 34 | assert (len(args) >= 1) 35 | i = args[0] 36 | assert (i['type'] == "tensor") 37 | 38 | self.input = Tensor(i['shape'], i['dtype']) 39 | self.dir = d.dir 40 | self.sub = d.sub 41 | 42 | def params(self): 43 | return str(self.input) 44 | 45 | def tc(self): 46 | return "-" 47 | 48 | def op(self): 49 | return self.op_ 50 | 51 | def mod(self): 52 | return self.mod_ 53 | 54 | def flops(self): 55 | # Variance algo-dependent, but this is a reasonable value. 
56 | return self.input.size * 8 57 | 58 | def bytes(self): 59 | b = self.input.bytes 60 | # fprop is 2 reads, 2 writes 61 | # bprop is 4 reads, 1 write 62 | if self.dir == "fprop": 63 | b *= 4 64 | else: 65 | b *= 5 66 | 67 | if self.sub > 0: 68 | return 0 69 | else: 70 | return b 71 | -------------------------------------------------------------------------------- /pyprof/prof/optim.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from collections import OrderedDict 19 | from .utility import Utility 20 | from .base import OperatorLayerBase 21 | 22 | #TODO: Add support for other optimizers. 23 | 24 | 25 | class Adam(OperatorLayerBase): 26 | 27 | def __init__(self, d): 28 | marker = eval(d.argMarker[0]) 29 | mod = marker['mod'] 30 | op = marker['op'] 31 | args = marker['args'] 32 | 33 | self.marker = marker 34 | self.mod_ = mod 35 | self.op_ = op 36 | self.args = args 37 | self.sub = d.sub 38 | 39 | assert (op == "adam") 40 | assert (len(args) == 12) or (len(args) == 14) 41 | w, hw, m, v, g = args[0:5] 42 | assert (w['shape'] == m['shape'] == v['shape'] == g['shape']) 43 | assert (hw['shape'] == w['shape']) or (hw['shape'] == (0, )) #hw could be null 44 | assert (w['type'] == m['type'] == v['type'] == g['type'] == hw['type'] == "tensor") 45 | assert (w['dtype'] == m['dtype'] == v['dtype'] == "float32") 46 | 47 | self.w = w 48 | self.g = g 49 | 50 | def params(self): 51 | p = OrderedDict([('T', self.w['shape']), ('wtype', self.w['dtype']), ('gtype', self.g['dtype'])]) 52 | return p 53 | 54 | def flops(self): 55 | return 0 56 | 57 | def bytes(self): 58 | wshape = self.w['shape'] 59 | wtype = self.w['dtype'] 60 | gtype = self.g['dtype'] 61 | b = 0 62 | 63 | elems = Utility.numElems(wshape) 64 | 65 | #Get time to stream read/write w, m, v 66 | b += 6 * elems * Utility.typeToBytes(wtype) 67 | 68 | #Get time to read "g" 69 | b += elems * Utility.typeToBytes(gtype) 70 | 71 | if wtype != gtype: #mixed precision 72 | #Get time to write "hw 73 | b += elems * Utility.typeToBytes(gtype) 74 | 75 | return b if (self.sub == 0) else 0 76 | 77 | def tc(self): 78 | return "-" 79 | 80 | def op(self): 81 | return self.op_ 82 | 83 | def mod(self): 84 | return self.mod_ 85 | -------------------------------------------------------------------------------- /pyprof/prof/output.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import errno, os, sys 19 | 20 | 21 | class Output(): 22 | """ 23 | This class handles printing of a columed output and a CSV. 24 | """ 25 | 26 | # The table below is organized as 27 | # user_option: [output_header, attribute_in_Data_class, type, min_width_in_columed_output] 28 | table = { 29 | "idx": ["Idx", "index", int, 7], 30 | "seq": ["SeqId", "seqId", str, 7], 31 | "altseq": ["AltSeqId", "altSeqId", str, 7], 32 | "tid": ["TId", "tid", int, 12], 33 | "layer": ["Layer", "layer", str, 10], 34 | "trace": ["Trace", "trace", str, 25], 35 | "dir": ["Direction", "dir", str, 5], 36 | "sub": ["Sub", "sub", int, 3], 37 | "mod": ["Module", "mod", str, 15], 38 | "op": ["Op", "op", str, 15], 39 | "kernel": ["Kernel", "name", str, 0], 40 | "params": ["Params", "params", str, 0], 41 | "sil": ["Sil(ns)", "sil", int, 10], 42 | "tc": ["TC", "tc", str, 2], 43 | "device": ["Device", "device", int, 3], 44 | "stream": ["Stream", "stream", int, 3], 45 | "grid": ["Grid", "grid", str, 12], 46 | "block": ["Block", "block", str, 12], 47 | "flops": ["FLOPs", "flops", int, 12], 48 | "bytes": ["Bytes", "bytes", int, 12] 49 | } 50 | 51 | def __init__(self, args): 52 | self.cols = args.c 53 | self.csv = args.csv 54 | self.col = True if (args.w > 0) else False 55 | self.width = args.w 56 | 57 | w = 0 58 | for col in self.cols: 59 | assert col in Output.table.keys() 60 | w += Output.table[col][3] 61 | 62 | if ((self.col) and (w > self.width)): 63 | print("Minimum width required to print {} = {}. 
Exiting.".format(",".join(self.cols), w)) 64 | sys.exit(1) 65 | 66 | remainder = self.width - w 67 | 68 | if ("kernel" in self.cols) and ("params" in self.cols): 69 | Output.table["kernel"][3] = int(remainder / 2) 70 | Output.table["params"][3] = int(remainder / 2) 71 | elif ("kernel" in self.cols): 72 | Output.table["kernel"][3] = remainder 73 | elif ("params" in self.cols): 74 | Output.table["params"][3] = remainder 75 | 76 | #header format 77 | cadena = "" 78 | for col in self.cols: 79 | _, _, t, w = Output.table[col] 80 | cadena += "%-{}.{}s ".format(w, w) 81 | 82 | self.hFormat = cadena 83 | 84 | #data format 85 | cadena = "" 86 | for col in self.cols: 87 | _, _, t, w = Output.table[col] 88 | if (t == str): 89 | cadena += "%-{}.{}s ".format(w, w) 90 | elif (t == int): 91 | cadena += "%{}d ".format(w) 92 | 93 | self.dFormat = cadena 94 | 95 | def foo(self, cadena, pformat): 96 | if self.csv: 97 | cadena = ",".join(map(lambda x: '"' + str(x) + '"', cadena)) 98 | elif self.col: 99 | cadena = pformat % cadena 100 | else: 101 | cadena = " ".join(map(str, cadena)) 102 | 103 | try: 104 | print(cadena) 105 | except IOError as e: 106 | #gracefully handle pipes 107 | if e.errno == errno.EPIPE: 108 | # Python flushes standard streams on exit; redirect remaining output 109 | # to devnull to avoid another BrokenPipeError at shutdown 110 | 111 | devnull = os.open(os.devnull, os.O_WRONLY) 112 | os.dup2(devnull, sys.stdout.fileno()) 113 | sys.exit(0) 114 | else: 115 | sys.exit(-1) 116 | 117 | def header(self): 118 | cadena = () 119 | for col in self.cols: 120 | h = Output.table[col][0] 121 | cadena = cadena + (h, ) 122 | 123 | self.foo(cadena, self.hFormat) 124 | 125 | def data(self, a): 126 | if a.dir == "": 127 | direc = "na" 128 | else: 129 | direc = a.dir 130 | 131 | if a.op == "": 132 | op = "na" 133 | else: 134 | op = a.op 135 | 136 | if a.mod == "": 137 | mod = "na" 138 | else: 139 | mod = a.mod 140 | 141 | cadena = () 142 | for col in self.cols: 143 | attr = Output.table[col][1] 144 | val = getattr(a, attr) 145 | 146 | if col == "layer": 147 | assert (type(val) == list) 148 | val = ":".join(val) 149 | val = "-" if val == "" else val 150 | 151 | if col == "trace": 152 | assert (type(val) == list) 153 | if self.col and len(val): 154 | val = val[-1] 155 | val = val.split("/")[-1] 156 | else: 157 | val = ",".join(val) 158 | val = "-" if val == "" else val 159 | 160 | if col in ["seq", "altseq"]: 161 | assert (type(val) == list) 162 | val = ",".join(map(str, val)) 163 | val = "-" if val == "" else val 164 | 165 | cadena = cadena + (val, ) 166 | 167 | self.foo(cadena, self.dFormat) 168 | -------------------------------------------------------------------------------- /pyprof/prof/pointwise.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | 18 | import numpy as np 19 | from .base import OperatorLayerBase 20 | from .tensor import Tensor 21 | from functools import reduce 22 | import operator 23 | 24 | class Pointwise(OperatorLayerBase): 25 | 26 | # TODO: Add more operators. 27 | # TODO: Determining the output dtype is tricky. 28 | # TODO: Refine calculations based on direction. 29 | # TODO: Refine calculations for non-arithmetic ops. 30 | 31 | # Unary 32 | unary = ["abs", "abs_", "neg", "neg_", "reciprocal", "reciprocal_"] 33 | unary += ["__abs__", "__neg__"] 34 | 35 | # Unary bitwise 36 | unary += ["__invert__"] 37 | 38 | # Exponential and log (unary) 39 | exp_log = ["exp", "exp_", "exp1m", "exp1m_", "log", "log_", 40 | "log10", "log10_", "log1p", "log1p_", "log2", "log2_"] 41 | 42 | # Sqrt (unary) 43 | sqrt = ["rsqrt", "rsqrt_", "sqrt", "sqrt_"] 44 | 45 | # Representation (unary) 46 | representation = ["ceil", "ceil_", "clamp", "clamp_", "floor", "floor_", 47 | "frac", "frac_", "round", "round_", "sign", "sign_", 48 | "trunc", "trunc_"] 49 | 50 | # Trigonometric and transcendental (unary) 51 | trig_trans = ["acos", "acos_", "asin", "asin_", "atan", "atan_", 52 | "atan2", "atan2_", "cos", "cos_", "cosh", "cosh_", 53 | "sin", "sin_", "sinh", "sinh_", "tan", "tan_", 54 | "sigmoid", "sigmoid_", "tanh", "tanh_"] 55 | 56 | # Error (unary) 57 | error = ["erf", "erf_", "erfc", "erfc_", "erfinv", "erfinv_"] 58 | 59 | # Binary 60 | binary = ["add", "add_", "div", "div_", "mul", "mul_", 61 | "remainder", "remainder_", "sub", "sub_"] 62 | binary += ["__add__", "__sub__", "__mul__", "__floordiv__", 63 | "__truediv__", "__mod__"] 64 | binary += ["__radd__", "__rsub__", "__rmul__", "__rdiv__", 65 | "__rtruediv__", "__rfloordiv__"] 66 | binary += ["fmod", "fmod_"] 67 | 68 | # Binary inplace 69 | ibinary = ["__iadd__", "__isub__", "__imul__", "__itruediv__"] 70 | 71 | # Power (binary) 72 | power = ["pow", "pow_", "__pow__", "__rpow__"] 73 | 74 | # Comparison (binary) 75 | comp = ["lt", "lt_", "gt", "gt_", "ge", "ge_", "le", "le_", 76 | "eq", "eq_", "ne", "ne_"] 77 | comp += ["__lt__", "__gt__", "__ge__", "__le__", "__eq__", "__ne__"] 78 | 79 | # Logical (binary) 80 | logical = ["__and__", "__or__", "__xor__", "__lshift__", "__rshift__"] 81 | 82 | # Logical inplace (binary) 83 | ilogical = ["__iand__", "__ior__", "__ixor__", "__ilshift__", "__irshift__"] 84 | 85 | # Ternary 86 | ternary = ["addcdiv", "addcdiv_", "addcmul", "addcmul_"] 87 | 88 | # Misc 89 | misc = ["digamma", "lerp", "lerp_", "mvlgamma"] 90 | 91 | ops = unary + binary + ibinary + comp + logical + ilogical + \ 92 | ternary + exp_log + power + sqrt + representation + trig_trans + \ 93 | error + misc 94 | 95 | def __init__(self, d): 96 | marker = eval(d.argMarker[0]) 97 | mod = marker['mod'] 98 | op = marker['op'] 99 | args = marker['args'] 100 | 101 | self.marker = marker 102 | self.mod_ = mod 103 | self.op_ = op 104 | self.args = args 105 | 106 | self.dir = d.dir 107 | assert (d.dir in ["fprop", "bprop"]) 108 | assert (op in Pointwise.ops) 109 | 110 | # Filter out all named parameters (kwargs). 111 | # This might require revisiting in future. 
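        # Each surviving positional argument is wrapped below: real tensors keep their shape and dtype, while bare Python floats/ints become zero-dim Tensors, so bytes_flops() can broadcast them uniformly.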
112 | args = list(filter(lambda x: x['name'] == "", args)) 113 | 114 | # Filter out non tensors 115 | #args = list(filter(lambda x: x['type'] == "tensor", args)) 116 | 117 | assert (len(args) <= 4) 118 | self.input = [] 119 | 120 | for arg in args: 121 | t = arg['type'] 122 | if (t == "tensor"): 123 | tensor = Tensor(arg['shape'], arg['dtype']) 124 | elif t in ['float', 'int']: 125 | tensor = Tensor([], t) 126 | else: 127 | assert False 128 | 129 | self.input.append(tensor) 130 | 131 | def params(self): 132 | return ";".join([str(t) for t in self.input]) 133 | 134 | def tc(self): 135 | return "-" 136 | 137 | def op(self): 138 | return self.op_ 139 | 140 | def mod(self): 141 | return self.mod_ 142 | 143 | def bytes_flops(self): 144 | b = f = 0 145 | 146 | # Unary 147 | if self.op() in Pointwise.unary + Pointwise.representation: 148 | # Relaxing assert. clamp has > 1 input arguments. 149 | assert (len(self.input) >= 1) 150 | b = 2 * self.input[0].bytes 151 | f = self.input[0].size 152 | 153 | elif self.op() in Pointwise.exp_log + Pointwise.trig_trans + \ 154 | Pointwise.sqrt + Pointwise.error: 155 | assert (len(self.input) == 1) 156 | b = 2 * self.input[0].bytes 157 | f = self.input[0].size * 20 # estimate 158 | 159 | # Binary 160 | elif self.op() in Pointwise.comp + \ 161 | Pointwise.binary + Pointwise.ibinary + \ 162 | Pointwise.logical + Pointwise.ilogical: 163 | 164 | assert (len(self.input) == 2) 165 | out = Tensor.broadcast(self.input) 166 | 167 | if self.dir == "fprop": 168 | b = reduce(operator.add, [t.bytes for t in self.input]) 169 | # The output of comparison is bool 170 | if self.op() in Pointwise.comp: 171 | out = Tensor(out.shape, "bool") 172 | b += out.bytes 173 | f = out.size 174 | else: 175 | if (self.op() in ["add", "__add__", "sub", "__sub__", "__isub__"]): 176 | b = 2 * out.bytes 177 | f = 0 178 | elif (self.op() in ["__mul__", "__imul__", "__rmul__", "div", "__truediv__"]): 179 | b = 3 * out.bytes 180 | f = out.size 181 | else: 182 | e = f'{self.op()} bprop not supported yet. Please file a bug.' 183 | assert False, e 184 | 185 | elif self.op() in Pointwise.power: 186 | assert (len(self.input) == 2) 187 | out = Tensor.broadcast(self.input) 188 | b = reduce(operator.add, [t.bytes for t in self.input]) 189 | b += out.bytes 190 | f = out.size * 20 # estimate 191 | 192 | # Ternary 193 | elif self.op() in Pointwise.ternary: 194 | # Remove scalars 195 | tensors = list(filter(lambda x: x.shape != [], self.input)) 196 | assert len(tensors) == 3 197 | out = Tensor.broadcast(tensors) 198 | b = reduce(operator.add, [t.bytes for t in tensors]) 199 | b += out.bytes 200 | f = 3 * out.size 201 | 202 | else: 203 | e = f'{self.op()} not supported yet. Please file a bug.' 204 | assert False, e 205 | 206 | return b, f 207 | 208 | def bytes(self): 209 | b, f = self.bytes_flops() 210 | return b 211 | 212 | def flops(self): 213 | b, f = self.bytes_flops() 214 | return f 215 | -------------------------------------------------------------------------------- /pyprof/prof/pooling.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from collections import OrderedDict 19 | from .utility import Utility 20 | 21 | # Work in progress. 22 | 23 | 24 | #poolFuncs = ["max_pool2d_with_indices_forward", "max_pool2d_with_indices"] 25 | class MaxPool2d(object): 26 | 27 | def parse(marker): 28 | 29 | def convert2Tuple(arg): 30 | assert (arg['type'] in ["int", "tuple"]) 31 | if arg['type'] == "int": 32 | return (arg['value'], arg['value']) 33 | else: 34 | return arg['value'] 35 | 36 | mod = marker['mod'] 37 | op = marker['op'] 38 | args = marker['args'] 39 | assert (mod == "torch.nn.functional") 40 | assert (op == "max_pool2d") 41 | assert (len(args) >= 2) 42 | 43 | #input 44 | assert (args[0]['name'] == "") 45 | inp = args[0] 46 | assert (inp['type'] == "tensor") 47 | i = inp['shape'] 48 | t = inp['dtype'] 49 | assert (len(i) == 4) #nchw tensor 50 | 51 | #kernel 52 | if (args[1]['name'] == ""): 53 | k = args[1] 54 | else: 55 | k = list(filter(lambda x: x['name'] == "kernel_size", args))[0] 56 | k = convert2Tuple(k) 57 | 58 | #stride 59 | s = k #default value 60 | if ((len(args) >= 3) and args[2] == ""): 61 | s = args[2] 62 | s = convert2Tuple(s) 63 | elif any(x['name'] == "stride" for x in args): 64 | s = list(filter(lambda x: x['name'] == "stride", args))[0] 65 | s = convert2Tuple(s) 66 | 67 | #padding 68 | p = (0, 0) 69 | if ((len(args) >= 4) and args[3] == ""): 70 | p = args[3] 71 | p = convert2Tuple(p) 72 | elif any(x['name'] == "padding" for x in args): 73 | p = list(filter(lambda x: x['name'] == "padding", args))[0] 74 | p = convert2Tuple(p) 75 | 76 | params = OrderedDict([('T', i), ('K', k), ('s', s), ('p', p), ('type', t)]) 77 | return params 78 | -------------------------------------------------------------------------------- /pyprof/prof/prof.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | """ 18 | This script reads the output (Python dictionary) created by parse.py. 19 | For every kernel (line) in the input it determines 20 | module / class name e.g. torch.nn.functional 21 | operator name e.g. linear 22 | kernel parameters e.g. GEMM M, N, K, datatype 23 | bytes 24 | flops 25 | tensor core usage 26 | direction (fprop, bprop) 27 | and other things. Please see the tool usage. 
28 | """ 29 | 30 | from .usage import parseArgs 31 | from .output import Output 32 | from .utility import Utility 33 | from .pointwise import Pointwise 34 | from .convert import Convert 35 | from .blas import * 36 | from .embedding import Embedding 37 | from .reduction import * 38 | from .dropout import Dropout 39 | from .softmax import * 40 | #from pooling import * # work in progress 41 | from .linear import Linear 42 | from .optim import Adam 43 | from .misc import * 44 | from .conv import Conv 45 | from .activation import Activation 46 | from .index_slice_join_mutate import Cat, Reshape, MaskedScatter, Gather, Nonzero, IndexSelect, MaskedSelect 47 | from .recurrentCell import RNNCell 48 | from .normalization import BatchNorm 49 | from .randomSample import RandPerm 50 | from .loss import MSELoss 51 | from .data import Data 52 | from .memory import OneZero, Fill, Full 53 | 54 | 55 | def findFpropKernel(seq): 56 | #Find the last fprop kernel with the same seqId 57 | #First look at seqId and then at altSeqId 58 | for idx in reversed(range(len(kernels))): 59 | k = kernels[idx] 60 | if (seq in k['seqId']) and (k['dir'] == "fprop"): 61 | return idx 62 | 63 | for idx in reversed(range(len(kernels))): 64 | k = kernels[idx] 65 | if (seq in k['altSeqId']) and (k['dir'] == "fprop"): 66 | return idx 67 | 68 | return -1 69 | #print("Error: seqId {} not found.".format(seq), file=sys.stderr) 70 | #assert False 71 | 72 | 73 | def foo(mod, op, d): 74 | if (op[0] == "linear"): 75 | xx = Linear(d) 76 | 77 | # rnncell, lstmcell, grucell 78 | elif (mod[0] in ["LSTMCell", "GRUCell"]) and (op[0] == "forward"): 79 | xx = RNNCell(d) 80 | 81 | elif op[0] in [ 82 | "conv1d", 83 | "conv2d", 84 | ]: 85 | xx = Conv(d) 86 | 87 | elif (op[0] in Pointwise.ops): 88 | xx = Pointwise(d) 89 | 90 | elif (op[0] in Convert.ops): 91 | xx = Convert(d) 92 | 93 | elif op[0] in ["__matmul__", "matmul"]: 94 | xx = Matmul(d) 95 | 96 | elif op[0] == "embedding": 97 | xx = Embedding(d) 98 | 99 | #reduction 100 | elif op[0] == "sum": 101 | xx = Sum(d) 102 | 103 | elif op[0] == "mean": 104 | xx = Mean(d) 105 | 106 | elif op[0] == "norm": 107 | xx = Norm(d) 108 | 109 | elif op[0] == "dropout": 110 | xx = Dropout(d) 111 | 112 | #Index, Slice, Join, Mutate 113 | elif (op[0] == "cat"): 114 | xx = Cat(d) 115 | 116 | elif (op[0] == "reshape"): 117 | xx = Reshape(d) 118 | 119 | elif (op[0] == "masked_scatter_"): 120 | xx = MaskedScatter(d) 121 | 122 | elif (op[0] == "gather"): 123 | xx = Gather(d) 124 | 125 | elif (op[0] == "nonzero"): 126 | xx = Nonzero(d) 127 | 128 | elif (op[0] == "index_select"): 129 | xx = IndexSelect(d) 130 | 131 | elif (op[0] == "masked_select"): 132 | xx = MaskedSelect(d) 133 | 134 | #blas 135 | elif op[0] in ["addmm", "addmm_"]: 136 | xx = Addmm(d) 137 | 138 | elif op[0] == "mm": 139 | xx = Mm(d) 140 | 141 | elif op[0] == "bmm": 142 | xx = Bmm(d) 143 | 144 | #softmax 145 | elif op[0] == "softmax": 146 | xx = Softmax(d) 147 | 148 | elif op[0] == "log_softmax": 149 | xx = LogSoftmax(d) 150 | 151 | #loss 152 | elif op[0] == "mse_loss": 153 | xx = MSELoss(d) 154 | 155 | #optimizers 156 | elif op[0] == "adam": 157 | xx = Adam(d) 158 | 159 | #normalization 160 | elif op[0] == "batch_norm": 161 | xx = BatchNorm(d) 162 | 163 | #random 164 | elif op[0] == "randperm": 165 | xx = RandPerm(d) 166 | 167 | #memory 168 | elif op[0] in OneZero.ops: 169 | xx = OneZero(d) 170 | 171 | elif op[0] == "fill_": 172 | xx = Fill(d) 173 | 174 | elif op[0] == "full": 175 | xx = Full(d) 176 | 177 | #misc 178 | elif op[0] == "copy_": 179 | xx = 
Copy(d) 180 | 181 | elif op[0] == "clone": 182 | xx = Clone(d) 183 | 184 | elif op[0] == "contiguous": 185 | xx = Contiguous(d) 186 | 187 | elif op[0] == "any": 188 | xx = Any(d) 189 | 190 | elif (op[0] in Activation.ops): 191 | xx = Activation(d) 192 | 193 | elif op[0] == "to": 194 | xx = Convert(d) 195 | 196 | else: 197 | xx = Foo(d) 198 | 199 | return xx 200 | 201 | 202 | def main(): 203 | #Read cmd line arguments 204 | cmdArgs = parseArgs() 205 | 206 | output = Output(cmdArgs) 207 | output.header() 208 | 209 | idx = -1 210 | #Read in all the kernel info 211 | for line in cmdArgs.file: 212 | idx += 1 213 | kernel = eval(line) 214 | assert (kernel) 215 | kernels.append(kernel) 216 | 217 | k = kernel 218 | d = Data(k) 219 | 220 | mod = k['mod'] 221 | op = k['op'] 222 | 223 | flops = 0 224 | params = {"na": "na"} 225 | tc = "na" 226 | bytes = 0 227 | 228 | if (d.dir == "bprop"): 229 | d.seqMarker = k['seqMarker'] 230 | seq = k['seqId'] 231 | if len(seq) > 1: 232 | pass 233 | seq = k['seqId'][:1] 234 | assert (len(seq) == 1), seq 235 | #assert (seq[0] != 0) 236 | assert (len(d.seqMarker) > 0) 237 | #If there is no useful marker associated, use the 238 | #sequence number to find the kernel from fprop 239 | if len(d.argMarker) == 0: 240 | index = findFpropKernel(seq[0]) 241 | if index >= 0: 242 | d.argMarker = kernels[index]['marker'] 243 | d.modMarker = kernels[index]['reprMarkers'] 244 | mod = kernels[index]['mod'] 245 | op = kernels[index]['op'] 246 | 247 | d.layer = kernels[index]['layer'] 248 | d.trace = kernels[index]['trace'] 249 | 250 | # Check if marker has our annotations 251 | if len(d.argMarker) and Utility.hasNVTX(d.argMarker[0]): 252 | 253 | xx = foo(mod, op, d) 254 | 255 | bytes = xx.bytes() 256 | flops = xx.flops() 257 | op = xx.op() 258 | params = xx.params() 259 | tc = xx.tc() 260 | 261 | if type(op) is list: 262 | if len(op): 263 | op = op[0] 264 | else: 265 | op = "" 266 | 267 | if type(mod) is list: 268 | if len(mod): 269 | mod = mod[0] 270 | else: 271 | mod = "" 272 | 273 | d.index = idx + 1 274 | 275 | # The following 8 come from operator class functions. 276 | d.setParams(params) 277 | d.tc = tc 278 | d.flops = flops 279 | d.bytes = bytes 280 | d.mod = mod 281 | d.op = op 282 | 283 | output.data(d) 284 | 285 | 286 | kernels = [] 287 | if __name__ == '__main__': 288 | main() 289 | -------------------------------------------------------------------------------- /pyprof/prof/randomSample.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
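prof.py's foo() above maps each (module, op) marker onto one of the operator classes in this directory, and anything unrecognized falls back to Foo, which records tensor shapes but reports zero flops and bytes. Extending coverage therefore means one more OperatorLayerBase subclass plus one more elif branch. Below is a hedged sketch of such a class; the chosen op ("flip"), its cost model, and the class name are illustrative assumptions, not part of PyProf.

```python
from collections import OrderedDict
from .base import OperatorLayerBase
from .utility import Utility


class Flip(OperatorLayerBase):
    """Hypothetical handler for Tensor.flip, treated as pure data movement."""

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        self.mod_ = marker['mod']
        self.op_ = marker['op']
        t = marker['args'][0]
        assert t['type'] == "tensor"
        self.shape = t['shape']
        self.type = t['dtype']

    def params(self):
        return OrderedDict([('T', self.shape), ('type', self.type)])

    def tc(self):
        return "-"

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

    def bytes(self):
        # read the input once, write the output once
        return 2 * Utility.numElems(self.shape) * Utility.typeToBytes(self.type)

    def flops(self):
        # index shuffling only, no arithmetic
        return 0

# ...and in prof.py's foo():  elif op[0] == "flip": xx = Flip(d)
```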
17 | 18 | from collections import OrderedDict 19 | from .utility import Utility 20 | from .base import OperatorLayerBase 21 | 22 | 23 | class RandPerm(OperatorLayerBase): 24 | 25 | def __init__(self, d): 26 | marker = eval(d.argMarker[0]) 27 | mod = marker['mod'] 28 | op = marker['op'] 29 | args = marker['args'] 30 | 31 | self.marker = marker 32 | self.mod_ = mod 33 | self.op_ = op 34 | self.args = args 35 | 36 | assert (mod == "torch") 37 | assert (op == "randperm") 38 | assert (len(args) == 1) 39 | n = args[0] 40 | assert n['type'] == "int" 41 | self.n = n['value'] 42 | 43 | def params(self): 44 | p = OrderedDict([('N', self.n)]) 45 | return p 46 | 47 | def tc(self): 48 | return "-" 49 | 50 | def op(self): 51 | return self.op_ 52 | 53 | def mod(self): 54 | return self.mod_ 55 | 56 | def bytes(self): 57 | return self.n * Utility.typeToBytes("int64") 58 | 59 | def flops(self): 60 | # Depends on RNG but this is probably a reasonable assumption. 61 | return self.n * 3 62 | -------------------------------------------------------------------------------- /pyprof/prof/recurrentCell.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from collections import OrderedDict 19 | from .tc import TC_Whitelist 20 | from .utility import Utility 21 | from .base import OperatorLayerBase 22 | 23 | 24 | def hasTileSize(name): 25 | if ("sgemm" in name) or ("884gemm" in name) or ("hgemm" in name): 26 | return True 27 | else: 28 | return False 29 | 30 | 31 | def ctaTile(name): 32 | name = name.split("_") 33 | name = list(filter(lambda x: "x" in x, name)) 34 | name = list(filter(lambda x: "slice" not in x, name)) 35 | assert (len(name) == 1) 36 | name = name[0].split("x") 37 | assert (len(name) == 2) 38 | name = list(map(int, name)) 39 | return name[0], name[1] 40 | 41 | 42 | class RNNCell(OperatorLayerBase): 43 | """ 44 | This class supports RNNCell, LSTMCell and GRUCell. 
45 | """ 46 | 47 | def __init__(self, d): 48 | marker = eval(d.argMarker[0]) 49 | mod = marker['mod'] 50 | op = marker['op'] 51 | args = marker['args'] 52 | 53 | self.marker = marker 54 | self.mod_ = mod 55 | self.op_ = op 56 | self.args = args 57 | 58 | self.name = d.name 59 | self.dir = d.dir 60 | self.sub = d.sub 61 | self.grid = d.grid 62 | 63 | assert (op == "forward") 64 | assert (mod in ["LSTMCell", "GRUCell", "RNNCell"]) 65 | assert (len(args) in [2, 3]) 66 | 67 | x, h = args[0], args[1] 68 | b1, ii = x['shape'] 69 | b2, hh = h['shape'] 70 | assert b1 == b2 71 | assert x['dtype'] == h['dtype'] 72 | t = x['dtype'] 73 | 74 | self.cell = mod 75 | self.inp = ii 76 | self.hid = hh 77 | self.b = b1 78 | self.type = t 79 | 80 | self.multiple = 1 81 | if self.cell == "LSTMCell": 82 | self.multiple = 4 83 | elif self.cell == "GRUCell": 84 | self.multiple = 3 85 | 86 | self.gemm = None 87 | self.m = None 88 | self.n = None 89 | self.k = None 90 | self.elems = 0 91 | 92 | self.bar() 93 | 94 | def params(self): 95 | if self.gemm is None: 96 | p = OrderedDict([('cell', self.cell), ('X', self.inp), ('H', self.hid), ('B', self.b), ('type', self.type)]) 97 | else: 98 | assert self.m is not None 99 | assert self.n is not None 100 | assert self.k is not None 101 | p = OrderedDict([('gemm', self.gemm), ('M', self.m), ('N', self.n), ('K', self.k), ('type', self.type)]) 102 | return p 103 | 104 | def tc(self): 105 | if "gemm" in self.name: 106 | if self.name in TC_Whitelist(): 107 | return 1 108 | return 0 109 | else: 110 | return "-" 111 | 112 | def op(self): 113 | return self.op_ 114 | 115 | def mod(self): 116 | return self.mod_ 117 | 118 | def bytes(self): 119 | if self.gemm is not None: 120 | m, n, k, t = self.m, self.n, self.k, self.type 121 | b = (m * k + k * n + m * n) * Utility.typeToBytes(t) 122 | elif self.elems != 0: 123 | b = self.elems * Utility.typeToBytes(self.type) 124 | else: 125 | b = 0 126 | return b 127 | 128 | def flops(self): 129 | if self.gemm is not None: 130 | m, n, k = self.m, self.n, self.k 131 | f = 2 * m * n * k 132 | elif self.elems != 0: 133 | f = 0 #TODO 134 | else: 135 | f = 0 136 | return f 137 | 138 | def bar(self): 139 | cell = self.cell 140 | X = self.inp 141 | H = self.hid 142 | B = self.b 143 | t = self.type 144 | subseqId = self.sub 145 | direc = self.dir 146 | name = self.name 147 | grid = self.grid 148 | multiple = self.multiple 149 | 150 | if direc == "fprop": 151 | subseqId = subseqId % 3 152 | if subseqId == 0: #layer gemm 153 | self.gemm = "layer" 154 | self.m = multiple * H 155 | self.n = B 156 | self.k = X 157 | elif subseqId == 1: #recurrent gemm 158 | self.gemm = "recur" 159 | self.m = multiple * H 160 | self.n = B 161 | self.k = H 162 | else: 163 | layerGemmElems = multiple * H * B 164 | recurGemmElems = multiple * H * B 165 | cElems = H * B 166 | hElems = H * B 167 | totElems = layerGemmElems + recurGemmElems + 2 * cElems + hElems 168 | self.elems = totElems 169 | 170 | else: 171 | if ("gemm" in name) and hasTileSize(name): #gemm 172 | #Get cta tile size 173 | tileX, tileY = ctaTile(name) 174 | #Get grid dimensions 175 | grid = grid.split(",") 176 | gridX, gridY, gridZ = map(lambda x: int(x), grid) 177 | 178 | gemmM = tileX * gridX 179 | gemmN = tileY * gridY 180 | 181 | if name[-3:] == "_nn": # dgrad 182 | if (gemmM == H): # recurrent dgrad 183 | #Ideally gemmN = B, but we have a limited set of tile sizes. 
184 | gemmN = B 185 | gemmK = multiple * H 186 | 187 | self.gemm = "recur" 188 | self.m = gemmM 189 | self.n = gemmN 190 | self.k = gemmK 191 | 192 | elif (gemmM == X): # layer dgrad 193 | #assert(gemmN % B == 0) 194 | gemmK = multiple * H 195 | 196 | self.gemm = "layer" 197 | self.m = gemmM 198 | self.n = gemmN 199 | self.k = gemmK 200 | 201 | else: 202 | pass 203 | 204 | elif name[-3:] == "_nt": #wgrad 205 | if (gemmM == H): #recurrent wgrad 206 | assert (gemmN == multiple * H) 207 | gemmK = B 208 | 209 | self.gemm = "recur" 210 | self.m = gemmM 211 | self.n = gemmN 212 | self.k = gemmK 213 | 214 | elif (gemmM == X): #layer wgrad 215 | assert (gemmN == multiple * H) 216 | gemmK = B 217 | 218 | self.gemm = "layer" 219 | self.m = gemmM 220 | self.n = gemmN 221 | self.k = gemmK 222 | 223 | else: 224 | pass 225 | else: 226 | pass 227 | else: 228 | pass 229 | 230 | return 231 | -------------------------------------------------------------------------------- /pyprof/prof/reduction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
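On the bprop side, RNNCell.bar() above cannot rely on the marker for GEMM shapes, so it reconstructs them from the kernel name and launch grid: the CTA tile parsed out of the name (via ctaTile) times the grid dimensions gives M and N, and the _nn/_nt suffix separates dgrad from wgrad. Below is a small worked example of that reconstruction; the kernel name and grid are made up for illustration.

```python
name = "volta_sgemm_128x64_nn"    # hypothetical recurrent dgrad kernel name
grid = "8,16,1"                   # hypothetical launch grid "x,y,z"

tile = [p for p in name.split("_") if "x" in p and "slice" not in p][0]
tileX, tileY = map(int, tile.split("x"))           # 128, 64
gridX, gridY, gridZ = map(int, grid.split(","))    # 8, 16, 1

gemmM = tileX * gridX   # 1024
gemmN = tileY * gridY   # 1024
# If gemmM equals the hidden size H, this is the recurrent dgrad GEMM with
# K = multiple * H (4*H for LSTMCell, 3*H for GRUCell); if it equals the
# input size X, it is the layer dgrad GEMM.
```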
17 | 18 | from collections import OrderedDict 19 | from .utility import Utility 20 | from .base import OperatorLayerBase 21 | from .tensor import Tensor 22 | 23 | 24 | class Mean(OperatorLayerBase): 25 | 26 | def __init__(self, d): 27 | marker = eval(d.argMarker[0]) 28 | mod = marker['mod'] 29 | op = marker['op'] 30 | args = marker['args'] 31 | 32 | self.mod_ = mod 33 | self.op_ = op 34 | 35 | assert (mod in ["torch", "Tensor"]) 36 | assert (op == "mean") 37 | 38 | #Filter out named parameters 39 | args = list(filter(lambda x: x['name'] == '', args)) 40 | 41 | assert (len(args) <= 2) 42 | i = args[0] 43 | 44 | # The input can be a scalar or a tensor 45 | if 'shape' in i: # tensor 46 | self.input = Tensor(i['shape'], i['dtype']) 47 | else: # scalar 48 | assert ('value' in i) 49 | self.input = Tensor([], i['type']) 50 | 51 | self.dir = d.dir 52 | self.sub = d.sub 53 | 54 | def params(self): 55 | return str(self.input) 56 | 57 | def tc(self): 58 | return "-" 59 | 60 | def op(self): 61 | return self.op_ 62 | 63 | def mod(self): 64 | return self.mod_ 65 | 66 | def bytes(self): 67 | if self.sub == 0: 68 | return self.input.bytes + self.input.itemsize 69 | else: 70 | return 0 71 | 72 | def flops(self): 73 | if self.sub == 0: 74 | return self.input.size + 1 75 | else: 76 | return 0 77 | 78 | 79 | class Sum(OperatorLayerBase): 80 | 81 | def __init__(self, d): 82 | marker = eval(d.argMarker[0]) 83 | mod = marker['mod'] 84 | op = marker['op'] 85 | args = marker['args'] 86 | 87 | self.marker = marker 88 | self.mod_ = mod 89 | self.op_ = op 90 | self.args = args 91 | 92 | assert (mod in ["torch", "Tensor"]) 93 | assert (op == "sum") 94 | assert (len(args) >= 1) 95 | 96 | #Get input 97 | if (args[0]['name'] == ""): 98 | i = args[0] 99 | else: 100 | i = list(filter(lambda x: x['name'] == "input", args))[0] 101 | 102 | self.shape = i['shape'] 103 | self.type = i['dtype'] 104 | self.sub = d.sub 105 | 106 | def params(self): 107 | p = OrderedDict([('T', self.shape), ('type', self.type)]) 108 | return p 109 | 110 | def tc(self): 111 | return "-" 112 | 113 | def op(self): 114 | return self.op_ 115 | 116 | def mod(self): 117 | return self.mod_ 118 | 119 | def elems(self): 120 | return Utility.numElems(self.shape) 121 | 122 | def flops(self): 123 | # Note: This is incorrect, need to calculate actual flops (say via nvprof) 124 | return self.elems() 125 | 126 | def bytes(self): 127 | b = self.elems() * Utility.typeToBytes(self.type) 128 | if self.sub == 0: 129 | return b 130 | else: 131 | return 0 132 | 133 | 134 | class Norm(OperatorLayerBase): 135 | 136 | def __init__(self, d): 137 | marker = eval(d.argMarker[0]) 138 | mod = marker['mod'] 139 | op = marker['op'] 140 | args = marker['args'] 141 | 142 | self.marker = marker 143 | self.mod_ = mod 144 | self.op_ = op 145 | self.args = args 146 | 147 | assert (mod in ["torch", "Tensor"]) 148 | assert (op == "norm") 149 | #assert (len(args) == 1) 150 | i = args[0] 151 | self.shape = i['shape'] 152 | self.type = i['dtype'] 153 | self.sub = d.sub 154 | 155 | def params(self): 156 | p = OrderedDict([('T', self.shape), ('type', self.type)]) 157 | return p 158 | 159 | def elems(self): 160 | return Utility.numElems(self.shape) 161 | 162 | def bytes(self): 163 | b = self.elems() * Utility.typeToBytes(self.type) 164 | if self.sub == 0: 165 | return b 166 | else: 167 | return 0 168 | 169 | def flops(self): 170 | # square and add plus sqrt 171 | f = 2 * self.elems() + 1 172 | if self.sub == 0: 173 | return f 174 | else: 175 | return 0 176 | 177 | def tc(self): 178 | return "-" 
179 | 180 | def op(self): 181 | return self.op_ 182 | 183 | def mod(self): 184 | return self.mod_ 185 | -------------------------------------------------------------------------------- /pyprof/prof/softmax.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from .base import OperatorLayerBase 19 | from .tensor import Tensor 20 | 21 | 22 | class Softmax(OperatorLayerBase): 23 | 24 | def __init__(self, d): 25 | marker = eval(d.argMarker[0]) 26 | mod = marker['mod'] 27 | op = marker['op'] 28 | args = marker['args'] 29 | 30 | self.mod_ = mod 31 | self.op_ = op 32 | 33 | assert (mod == "torch.nn.functional") 34 | assert (op == "softmax") 35 | 36 | #Filter out named parameters 37 | args = list(filter(lambda x: x['name'] == '', args)) 38 | 39 | assert (len(args) <= 2) 40 | arg = args[0] 41 | self.input = Tensor(arg['shape'], arg['dtype']) 42 | self.dir = d.dir 43 | return 44 | 45 | def op(self): 46 | return self.op_ 47 | 48 | def mod(self): 49 | return self.mod_ 50 | 51 | def tc(self): 52 | return "-" 53 | 54 | def params(self): 55 | return str(self.input) 56 | 57 | def flops(self): 58 | # An approximation 59 | # http://ai.stanford.edu/~paskin/slam/javadoc/javaslam/util/Flops.html#exp() 60 | # TODO: consider direction 61 | e = self.input.size 62 | f = e * 20 # denominator, exp all elements and reduce 63 | f += e * 20 # numerator, exp all elements and divide 64 | return f 65 | 66 | def bytes(self): 67 | # TODO: verify 68 | b = self.input.bytes 69 | # fprop is 2 reads, 1 write 70 | # bprop is 4 reads, 1 write 71 | b *= 3 if self.dir == "fprop" else 5 72 | return b 73 | 74 | 75 | class LogSoftmax(OperatorLayerBase): 76 | 77 | def __init__(self, d): 78 | marker = eval(d.argMarker[0]) 79 | mod = marker['mod'] 80 | op = marker['op'] 81 | args = marker['args'] 82 | 83 | self.mod_ = mod 84 | self.op_ = op 85 | 86 | assert (mod in ["torch", "Tensor", "torch.nn.functional"]) 87 | assert (op == "log_softmax") 88 | 89 | #Filter out named parameters 90 | args = list(filter(lambda x: x['name'] == '', args)) 91 | 92 | assert (len(args) <= 2) 93 | 94 | #Get input 95 | if (args[0]['name'] == ""): 96 | i = args[0] 97 | else: 98 | i = list(filter(lambda x: x['name'] == "input", args))[0] 99 | 100 | self.input = Tensor(i['shape'], i['dtype']) 101 | self.dir = d.dir 102 | return 103 | 104 | def op(self): 105 | return self.op_ 106 | 107 | def mod(self): 108 | return self.mod_ 109 | 110 | def tc(self): 111 | return "-" 112 | 113 | def params(self): 114 | return str(self.input) 115 | 116 | def flops(self): 117 | # An approximation 118 | # http://ai.stanford.edu/~paskin/slam/javadoc/javaslam/util/Flops.html#exp() 119 | # TODO: consider direction 120 | e = self.input.size 121 | f = e * 20 # denominator, exp all elements and reduce 122 | f += e # numerator, just a 
subtraction 123 | return f 124 | 125 | def bytes(self): 126 | # TODO: verify 127 | b = self.input.bytes 128 | # fprop is 2 reads, 1 write 129 | # bprop is 4 reads, 1 write 130 | b *= 3 if self.dir == "fprop" else 5 131 | return b 132 | -------------------------------------------------------------------------------- /pyprof/prof/tc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | 19 | class TC_Whitelist: 20 | whitelist = ['h884', 's884', 'h1688', 's1688', 'hmma', 'i8816', '16816', 21 | 'dgrad_1x1_stride_2x2', 'first_layer_wgrad_kernel', 'conv1x1', 22 | 'conv2d_c1_k1', 'direct_group', 'xmma_implicit_gemm', 23 | 'xmma_sparse_conv', 'xmma_warp_specialized_implicit_gemm', 24 | 'xmma_gemm', 'xmma_sparse_gemm', 'c1688'] 25 | def __contains__(self, item): 26 | for pattern in self.whitelist: 27 | if pattern in item: 28 | return True 29 | return False 30 | -------------------------------------------------------------------------------- /pyprof/prof/tensor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2020, Aditya Agrawal. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | from functools import reduce 18 | import numpy as np 19 | from .dtype import Dtype 20 | 21 | class Tensor(object): 22 | def __init__(self, shape, dtype): 23 | assert type(shape) in [tuple, list] 24 | assert dtype in Dtype.types() 25 | self._shape = list(shape) 26 | self._dtype = dtype 27 | 28 | def __str__(self): 29 | t = Dtype(self.dtype) 30 | return str(self.shape).replace(" ", "") + str(t) 31 | 32 | @property 33 | def ndim(self): 34 | # can be 0 for scalars 35 | return len(self._shape) 36 | 37 | @property 38 | def shape(self): 39 | # can be () for scalars 40 | return self._shape 41 | 42 | @property 43 | def size(self): 44 | # number of elements 45 | return reduce(lambda x, y: x * y, self.shape, 1) 46 | 47 | @property 48 | def dtype(self): 49 | return self._dtype 50 | 51 | @property 52 | def itemsize(self): 53 | return Dtype(self.dtype).itemsize 54 | 55 | @property 56 | def bytes(self): 57 | return self.size * self.itemsize 58 | 59 | @staticmethod 60 | def broadcast(tensors): 61 | r''' 62 | The input is a list of Tensors. 
63 | The output is a Tensor. 64 | ''' 65 | 66 | assert len(tensors) > 1 67 | shape = tensors[0].shape 68 | # TODO: Assume the output dtype is the same as the first arg 69 | dt = tensors[0].dtype 70 | 71 | # Check if shapes are different 72 | if any(t.shape != shape for t in tensors): 73 | x = [np.empty(t.shape, t.dtype) for t in tensors] 74 | try: 75 | out = np.broadcast(*x) 76 | except: 77 | assert False # not broadcastable 78 | return Tensor(out.shape, dt) 79 | else: 80 | return Tensor(shape, dt) 81 | 82 | def main(): 83 | for shape in [(), (1,), (3,7), (3,7,11)]: 84 | for dt in Dtype.types(): 85 | t = Tensor(shape, dt) 86 | print(t.ndim, str(t.shape).replace(" ", ""), \ 87 | t.size, t.dtype, t.itemsize, t.bytes, t) 88 | 89 | # Broadcast test 90 | a = Tensor([1,3], "int") 91 | b = Tensor([3,1], "float") 92 | c = Tensor([1,3], "float64") 93 | d = np.ones([], "float64") 94 | out = Tensor.broadcast([a,b,c,d]) 95 | print(out.shape) 96 | 97 | if __name__ == '__main__': 98 | main() 99 | -------------------------------------------------------------------------------- /pyprof/prof/usage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import sys 19 | import argparse 20 | 21 | 22 | def parseArgs(): 23 | """ 24 | Print usage and parse arguments. 25 | """ 26 | 27 | def check_cols(value): 28 | valid = [ 29 | "idx", "seq", "altseq", "tid", "layer", "trace", "dir", "sub", "mod", "op", "kernel", "params", "sil", "tc", 30 | "device", "stream", "grid", "block", "flops", "bytes" 31 | ] 32 | cols = value.split(",") 33 | for col in cols: 34 | if col not in valid: 35 | raise argparse.ArgumentTypeError( 36 | "{} is not a valid column name. Valid column names are {}.".format(col, ",".join(valid)) 37 | ) 38 | return cols 39 | 40 | def openFile(f): 41 | try: 42 | d = open(f, "r") 43 | return d 44 | except IOError: 45 | print("Error opening file {}. Exiting.".format(f), file=sys.stderr) 46 | sys.exit(1) 47 | 48 | parser = argparse.ArgumentParser( 49 | prog=sys.argv[0], description="PyTorch Profiler", formatter_class=argparse.RawTextHelpFormatter 50 | ) 51 | parser.add_argument("file", nargs='?', type=str, default=None, help="Output of parse.py (Python dictionary).") 52 | 53 | parser.add_argument( 54 | "-c", type=check_cols, default="idx,dir,sub,mod,op,kernel,params,sil", 55 | help='''Comma seperated names of columns to print. 
56 | idx: Index 57 | seq: PyTorch Sequence Id 58 | altseq: PyTorch Alternate Sequence Id 59 | tid: Thread Id 60 | layer: User annotated NVTX string (can be nested) 61 | trace: Function Call Trace 62 | dir: Direction 63 | sub: Sub Sequence Id 64 | mod: Module 65 | op: Operation 66 | kernel: Kernel Name 67 | params: Parameters 68 | sil: Silicon Time (in ns) 69 | tc: Tensor Core Usage 70 | device: GPU Device Id 71 | stream: Stream Id 72 | grid: Grid Dimensions 73 | block: Block Dimensions 74 | flops: Floating point ops (FMA = 2 FLOPs) 75 | bytes: Number of bytes in and out of DRAM 76 | e.g. -c idx,kernel,sil''' 77 | ) 78 | 79 | group = parser.add_mutually_exclusive_group() 80 | group.add_argument("--csv", action="store_true", default=False, help="Print a CSV output.") 81 | group.add_argument("-w", type=int, default=0, help="Width of columnar output.") 82 | 83 | args = parser.parse_args() 84 | if args.file is None: 85 | args.file = sys.stdin 86 | else: 87 | args.file = openFile(args.file) 88 | return args 89 | -------------------------------------------------------------------------------- /pyprof/prof/utility.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License.
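# Shape/dtype helper routines. Illustrative examples, derived from the code below: numElems((3, 7)) == 21, typeToBytes("float16") == 2, typeToString("half") == "fp16".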
17 | 18 | from functools import reduce 19 | 20 | 21 | class Utility(object): 22 | 23 | @staticmethod 24 | def numElems(shape): 25 | assert (type(shape) == tuple) 26 | return reduce(lambda x, y: x * y, shape, 1) 27 | 28 | @staticmethod 29 | def typeToBytes(t): 30 | if (t in ["uint8", "int8", "byte", "char", "bool"]): 31 | return 1 32 | elif (t in ["float16", "half", "int16", "short"]): 33 | return 2 34 | elif (t in ["float32", "float", "int32", "int"]): 35 | return 4 36 | elif (t in ["int64", "long", "float64", "double"]): 37 | return 8 38 | assert False 39 | 40 | @staticmethod 41 | def typeToString(t): 42 | if (t in ["uint8", "byte", "char"]): 43 | return "uint8" 44 | elif (t in [ 45 | "int8", 46 | ]): 47 | return "int8" 48 | elif (t in [ 49 | "int16", 50 | "short", 51 | ]): 52 | return "int16" 53 | elif (t in ["float16", "half"]): 54 | return "fp16" 55 | elif (t in ["float32", "float"]): 56 | return "fp32" 57 | elif (t in [ 58 | "int32", 59 | "int", 60 | ]): 61 | return "int32" 62 | elif (t in ["int64", "long"]): 63 | return "int64" 64 | elif (t in [ 65 | "float64", 66 | "double", 67 | ]): 68 | return "fp64" 69 | elif (t in [ 70 | "bool", 71 | ]): 72 | return "bool" 73 | assert False 74 | 75 | @staticmethod 76 | def hasNVTX(marker): 77 | if type(marker) is str: 78 | try: 79 | marker = eval(marker) 80 | except: 81 | return False 82 | 83 | if type(marker) is dict: 84 | keys = marker.keys() 85 | return ("mod" in keys) and ("op" in keys) and ("args" in keys) 86 | else: 87 | return False 88 | 89 | @staticmethod 90 | def isscalar(t): 91 | return (t in ["float", "int"]) 92 | -------------------------------------------------------------------------------- /qa/L0_docs/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | TEST_LOG="./docs.log" 17 | 18 | rm -f $TEST_LOG 19 | RET=0 20 | 21 | apt-get update && \ 22 | apt-get install -y --no-install-recommends python3-pip zip doxygen && \ 23 | rm -rf /root/.cache/pip && \ 24 | pip uninstall -y Sphinx && \ 25 | pip3 install --upgrade setuptools wheel && \ 26 | pip3 install --upgrade sphinx==2.4.4 sphinx-rtd-theme==0.4.3 \ 27 | nbsphinx==0.6.0 breathe==4.14.1 28 | 29 | set +e 30 | 31 | # Set visitor script to be included on every HTML page 32 | export VISITS_COUNTING_SCRIPT=//assets.adobedtm.com/b92787824f2e0e9b68dc2e993f9bd995339fe417/satelliteLib-7ba51e58dc61bcb0e9311aadd02a0108ab24cc6c.js 33 | 34 | (cd docs && rm -f pyprof_docs.zip && \ 35 | make BUILDDIR=/opt/pytorch/pyprof/qa/L0_docs/build clean html) > $TEST_LOG 2>&1 36 | if [ $? -ne 0 ]; then 37 | RET=1 38 | fi 39 | 40 | (cd build && zip -r ../pyprof_docs.zip html) 41 | if [ $? 
-ne 0 ]; then 42 | RET=1 43 | fi 44 | 45 | set -e 46 | 47 | if [ $RET -eq 0 ]; then 48 | echo -e "\n***\n*** Test Passed\n***" 49 | else 50 | cat $TEST_LOG 51 | echo -e "\n***\n*** Test FAILED\n***" 52 | fi 53 | 54 | exit $RET 55 | -------------------------------------------------------------------------------- /qa/L0_lenet/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | TEST_LOG="./data.log" 17 | 18 | 19 | rm -f $TEST_LOG 20 | RET=0 21 | 22 | set +e 23 | 24 | ./test_lenet.py > $TEST_LOG 2>&1 25 | if [ $? -ne 0 ]; then 26 | RET=1 27 | fi 28 | 29 | set -e 30 | 31 | if [ $RET -eq 0 ]; then 32 | echo -e "\n***\n*** Test Passed\n***" 33 | else 34 | cat $TEST_LOG 35 | echo -e "\n***\n*** Test FAILED\n***" 36 | fi 37 | 38 | exit $RET 39 | -------------------------------------------------------------------------------- /qa/L0_lenet/test_lenet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ''' 18 | This test runs lenet through the 3 steps on pyprof. 
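(The steps are: nsys profile to produce an SQLite database, python -m pyprof.parse to build the dict, and python -m pyprof.prof to emit the CSV, as invoked by the test methods below.)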
19 | It ensures: 20 | - A database is created from nsys 21 | - A dict is created from pyprof.parse 22 | - A csv with valid data is created from pyprof.prof 23 | ''' 24 | 25 | import subprocess 26 | from pathlib import Path 27 | import unittest 28 | import csv 29 | 30 | unittest.TestLoader.sortTestMethodsUsing = None 31 | 32 | 33 | class TestPyprofWithLenet(unittest.TestCase): 34 | 35 | @classmethod 36 | def setUpClass(cls): 37 | cls.pyprof_path = Path("/opt/pytorch/pyprof/pyprof/examples") 38 | 39 | def test_run_nsys(self): 40 | # Print a blank line to make the test output more readable 41 | print() 42 | command = "nsys profile -f true -o lenet --export sqlite python " + self.pyprof_path.as_posix() + "/lenet.py" 43 | command_tokens = command.split() 44 | 45 | ret_val = subprocess.run(command_tokens) 46 | 47 | self.assertEqual(ret_val.returncode, 0) 48 | db_path = Path('./lenet.sqlite') 49 | self.assertTrue(db_path.exists()) 50 | 51 | def test_run_parse(self): 52 | command = "python -m pyprof.parse lenet.sqlite" 53 | command_tokens = command.split() 54 | 55 | with open("lenet.dict", "w") as f: 56 | ret_val = subprocess.run(command_tokens, stdout=f) 57 | 58 | self.assertEqual(ret_val.returncode, 0) 59 | dict_path = Path('./lenet.dict') 60 | self.assertTrue(dict_path.exists()) 61 | 62 | def test_run_profile(self): 63 | lenet_csv = "./lenet.csv" 64 | command = "python -m pyprof.prof --csv lenet.dict" 65 | command_tokens = command.split() 66 | with open(lenet_csv, "w") as f: 67 | ret_val = subprocess.run(command_tokens, stdout=f) 68 | 69 | self.assertEqual(ret_val.returncode, 0) 70 | csv_path = Path(lenet_csv) 71 | self.assertTrue(csv_path.exists()) 72 | 73 | directions = ["bprop", "fprop"] 74 | ops = [ 75 | "", # covers the "reduce_kernel" kernel, op will be an empty string in the report 76 | "add_", 77 | "backward", 78 | "bias", 79 | "conv2d", 80 | "linear", 81 | "max_pool2d", 82 | "mse_loss", 83 | "relu", 84 | "sum", 85 | ] 86 | 87 | with open("lenet.csv", "r") as csvfile: 88 | reader = csv.DictReader(csvfile) 89 | for row in reader: 90 | # verify direction 91 | self.assertTrue(row['Direction'] in directions, f"Row direction: {row['Direction']}") 92 | # verify op 93 | self.assertTrue(row['Op'] in ops, f"Row op: {row['Op']}") 94 | # verify final id is in the range 95 | # Which kernel cuDNN uses is nondeterministic. 96 | # While the exact number of kernels is not clear, for this network the final Idx should fall within the range checked below. 97 | self.assertTrue(int(row['Idx']) in range(65, 75), f"Final Idx: {row['Idx']}") 98 | 99 | 100 | if __name__ == '__main__': 101 | unittest.main(verbosity=2) 102 | -------------------------------------------------------------------------------- /qa/L0_nvtx/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | from test_pyprof_nvtx import TestPyProfNvtx 16 | -------------------------------------------------------------------------------- /qa/L0_nvtx/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | TEST_LOG="./nvtx.log" 17 | 18 | 19 | apt-get update && \ 20 | apt-get install -y --no-install-recommends python3 21 | 22 | rm -f $TEST_LOG 23 | RET=0 24 | 25 | ./test_pyprof_nvtx.py > $TEST_LOG 2>&1 26 | if [ $? -ne 0 ]; then 27 | RET=1 28 | fi 29 | 30 | set -e 31 | 32 | if [ $RET -eq 0 ]; then 33 | echo -e "\n***\n*** Test Passed\n***" 34 | else 35 | cat $TEST_LOG 36 | echo -e "\n***\n*** Test FAILED\n***" 37 | fi 38 | 39 | exit $RET -------------------------------------------------------------------------------- /qa/L0_pyprof_data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/PyProf/218dcc183bf7fdf97dbfc648878a3d09aea3b199/qa/L0_pyprof_data/__init__.py -------------------------------------------------------------------------------- /qa/L0_pyprof_data/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | TEST_LOG="./data.log" 17 | 18 | 19 | apt-get update && \ 20 | apt-get install -y --no-install-recommends python 21 | 22 | rm -f $TEST_LOG 23 | RET=0 24 | 25 | ./test_pyprof_data.py > $TEST_LOG 2>&1 26 | if [ $? -ne 0 ]; then 27 | RET=1 28 | fi 29 | 30 | set -e 31 | 32 | if [ $RET -eq 0 ]; then 33 | echo -e "\n***\n*** Test Passed\n***" 34 | else 35 | cat $TEST_LOG 36 | echo -e "\n***\n*** Test FAILED\n***" 37 | fi 38 | 39 | exit $RET -------------------------------------------------------------------------------- /qa/L0_pyprof_data/test_pyprof_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ''' 18 | This test creates 2 kernels and exercises the pyprof code for generating their representation. 19 | ''' 20 | import inspect 21 | import unittest 22 | 23 | from pyprof.prof.data import Data 24 | from pyprof.prof.prof import foo 25 | 26 | 27 | class TestPyProfData(unittest.TestCase): 28 | 29 | def __init__(self, testName): 30 | super().__init__(testName) 31 | 32 | def setUp(self): 33 | pass 34 | 35 | def tearDown(self): 36 | pass 37 | 38 | def test_data(self): 39 | kernels = [ 40 | { 41 | 'kShortName': 42 | 'elementwise_kernel', 43 | 'kDuration': 44 | 2848, 45 | 'layer': [], 46 | 'trace': [], 47 | 'reprMarkers': [], 48 | 'marker': 49 | [ 50 | "{'mod': 'Tensor', 'op': 'float', 'args': [{'name': '', 'type': 'tensor', 'shape': (18, 104, 160), 'dtype': 'bool'}]}" 51 | ], 52 | 'seqMarker': ['to, seq = 60471'], 53 | 'seqId': [60471], 54 | 'subSeqId': 55 | 0, 56 | 'altSeqId': [], 57 | 'dir': 58 | 'fprop', 59 | 'mod': ['Tensor'], 60 | 'op': ['float'], 61 | 'tid': 62 | 1431533376, 63 | 'device': 64 | 0, 65 | 'stream': 66 | 7, 67 | 'grid': (585, 1, 1), 68 | 'block': (512, 1, 1), 69 | 'kLongName': 70 | 'void at::native::elementwise_kernel<512, 1, void at::native::gpu_kernel_impl(at::TensorIterator&)::{lambda(bool)#1}>(at::TensorIterator&, void at::native::copy_kernel_impl(at::TensorIterator&)::{lambda(bool)#1} const&)::{lambda(int)#1}>(int, void at::native::gpu_kernel_impl(at::TensorIterator&)::{lambda(bool)#1}>(at::TensorIterator&, void at::native::copy_kernel_impl(at::TensorIterator&)::{lambda(bool)#1} const&)::{lambda(int)#1})' 71 | }, 72 | { 73 | 'kShortName': 74 | 'elementwise_kernel', 75 | 'kDuration': 76 | 201182, 77 | 'layer': [], 78 | 'trace': [], 79 | 'reprMarkers': [], 80 | 'marker': 81 | [ 82 | "{'mod': 'Tensor', 'op': 'clone', 'args': [{'name': '', 'type': 'tensor', 'shape': (18, 4, 416, 640), 'dtype': 'float32'}]}" 83 | ], 84 | 'seqMarker': ['clone, seq = 60161'], 85 | 'seqId': [60161], 86 | 'subSeqId': 87 | 0, 88 | 'altSeqId': [], 89 | 'dir': 90 | 'fprop', 91 | 'mod': ['Tensor'], 92 | 'op': ['clone'], 93 | 'tid': 94 | 1431533376, 95 | 'device': 96 | 0, 97 | 'stream': 98 | 7, 99 | 'grid': (37440, 1, 1), 100 | 'block': (128, 1, 1), 101 | 'kLongName': 102 | 'void at::native::elementwise_kernel<128, 4, void at::native::gpu_kernel_impl(at::TensorIterator&)::{lambda(float)#1}>(at::TensorIterator&, void at::native::copy_kernel_impl(at::TensorIterator&)::{lambda(float)#1} const&)::{lambda(int)#2}>(int, void at::native::gpu_kernel_impl(at::TensorIterator&)::{lambda(float)#1}>(at::TensorIterator&, void at::native::copy_kernel_impl(at::TensorIterator&)::{lambda(float)#1} const&)::{lambda(int)#2})' 103 | }, 104 | ] 105 | 106 | for k in kernels: 107 | d = Data(k) 108 | mod = k['mod'] 109 | op = k['op'] 110 | xx = foo(mod, op, d) 111 | d.setParams(xx.params()) 112 | 113 | 114 | def run_tests(test_name): 115 | dummy = TestPyProfData(test_name) 116 | test_cases = list( 117 | filter(lambda x: 'test_' in x, map(lambda x: x[0], inspect.getmembers(dummy, predicate=inspect.ismethod))) 118 | ) 119 | print(f'Running tests for {test_name}') 120 | suite = 
unittest.TestSuite() 121 | for test_case in test_cases: 122 | suite.addTest(TestPyProfData(test_case)) 123 | result = unittest.TextTestRunner(verbosity=2).run(suite) 124 | if result.wasSuccessful(): 125 | exit(0) 126 | else: 127 | exit(1) 128 | 129 | 130 | if __name__ == '__main__': 131 | run_tests('test_data') 132 | -------------------------------------------------------------------------------- /qa/common/check_copyright.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import argparse 18 | import os 19 | import re 20 | 21 | FLAGS = None 22 | SKIP_EXTS = ('jpeg', 'jpg', 'pgm', 'png', 23 | 'log', 'serverlog', 24 | 'preprocessed', 'jmx', 'gz', 25 | 'caffemodel', 'json') 26 | SKIP_PATHS = ('requirements.txt', 27 | 'requirements/requirements_nsys.txt', 28 | 'requirements/requirements.txt', 29 | 'qa/L0_docs/VERSION', 30 | 'LICENSE', 31 | 'VERSION', 32 | 'MANIFEST.in', 33 | 'build/', 34 | 'dist/', 35 | 'nvidia_pyprof.egg-info/') 36 | 37 | COPYRIGHT_YEAR_RE0 = 'Copyright \\(c\\) (20[0-9][0-9]),' 38 | COPYRIGHT_YEAR_RE1 = 'Copyright \\(c\\) (20[0-9][0-9])-(20[0-9][0-9]),' 39 | 40 | COPYRIGHT =''' 41 | 42 | Licensed under the Apache License, Version 2.0 (the "License"); 43 | you may not use this file except in compliance with the License. 44 | You may obtain a copy of the License at 45 | 46 | http://www.apache.org/licenses/LICENSE-2.0 47 | 48 | Unless required by applicable law or agreed to in writing, software 49 | distributed under the License is distributed on an "AS IS" BASIS, 50 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 51 | See the License for the specific language governing permissions and 52 | limitations under the License. 53 | ''' 54 | 55 | single_re = re.compile(COPYRIGHT_YEAR_RE0) 56 | range_re = re.compile(COPYRIGHT_YEAR_RE1) 57 | 58 | def visit(path): 59 | if FLAGS.verbose: 60 | print("visiting " + path) 61 | 62 | for skip in SKIP_EXTS: 63 | if path.endswith('.' + skip): 64 | if FLAGS.verbose: 65 | print("skipping due to extension: " + path) 66 | return True 67 | 68 | for skip in SKIP_PATHS: 69 | if path.startswith(skip): 70 | if FLAGS.verbose: 71 | print("skipping due to path prefix: " + path) 72 | return True 73 | 74 | with open(path, 'r') as f: 75 | first_line = True 76 | second_line = True 77 | line = None 78 | try: 79 | for fline in f: 80 | line = fline 81 | 82 | # Skip any '#!', '..', '