├── .dockerignore ├── .gitignore ├── CONTRIBUTING.md ├── Dockerfile ├── LICENSE ├── MANIFEST.in ├── README.rst ├── VERSION ├── docs ├── Makefile ├── _templates │ └── layout.html ├── advanced.rst ├── conf.py ├── examples.rst ├── faqs.rst ├── index.rst ├── install.rst ├── profile.rst └── quickstart.rst ├── pyprof ├── __init__.py ├── examples │ ├── .gitignore │ ├── custom_func_module │ │ ├── README.md │ │ ├── custom_function.py │ │ ├── custom_module.py │ │ └── test.sh │ ├── imagenet │ │ ├── imagenet.py │ │ └── test.sh │ ├── jit │ │ ├── README.md │ │ ├── jit_script_function.py │ │ ├── jit_script_method.py │ │ ├── jit_trace_function.py │ │ ├── jit_trace_method.py │ │ └── test.sh │ ├── lenet.py │ ├── operators.py │ ├── simple.py │ └── user_annotation │ │ ├── README.md │ │ ├── resnet.py │ │ └── test.sh ├── nvtx │ ├── __init__.py │ └── nvmarker.py ├── parse │ ├── __init__.py │ ├── __main__.py │ ├── db.py │ ├── kernel.py │ ├── nsight.py │ ├── nvvp.py │ └── parse.py └── prof │ ├── __init__.py │ ├── __main__.py │ ├── activation.py │ ├── base.py │ ├── blas.py │ ├── conv.py │ ├── convert.py │ ├── data.py │ ├── dropout.py │ ├── dtype.py │ ├── embedding.py │ ├── index_slice_join_mutate.py │ ├── linear.py │ ├── loss.py │ ├── memory.py │ ├── misc.py │ ├── normalization.py │ ├── optim.py │ ├── output.py │ ├── pointwise.py │ ├── pooling.py │ ├── prof.py │ ├── randomSample.py │ ├── recurrentCell.py │ ├── reduction.py │ ├── softmax.py │ ├── tc.py │ ├── tensor.py │ ├── usage.py │ └── utility.py ├── qa ├── L0_docs │ └── test.sh ├── L0_lenet │ ├── test.sh │ └── test_lenet.py ├── L0_nvtx │ ├── __init__.py │ ├── test.sh │ └── test_pyprof_nvtx.py ├── L0_pyprof_data │ ├── __init__.py │ ├── test.sh │ └── test_pyprof_data.py └── common │ ├── check_copyright.py │ └── run_test.py ├── requirements ├── requirements.txt └── requirements_nsys.txt ├── setup.cfg └── setup.py /.dockerignore: -------------------------------------------------------------------------------- 1 | .git* -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.sql 3 | *.sqlite 4 | *.qdrep 5 | *.dict 6 | *.csv 7 | *.log 8 | *.pyc 9 | 10 | build/ 11 | dist/ 12 | nvidia_pyprof.egg-info/ 13 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Contribution Rules 18 | 19 | - The code style convention is enforced by clang-format. See the 20 | Developer Guide for instructions on how to ensure your contributions 21 | conform. In general please follow the existing conventions in the 22 | relevant file, submodule, module, and project when you add new code 23 | or when you extend/fix existing functionality. 24 | 25 | - Avoid introducing unnecessary complexity into existing code so that 26 | maintainability and readability are preserved. 27 | 28 | - Try to keep pull requests (PRs) as concise as possible: 29 | 30 | - Avoid committing commented-out code. 31 | 32 | - Wherever possible, each PR should address a single concern. If 33 | there are several otherwise-unrelated things that should be fixed 34 | to reach a desired endpoint, it is perfectly fine to open several 35 | PRs and state in the description which PR depends on another 36 | PR. The more complex the changes are in a single PR, the more time 37 | it will take to review those changes. 
38 | 39 | - Make sure that the build log is clean, meaning no warnings or 40 | errors should be present. 41 | 42 | - Make sure all `L0_*` tests pass: 43 | 44 | - In the `qa/` directory, there are basic sanity tests scripted in 45 | directories named `L0_...`. See the Testing section in the 46 | Developer Guide for instructions on running these tests. 47 | 48 | - PyProf's default build assumes recent versions of 49 | dependencies (CUDA, PyTorch, Nsight Systems, etc.). Contributions 50 | that add compatibility with older versions of those dependencies 51 | will be considered, but NVIDIA cannot guarantee that all possible 52 | build configurations work, are not broken by future contributions, 53 | and retain highest performance. 54 | 55 | - Make sure that you can contribute your work to open source (no 56 | license and/or patent conflict is introduced by your code). You need 57 | to [`sign`](#Sign) your commit. 58 | 59 | - Thanks in advance for your patience as we review your contributions; 60 | we do appreciate them! 61 | 62 | Sign Your Work 63 | -------------- 64 | 65 | We require that all contributors "sign-off" on their commits. This 66 | certifies that the contribution is your original work, or you have 67 | rights to submit it under the same license, or a compatible license. 68 | 69 | Any contribution which contains commits that are not Signed-Off will 70 | not be accepted. 71 | 72 | To sign off on a commit you simply use the `--signoff` (or `-s`) 73 | option when committing your changes: 74 | 75 | $ git commit -s -m "Add cool feature." 76 | 77 | This will append the following to your commit message: 78 | 79 | Signed-off-by: Your Name 80 | 81 | By doing this you certify the below: 82 | 83 | Developer Certificate of Origin 84 | Version 1.1 85 | 86 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 87 | 1 Letterman Drive 88 | Suite D4700 89 | San Francisco, CA, 94129 90 | 91 | Everyone is permitted to copy and distribute verbatim copies of 92 | this license document, but changing it is not allowed. 93 | 94 | 95 | Developer's Certificate of Origin 1.1 96 | 97 | By making a contribution to this project, I certify that: 98 | 99 | (a) The contribution was created in whole or in part by me and I 100 | have the right to submit it under the open source license 101 | indicated in the file; or 102 | 103 | (b) The contribution is based upon previous work that, to the best 104 | of my knowledge, is covered under an appropriate open source 105 | license and I have the right under that license to submit that 106 | work with modifications, whether created in whole or in part by 107 | me, under the same open source license (unless I am permitted to 108 | submit under a different license), as indicated in the file; or 109 | 110 | (c) The contribution was provided directly to me by some other 111 | person who certified (a), (b) or (c) and I have not modified it. 112 | 113 | (d) I understand and agree that this project and the contribution 114 | are public and that a record of the contribution (including all 115 | personal information I submit with it, including my sign-off) is 116 | maintained indefinitely and may be redistributed consistent with 117 | this project or the open source license(s) involved. -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:21.04-py3 16 | 17 | ############################################################################ 18 | ## Install PyProf 19 | ############################################################################ 20 | FROM $BASE_IMAGE 21 | 22 | ARG PYPROF_VERSION=3.11.0dev 23 | ARG PYPROF_CONTAINER_VERSION=21.06dev 24 | 25 | # Copy entire repo into container even though some is not needed for the 26 | # build itself... because we want to be able to copyright check on 27 | # files that aren't directly needed for build. 28 | WORKDIR /opt/pytorch/pyprof 29 | RUN rm -fr * 30 | COPY . . 31 | 32 | RUN pip uninstall -y pyprof 33 | RUN pip install --no-cache-dir . 34 | 35 | # Generating the docs requires the docs source so copy that into the L0_docs so 36 | # that it is available when the test runs. 37 | RUN cp VERSION qa/L0_docs/. && \ 38 | cp README.rst qa/L0_docs/. && \ 39 | cp -r docs qa/L0_docs/. 40 | 41 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.py 2 | recursive-include pyprof * 3 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. 2 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | |License| 17 | 18 | PyProf - PyTorch Profiling tool 19 | =============================== 20 | 21 | **ANNOUNCEMENT:** 22 | On June 30th 2021, NVIDIA will no longer make contributions to the PyProf repository. 23 | 24 | To profile models in PyTorch, please use `NVIDIA Deep Learning Profiler (DLProf) `_ 25 | 26 | DLProf can help data scientists, engineers, and researchers understand and improve performance of their models by analyzing text reports or visualizing the reports in a web browser with the DLProf Viewer 27 | 28 | DLProf is available on NGC or as a python PIP wheel installation. 29 | 30 | To look for continued development on PyProf, please use https://github.com/adityaiitb/PyProf 31 | 32 | .. overview-begin-marker-do-not-remove 33 | 34 | PyProf is a tool that profiles and analyzes the GPU performance of PyTorch 35 | models. 
PyProf aggregates kernel performance from `Nsight Systems 36 | `_ or `NvProf 37 | `_ and provides the 38 | following additional features: 39 | 40 | * Identifies the layer that launched a kernel: e.g. the association of 41 | `ComputeOffsetsKernel` with a concrete PyTorch layer or API is not obvious. 42 | 43 | * Identifies the tensor dimensions and precision: without knowing the tensor 44 | dimensions and precision, it's impossible to reason about whether the actual 45 | (silicon) kernel time is close to maximum performance of such a kernel on 46 | the GPU. Knowing the tensor dimensions and precision, we can figure out the 47 | FLOPs and bandwidth required by a layer, and then determine how close to 48 | maximum performance the kernel is for that operation. 49 | 50 | * Forward-backward correlation: PyProf determines what the forward pass step 51 | is that resulted in the particular weight and data gradients (wgrad, dgrad), 52 | which makes it possible to determine the tensor dimensions required by these 53 | backprop steps to assess their performance. 54 | 55 | * Determines Tensor Core usage: PyProf can highlight the kernels that use 56 | `Tensor Cores `_. 57 | 58 | * Correlate the line in the user's code that launched a particular kernel (program trace). 59 | 60 | .. overview-end-marker-do-not-remove 61 | 62 | The current release of PyProf is 3.10.0 and is available in the 21.04 release of 63 | the PyTorch container on `NVIDIA GPU Cloud (NGC) `_. The 64 | branch for this release is `r21.04 65 | `_. 66 | 67 | Quick Installation Instructions 68 | ------------------------------- 69 | 70 | .. quick-install-start-marker-do-not-remove 71 | 72 | * Clone the git repository :: 73 | 74 | $ git clone https://github.com/NVIDIA/PyProf.git 75 | 76 | * Navigate to the top level PyProf directory 77 | 78 | * Install PyProf :: 79 | 80 | $ pip install . 81 | 82 | * Verify installation is complete with pip list :: 83 | 84 | $ pip list | grep pyprof 85 | 86 | * Should display :: 87 | 88 | pyprof 3.11.0.dev0 89 | 90 | .. quick-install-end-marker-do-not-remove 91 | 92 | Quick Start Instructions 93 | ------------------------ 94 | 95 | .. quick-start-start-marker-do-not-remove 96 | 97 | * Add the following lines to the PyTorch network you want to profile: :: 98 | 99 | import torch.cuda.profiler as profiler 100 | import pyprof 101 | pyprof.init() 102 | 103 | * Profile with NVProf or Nsight Systems to generate a SQL file. :: 104 | 105 | $ nsys profile -f true -o net --export sqlite python net.py 106 | 107 | * Run the parse.py script to generate the dictionary. :: 108 | 109 | $ python -m pyprof.parse net.sqlite > net.dict 110 | 111 | * Run the prof.py script to generate the reports. :: 112 | 113 | $ python -m pyprof.prof --csv net.dict 114 | 115 | .. quick-start-end-marker-do-not-remove 116 | 117 | Documentation 118 | ------------- 119 | 120 | The User Guide can be found in the 121 | `documentation for current release 122 | `_, and 123 | provides instructions on how to install and profile with PyProf. 124 | 125 | A complete `Quick Start Guide `_ 126 | provides step-by-step instructions to get you quickly started using PyProf. 127 | 128 | An `FAQ `_ provides 129 | answers for frequently asked questions. 130 | 131 | The `Release Notes 132 | `_ 133 | indicate the required versions of the NVIDIA Driver and CUDA, and also describe 134 | which GPUs are supported by PyProf 135 | 136 | Presentation and Papers 137 | ^^^^^^^^^^^^^^^^^^^^^^^ 138 | 139 | * `Automating End-toEnd PyTorch Profiling `_. 
140 | * `Presentation slides `_. 141 | 142 | Contributing 143 | ------------ 144 | 145 | Contributions to PyProf are more than welcome. To 146 | contribute make a pull request and follow the guidelines outlined in 147 | the `Contributing `_ document. 148 | 149 | Reporting problems, asking questions 150 | ------------------------------------ 151 | 152 | We appreciate any feedback, questions or bug reporting regarding this 153 | project. When help with code is needed, follow the process outlined in 154 | the Stack Overflow (https://stackoverflow.com/help/mcve) 155 | document. Ensure posted examples are: 156 | 157 | * minimal – use as little code as possible that still produces the 158 | same problem 159 | 160 | * complete – provide all parts needed to reproduce the problem. Check 161 | if you can strip external dependency and still show the problem. The 162 | less time we spend on reproducing problems the more time we have to 163 | fix it 164 | 165 | * verifiable – test the code you're about to provide to make sure it 166 | reproduces the problem. Remove all other problems that are not 167 | related to your request/question. 168 | 169 | .. |License| image:: https://img.shields.io/badge/License-Apache2-green.svg 170 | :target: http://www.apache.org/licenses/LICENSE-2.0 171 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 3.11.0dev 2 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # Makefile for Sphinx documentation 16 | # 17 | 18 | # You can set these variables from the command line. 19 | SPHINXOPTS = 20 | SPHINXBUILD = sphinx-build 21 | SPHINXPROJ = PyProf 22 | SOURCEDIR = . 23 | BUILDDIR = build 24 | 25 | # Put it first so that "make" without argument is like "make help". 26 | help: 27 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(O) 28 | 29 | clean: 30 | @rm -fr $(BUILDDIR) 31 | 32 | # Catch-all target: route all unknown targets to Sphinx using the new 33 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
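# For example, with the variables above, `make html` runs `sphinx-build -M html . build`
# and writes the rendered pages under build/html (assuming Sphinx is installed);
# any other Sphinx target name is forwarded to sphinx-build the same way.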
34 | %: Makefile 35 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 36 | 37 | .PHONY: help clean Makefile 38 | -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | 16 | {% extends "!layout.html" %} 17 | {% block sidebartitle %} {{ super() }} 18 | 19 | 47 | {% endblock %} 48 | 49 | {% block footer %} {{ super() }} 50 | 51 | 66 | {% endblock %} 67 | -------------------------------------------------------------------------------- /docs/advanced.rst: -------------------------------------------------------------------------------- 1 | .. 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | Advanced PyProf Usage 17 | ===================== 18 | 19 | This section demonstrates some advanced techniques to get even more from your 20 | PyProf profiles. 21 | 22 | .. _section-layer-annotation: 23 | 24 | Layer Annotation 25 | ---------------- 26 | 27 | Adding custom NVTX ranges to the model layers will allow PyProf to aggregate 28 | profile results based on the ranges. :: 29 | 30 | # examples/user_annotation/resnet.py 31 | # Use the “layer:” prefix 32 | 33 | class Bottleneck(nn.Module): 34 | def forward(self, x): 35 | nvtx.range_push("layer:Bottleneck_{}".format(self.id)) # NVTX push marker 36 | 37 | nvtx.range_push("layer:Conv1") # Nested NVTX push/pop markers 38 | out = self.conv1(x) 39 | nvtx.range_pop() 40 | 41 | nvtx.range_push("layer:BN1") # Use the “layer:” prefix 42 | out = self.bn1(out) 43 | nvtx.range_pop() 44 | 45 | nvtx.range_push("layer:ReLU") 46 | out = self.relu(out) 47 | nvtx.range_pop() 48 | 49 | ... 50 | 51 | nvtx.range_pop(); return out # NVTX pop marker, then return the output 52 | 53 | .. _section-custom-function: 54 | 55 | Custom Function 56 | --------------- 57 | 58 | The following is an example of how to enable Torch Autograd to profile a custom 59 | function. :: 60 | 61 | # examples/custom_func_module/custom_function.py 62 | 63 | import torch 64 | import pyprof 65 | pyprof.init() 66 | 67 | class Foo(torch.autograd.Function): 68 | @staticmethod 69 | def forward(ctx, in1, in2): 70 | out = in1 + in2; return out # This could be a custom C++ function 71 | @staticmethod 72 | def backward(ctx, grad): 73 | in1_grad, in2_grad = grad, grad # This could be a custom C++ function 74 | return in1_grad, in2_grad 75 | 76 | # Hook the forward and backward functions to pyprof 77 | pyprof.wrap(Foo, 'forward') 78 | pyprof.wrap(Foo, 'backward') 79 | 80 | .. _section-custom-module: 81 | 82 | Custom Module 83 | --------------- 84 | 85 | The following is an example of how to enable Torch Autograd to profile a custom 86 | module. 
:: 87 | 88 | # examples/custom_func_module/custom_module.py 89 | 90 | import torch 91 | import pyprof 92 | pyprof.init() 93 | 94 | class Foo(torch.nn.Module): 95 | def __init__(self, size): 96 | super(Foo, self).__init__() 97 | self.n = torch.nn.Parameter(torch.ones(size)) 98 | self.m = torch.nn.Parameter(torch.ones(size)) 99 | 100 | def forward(self, input): 101 | return self.n*input + self.m # This could be a custom C++ function. 102 | 103 | # Hook the forward function to pyprof 104 | pyprof.wrap(Foo, 'forward') 105 | 106 | Extensibility 107 | ------------- 108 | 109 | * For custom functions and modules, users can add flops and bytes calculations 110 | 111 | * Python code is easy to extend - no need to recompile, no need to change the 112 | PyTorch backend and resolve merge conflicts on every version upgrade 113 | 114 | Actionable Items 115 | ---------------- 116 | 117 | The following list provides some common actionable items to consider when 118 | analyzing profile results and deciding on how best to improve the performance. 119 | For more customized and directed actionable items, consider using the `NVIDIA 120 | Deep Learning Profiler `_ 121 | that provides direct *Expert Systems* feedback based on the profile. 122 | 123 | * NvProf/Nsight Systems tell us what the hotspots are, but not if we can act on 124 | them. 125 | 126 | * If a kernel runs close to max perf based on FLOPs and bytes (and maximum FLOPs 127 | and bandwidth of the GPU), then there’s no point in optimizing it even if it’s 128 | a hotspot. 129 | 130 | * If the ideal timing based on FLOPs and bytes (max(compute_time, 131 | bandwidth_time)) is much shorter than the silicon time, there’s scope for 132 | improvement. 133 | 134 | * Tensor Core usage (conv): for Volta, convolutions should have the input 135 | channel count (C) and the output channel count (K) divisible by 8, in order to 136 | use tensor cores. For Turing, it’s optimal for C and K to be divisible by 16. 137 | 138 | * Tensor core usage (GEMM): M, N and K divisible by 8 (Volta) or 16 (Turing) (https://docs.nvidia.com/deeplearning/sdk/dl-performance-guide/index.html) 139 | -------------------------------------------------------------------------------- /docs/examples.rst: -------------------------------------------------------------------------------- 1 | .. 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | .. _section-examples: 17 | 18 | Examples 19 | ======== 20 | 21 | This section provides several real examples of how to profile with PyProf. 22 | 23 | Profile Lenet 24 | ------------- 25 | 26 | Navigate to the lenet example. :: 27 | 28 | $ cd pyprof/examples 29 | 30 | Run Nsight Systems to profile the network. :: 31 | 32 | $ nsys profile -f true -o lenet --export sqlite python lenet.py 33 | 34 | Parse the resulting lenet.sqlite database. 
:: 35 | 36 | $ python -m pyprof.parse lenet.sqlite > lenet.dict 37 | 38 | Run the prof script on the resulting dictionary. :: 39 | 40 | $ python -m pyprof.prof --csv lenet.dict > lenet.csv 41 | -------------------------------------------------------------------------------- /docs/faqs.rst: -------------------------------------------------------------------------------- 1 | .. 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | .. _section-faqs: 17 | 18 | PyProf FAQs 19 | =========== 20 | 21 | **How do I intercept the Adam optimizer in APEX?** :: 22 | 23 | import pyprof 24 | import fused_adam_cuda 25 | pyprof.nvtx.wrap(fused_adam_cuda, 'adam') 26 | 27 | **What is the correct initialization if you are using JIT and/or AMP?** 28 | 29 | #. Let any JIT to finish. 30 | #. Initlialize pyprof ``pyprof.init()``. 31 | #. Initialize AMP. 32 | 33 | **How do I profile with ``torch.distributed.launch``?** :: 34 | 35 | nvprof -f -o net%p.sql --profile-from-start off --profile-child-processes \ 36 | python -m torch.distributed.launch net.py 37 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | NVIDIA PyProf - Pytorch Profiler 17 | ================================ 18 | 19 | .. include:: ../README.rst 20 | :start-after: overview-begin-marker-do-not-remove 21 | :end-before: overview-end-marker-do-not-remove 22 | 23 | .. toctree:: 24 | :hidden: 25 | 26 | Documentation home 27 | 28 | .. toctree:: 29 | :maxdepth: 2 30 | :caption: User Guide 31 | 32 | quickstart 33 | install 34 | profile 35 | advanced 36 | examples 37 | faqs -------------------------------------------------------------------------------- /docs/install.rst: -------------------------------------------------------------------------------- 1 | .. 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | .. _section-install: 17 | 18 | Installing PyProf 19 | ================= 20 | 21 | PyProf is available from GitHub. 22 | 23 | .. _section-installing-from-github: 24 | 25 | Installing from GitHub 26 | ---------------------- 27 | 28 | .. include:: ../README.rst 29 | :start-after: quick-install-start-marker-do-not-remove 30 | :end-before: quick-install-end-marker-do-not-remove 31 | 32 | .. _section-installing-from-ngc: 33 | 34 | Install from NGC Container 35 | -------------------------- 36 | 37 | PyProf is available in the PyTorch container on the `NVIDIA GPU Cloud (NGC) 38 | `_. 39 | 40 | Before you can pull a container from the NGC container registry, you 41 | must have Docker and nvidia-docker installed. For DGX users, this is 42 | explained in `Preparing to use NVIDIA Containers Getting Started Guide 43 | `_. 44 | For users other than DGX, follow the `nvidia-docker installation 45 | documentation `_ to install 46 | the most recent version of CUDA, Docker, and nvidia-docker. 47 | 48 | After performing the above setup, you can pull the PyProf container 49 | using the following command:: 50 | 51 | docker pull nvcr.io/nvidia/pytorch:20.12-py3 52 | 53 | Replace *20.12* with the version of PyTorch container that you want to pull. 54 | -------------------------------------------------------------------------------- /docs/quickstart.rst: -------------------------------------------------------------------------------- 1 | .. 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | .. _section-quickstart: 17 | 18 | Quickstart 19 | ========== 20 | 21 | PyProf is available in the following ways: 22 | 23 | * As :ref:`installable python code located in GitHub `. 24 | 25 | * As a pre-built Docker container available from the `NVIDIA GPU Cloud (NGC) 26 | `_. For more information, see :ref:`section-installing-from-ngc`. 27 | 28 | * As a buildable docker container. You can :ref:`build your 29 | own container using Docker ` 30 | 31 | .. _section-quickstart-prerequisites: 32 | 33 | Prerequisites 34 | ------------- 35 | 36 | * If you are installing directly from GitHub or building your own docker 37 | container, you will need to clone the PyProf GitHub repo. Go to 38 | https://github.com/NVIDIA/PyProf and then select the *clone* or *download* 39 | drop down button. 
After cloning the repo be sure to select the r 40 | release branch that corresponds to the version of PyProf want to use:: 41 | 42 | $ git checkout r20.12 43 | 44 | * If you are starting with a pre-built NGC container, you will need to install 45 | Docker and nvidia-docker. For DGX users, see `Preparing to use NVIDIA Containers 46 | `_. 47 | For users other than DGX, see the `nvidia-docker installation documentation 48 | `_. 49 | 50 | .. _section-quickstart-using-a-prebuilt-docker-container: 51 | 52 | Using a Prebuilt Docker Containers 53 | ---------------------------------- 54 | 55 | Use docker pull to get the PyTorch container from NGC:: 56 | 57 | $ docker pull nvcr.io/nvidia/pytorch:-py3 58 | 59 | Where is the version of PyProf that you want to pull. Once you have the 60 | container, you can run the container with the following command:: 61 | 62 | $ docker run --gpus=1 --rm --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -v/full/path/to/example/model/repository:/models 63 | 64 | Where is *nvcr.io/nvidia/pytorch:-py3*. 65 | 66 | .. _section-quickstart-building-with-docker: 67 | 68 | Building With Docker 69 | -------------------- 70 | 71 | Make sure you complete the step in 72 | :ref:`section-quickstart-prerequisites` before attempting to build the PyProf 73 | container. To build PyProf from source, change to the root directory of 74 | the GitHub repo and checkout the release version of the branch that 75 | you want to build (or the `main` branch if you want to build the 76 | under-development version):: 77 | 78 | $ git checkout r20.12 79 | 80 | Then use docker to build:: 81 | 82 | $ docker build --pull -t pyprof . 83 | 84 | After the build completes you can run the container with the following command:: 85 | 86 | $ docker run --gpus=1 --rm --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -v/full/path/to/example/model/repository:/models 87 | 88 | Where is *pyprof*. 89 | 90 | .. _section-quickstart-profile-with-pyprof: 91 | 92 | Profile with PyProf 93 | ------------------- 94 | 95 | .. include:: ../README.rst 96 | :start-after: quick-start-start-marker-do-not-remove 97 | :end-before: quick-start-end-marker-do-not-remove 98 | -------------------------------------------------------------------------------- /pyprof/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import warnings 19 | 20 | from .nvtx.nvmarker import init 21 | from .nvtx.nvmarker import add_wrapper as wrap 22 | -------------------------------------------------------------------------------- /pyprof/examples/.gitignore: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | __pycache__ 16 | *.sql 17 | *.dict 18 | *.csv 19 | -------------------------------------------------------------------------------- /pyprof/examples/custom_func_module/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | This directory has examples which show how to intercept (monkey patch) custom 18 | functions and modules with `pyprof`. No changes are required in `pyprof/parse`, 19 | however, users can add support for bytes and flops calculation for custom 20 | functions and modules in `pyprof/prof` by extending the `OperatorLayerBase` 21 | class. 22 | -------------------------------------------------------------------------------- /pyprof/examples/custom_func_module/custom_function.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import torch 19 | import torch.cuda.profiler as profiler 20 | import pyprof 21 | #Initialize pyprof 22 | pyprof.init() 23 | 24 | 25 | class Foo(torch.autograd.Function): 26 | 27 | @staticmethod 28 | def forward(ctx, in1, in2): 29 | out = in1 + in2 #This could be a custom C/C++ function. 30 | return out 31 | 32 | @staticmethod 33 | def backward(ctx, grad): 34 | in1_grad = grad #This could be a custom C/C++ function. 35 | in2_grad = grad #This could be a custom C/C++ function. 36 | return in1_grad, in2_grad 37 | 38 | 39 | #Hook the forward and backward functions to pyprof 40 | pyprof.nvtx.wrap(Foo, 'forward') 41 | pyprof.nvtx.wrap(Foo, 'backward') 42 | 43 | foo = Foo.apply 44 | 45 | x = torch.ones(4, 4).cuda() 46 | y = torch.ones(4, 4).cuda() 47 | 48 | with torch.autograd.profiler.emit_nvtx(): 49 | profiler.start() 50 | z = foo(x, y) 51 | profiler.stop() 52 | -------------------------------------------------------------------------------- /pyprof/examples/custom_func_module/custom_module.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import torch 19 | import torch.cuda.profiler as profiler 20 | import pyprof 21 | pyprof.init() 22 | 23 | 24 | class Foo(torch.nn.Module): 25 | 26 | def __init__(self, size): 27 | super(Foo, self).__init__() 28 | self.n = torch.nn.Parameter(torch.ones(size)) 29 | self.m = torch.nn.Parameter(torch.ones(size)) 30 | 31 | def forward(self, input): 32 | return self.n * input + self.m 33 | 34 | 35 | # Hook the forward function to pyprof 36 | pyprof.nvtx.wrap(Foo, 'forward') 37 | 38 | foo = Foo(4) 39 | foo.cuda() 40 | x = torch.ones(4).cuda() 41 | 42 | with torch.autograd.profiler.emit_nvtx(): 43 | profiler.start() 44 | z = foo(x) 45 | profiler.stop() 46 | -------------------------------------------------------------------------------- /pyprof/examples/custom_func_module/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -e 17 | 18 | SCRIPT=`realpath $0` 19 | SCRIPTPATH=`dirname $SCRIPT` 20 | PYPROF="$SCRIPTPATH/../.." 21 | 22 | parse="python $PYPROF/parse/parse.py" 23 | prof="python $PYPROF/prof/prof.py" 24 | 25 | for f in *.py 26 | do 27 | base=`basename $f .py` 28 | sql=$base.sql 29 | dict=$base.dict 30 | 31 | #NVprof 32 | echo "nvprof -fo $sql python $f" 33 | nvprof -fo $sql python $f 34 | 35 | #Parse 36 | echo $parse $sql 37 | $parse $sql > $dict 38 | 39 | #Prof 40 | echo $prof $dict 41 | $prof -w 130 $dict 42 | \rm $sql $dict 43 | done 44 | -------------------------------------------------------------------------------- /pyprof/examples/imagenet/imagenet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | """ 18 | Example to run pyprof with imagenet models. 
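A typical profiling flow for this script (a sketch mirroring test.sh in this
directory; output file names are placeholders, and nvprof plus apex's FusedAdam
are assumed to be available):

    nvprof -fo net.sql --profile-from-start off python imagenet.py -m resnet50 -o adam -b 32
    python -m pyprof.parse net.sql > net.dict
    python -m pyprof.prof -w 130 net.dict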
19 | """ 20 | 21 | import sys 22 | import torch 23 | import torch.nn as nn 24 | import torchvision.models as models 25 | import torch.cuda.profiler as profiler 26 | import argparse 27 | 28 | import pyprof 29 | from apex.optimizers import FusedAdam 30 | 31 | 32 | def parseArgs(): 33 | parser = argparse.ArgumentParser(prog=sys.argv[0], description="Run popular imagenet models.") 34 | 35 | parser.add_argument( 36 | "-m", type=str, default="resnet50", choices=[ 37 | "alexnet", "densenet121", "densenet161", "densenet169", "densenet201", "googlenet", "mnasnet0_5", 38 | "mnasnet0_75", "mnasnet1_0", "mnasnet1_3", "mobilenet_v2", "resnet18", "resnet34", "resnet50", "resnet101", 39 | "resnet152", "resnext50_32x4d", "resnext101_32x8d", "wide_resnet50_2", "wide_resnet101_2", 40 | "shufflenet_v2_x0_5", "shufflenet_v2_x1_0", "shufflenet_v2_x1_5", "shufflenet_v2_x2_0", "squeezenet1_0", 41 | "squeezenet1_1", "vgg11", "vgg11_bn", "vgg13", "vgg13_bn", "vgg16", "vgg16_bn", "vgg19", "vgg19_bn", 42 | "inception_v3" 43 | ], help="Model." 44 | ) 45 | 46 | parser.add_argument("-b", type=int, default=32, help="Batch size.") 47 | 48 | parser.add_argument("-o", type=str, default="adam", choices=["adam", "sgd"], help="Optimizer.") 49 | 50 | args = parser.parse_args() 51 | return args 52 | 53 | 54 | d = { 55 | "alexnet": { 56 | 'H': 224, 57 | 'W': 224, 58 | 'opts': {} 59 | }, 60 | "densenet121": { 61 | 'H': 224, 62 | 'W': 224, 63 | 'opts': {} 64 | }, 65 | "densenet161": { 66 | 'H': 224, 67 | 'W': 224, 68 | 'opts': {} 69 | }, 70 | "densenet169": { 71 | 'H': 224, 72 | 'W': 224, 73 | 'opts': {} 74 | }, 75 | "densenet201": { 76 | 'H': 224, 77 | 'W': 224, 78 | 'opts': {} 79 | }, 80 | "googlenet": { 81 | 'H': 224, 82 | 'W': 224, 83 | 'opts': { 84 | 'aux_logits': False 85 | } 86 | }, 87 | "mnasnet0_5": { 88 | 'H': 224, 89 | 'W': 224, 90 | 'opts': {} 91 | }, 92 | "mnasnet0_75": { 93 | 'H': 224, 94 | 'W': 224, 95 | 'opts': {} 96 | }, 97 | "mnasnet1_0": { 98 | 'H': 224, 99 | 'W': 224, 100 | 'opts': {} 101 | }, 102 | "mnasnet1_3": { 103 | 'H': 224, 104 | 'W': 224, 105 | 'opts': {} 106 | }, 107 | "mobilenet_v2": { 108 | 'H': 224, 109 | 'W': 224, 110 | 'opts': {} 111 | }, 112 | "resnet18": { 113 | 'H': 224, 114 | 'W': 224, 115 | 'opts': {} 116 | }, 117 | "resnet34": { 118 | 'H': 224, 119 | 'W': 224, 120 | 'opts': {} 121 | }, 122 | "resnet50": { 123 | 'H': 224, 124 | 'W': 224, 125 | 'opts': {} 126 | }, 127 | "resnet101": { 128 | 'H': 224, 129 | 'W': 224, 130 | 'opts': {} 131 | }, 132 | "resnet152": { 133 | 'H': 224, 134 | 'W': 224, 135 | 'opts': {} 136 | }, 137 | "resnext50_32x4d": { 138 | 'H': 224, 139 | 'W': 224, 140 | 'opts': {} 141 | }, 142 | "resnext101_32x8d": { 143 | 'H': 224, 144 | 'W': 224, 145 | 'opts': {} 146 | }, 147 | "wide_resnet50_2": { 148 | 'H': 224, 149 | 'W': 224, 150 | 'opts': {} 151 | }, 152 | "wide_resnet101_2": { 153 | 'H': 224, 154 | 'W': 224, 155 | 'opts': {} 156 | }, 157 | "shufflenet_v2_x0_5": { 158 | 'H': 224, 159 | 'W': 224, 160 | 'opts': {} 161 | }, 162 | "shufflenet_v2_x1_0": { 163 | 'H': 224, 164 | 'W': 224, 165 | 'opts': {} 166 | }, 167 | "shufflenet_v2_x1_5": { 168 | 'H': 224, 169 | 'W': 224, 170 | 'opts': {} 171 | }, 172 | "shufflenet_v2_x2_0": { 173 | 'H': 224, 174 | 'W': 224, 175 | 'opts': {} 176 | }, 177 | "squeezenet1_0": { 178 | 'H': 224, 179 | 'W': 224, 180 | 'opts': {} 181 | }, 182 | "squeezenet1_1": { 183 | 'H': 224, 184 | 'W': 224, 185 | 'opts': {} 186 | }, 187 | "vgg11": { 188 | 'H': 224, 189 | 'W': 224, 190 | 'opts': {} 191 | }, 192 | "vgg11_bn": { 193 | 'H': 224, 194 | 'W': 224, 195 | 
'opts': {} 196 | }, 197 | "vgg13": { 198 | 'H': 224, 199 | 'W': 224, 200 | 'opts': {} 201 | }, 202 | "vgg13_bn": { 203 | 'H': 224, 204 | 'W': 224, 205 | 'opts': {} 206 | }, 207 | "vgg16": { 208 | 'H': 224, 209 | 'W': 224, 210 | 'opts': {} 211 | }, 212 | "vgg16_bn": { 213 | 'H': 224, 214 | 'W': 224, 215 | 'opts': {} 216 | }, 217 | "vgg19": { 218 | 'H': 224, 219 | 'W': 224, 220 | 'opts': {} 221 | }, 222 | "vgg19_bn": { 223 | 'H': 224, 224 | 'W': 224, 225 | 'opts': {} 226 | }, 227 | "inception_v3": { 228 | 'H': 299, 229 | 'W': 299, 230 | 'opts': { 231 | 'aux_logits': False 232 | } 233 | }, 234 | } 235 | 236 | 237 | def main(): 238 | args = parseArgs() 239 | 240 | pyprof.init() 241 | 242 | N = args.b 243 | C = 3 244 | H = d[args.m]['H'] 245 | W = d[args.m]['W'] 246 | opts = d[args.m]['opts'] 247 | classes = 1000 248 | 249 | net = getattr(models, args.m) 250 | net = net(**opts).cuda().half() 251 | net.train() 252 | 253 | x = torch.rand(N, C, H, W).cuda().half() 254 | target = torch.empty(N, dtype=torch.long).random_(classes).cuda() 255 | 256 | criterion = nn.CrossEntropyLoss().cuda() 257 | if (args.o == "sgd"): 258 | optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9) 259 | elif (args.o == "adam"): 260 | optimizer = FusedAdam(net.parameters()) 261 | else: 262 | assert False 263 | 264 | #Warm up without profiler 265 | for i in range(2): 266 | output = net(x) 267 | loss = criterion(output, target) 268 | optimizer.zero_grad() 269 | loss.backward() 270 | optimizer.step() 271 | 272 | with torch.autograd.profiler.emit_nvtx(): 273 | profiler.start() 274 | output = net(x) 275 | loss = criterion(output, target) 276 | optimizer.zero_grad() 277 | loss.backward() 278 | optimizer.step() 279 | profiler.stop() 280 | 281 | 282 | if __name__ == "__main__": 283 | main() 284 | -------------------------------------------------------------------------------- /pyprof/examples/imagenet/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -e 17 | 18 | SCRIPT=`realpath $0` 19 | SCRIPTPATH=`dirname $SCRIPT` 20 | PYPROF="$SCRIPTPATH/../.." 
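# The loop below profiles each (model, optimizer, batch size) combination with
# nvprof, converts the resulting .sql file into a .dict with parse.py, and
# summarizes it with prof.py. A rough Nsight Systems equivalent of the capture
# step (a sketch only, not exercised by this script) would be:
#   nsys profile -f true -o $base --export sqlite python imagenet.py -m $net -o $optim -b $batch
# with the generated .sqlite file then passed to the parse step.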
21 | 22 | parse="python $PYPROF/parse/parse.py" 23 | prof="python $PYPROF/prof/prof.py" 24 | 25 | for net in "resnet50" 26 | do 27 | for optim in adam sgd 28 | do 29 | for batch in 32 64 30 | do 31 | base="torchvision".$net.$optim.$batch 32 | sql=$base.sql 33 | dict=$base.dict 34 | 35 | #NVprof 36 | echo "nvprof -fo $sql --profile-from-start off python imagenet.py -m ${net} -o $optim -b $batch" 37 | sudo env "PATH=$PATH" nvprof -fo $sql --profile-from-start off python imagenet.py -m ${net} -o $optim -b $batch 38 | 39 | #Parse 40 | echo $parse $sql 41 | $parse $sql > $dict 42 | 43 | #Prof 44 | echo $prof $dict 45 | $prof -w 130 $dict 46 | # \rm $sql $dict 47 | done 48 | done 49 | done 50 | -------------------------------------------------------------------------------- /pyprof/examples/jit/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | *As of this writing, these examples do not work 18 | because of changes being proposed in PyTorch.* 19 | 20 | There are two ways to use PyTorch JIT 21 | - Scripting 22 | - Tracing 23 | 24 | In addition, we can JIT a 25 | - Stand alone function 26 | - Class / class method 27 | 28 | This directory has an example for each of the 4 cases. 29 | Intercepting (monkey patching) JITted code has a few extra steps, 30 | which are explained through comments. 31 | -------------------------------------------------------------------------------- /pyprof/examples/jit/jit_script_function.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import torch 19 | import torch.cuda.profiler as profiler 20 | import pyprof 21 | 22 | #The following creates an object "foo" of type ScriptModule 23 | #The new object has a function called "forward" 24 | 25 | 26 | @torch.jit.script 27 | def foo(x, y): 28 | return torch.sigmoid(x) + y 29 | 30 | 31 | #Initialize pyprof after the JIT step 32 | pyprof.init() 33 | 34 | #Assign a name to the object "foo" 35 | foo.__name__ = "foo" 36 | 37 | #Hook up the forward function to pyprof 38 | pyprof.nvtx.wrap(foo, 'forward') 39 | 40 | x = torch.zeros(4, 4).cuda() 41 | y = torch.ones(4, 4).cuda() 42 | 43 | with torch.autograd.profiler.emit_nvtx(): 44 | profiler.start() 45 | z = foo(x, y) 46 | profiler.stop() 47 | print(z) 48 | -------------------------------------------------------------------------------- /pyprof/examples/jit/jit_script_method.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import torch 19 | import torch.cuda.profiler as profiler 20 | import pyprof 21 | 22 | 23 | class Foo(torch.jit.ScriptModule): 24 | 25 | def __init__(self, size): 26 | super(Foo, self).__init__() 27 | self.n = torch.nn.Parameter(torch.ones(size)) 28 | self.m = torch.nn.Parameter(torch.ones(size)) 29 | 30 | @torch.jit.script_method 31 | def forward(self, input): 32 | return self.n * input + self.m 33 | 34 | 35 | #Initialize pyprof after the JIT step 36 | pyprof.init() 37 | 38 | #Hook up the forward function to pyprof 39 | pyprof.nvtx.wrap(Foo, 'forward') 40 | 41 | foo = Foo(4) 42 | foo.cuda() 43 | x = torch.ones(4).cuda() 44 | 45 | with torch.autograd.profiler.emit_nvtx(): 46 | profiler.start() 47 | z = foo(x) 48 | profiler.stop() 49 | print(z) 50 | -------------------------------------------------------------------------------- /pyprof/examples/jit/jit_trace_function.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import torch 19 | import torch.cuda.profiler as profiler 20 | import pyprof 21 | 22 | 23 | def foo(x, y): 24 | return torch.sigmoid(x) + y 25 | 26 | 27 | x = torch.zeros(4, 4).cuda() 28 | y = torch.ones(4, 4).cuda() 29 | 30 | #JIT the function using tracing 31 | #This returns an object of type ScriptModule with a forward method. 32 | traced_foo = torch.jit.trace(foo, (x, y)) 33 | 34 | #Initialize pyprof after the JIT step 35 | pyprof.init() 36 | 37 | #Assign a name to the object "traced_foo" 38 | traced_foo.__dict__['__name__'] = "foo" 39 | 40 | #Hook up the forward function to pyprof 41 | pyprof.nvtx.wrap(traced_foo, 'forward') 42 | 43 | with torch.autograd.profiler.emit_nvtx(): 44 | profiler.start() 45 | z = traced_foo(x, y) 46 | profiler.stop() 47 | print(z) 48 | -------------------------------------------------------------------------------- /pyprof/examples/jit/jit_trace_method.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import torch 19 | import torch.cuda.profiler as profiler 20 | import pyprof 21 | 22 | 23 | class Foo(torch.nn.Module): 24 | 25 | def __init__(self, size): 26 | super(Foo, self).__init__() 27 | self.n = torch.nn.Parameter(torch.ones(size)) 28 | self.m = torch.nn.Parameter(torch.ones(size)) 29 | 30 | def forward(self, input): 31 | return self.n * input + self.m 32 | 33 | 34 | foo = Foo(4) 35 | foo.cuda() 36 | x = torch.ones(4).cuda() 37 | 38 | #JIT the class using tracing 39 | traced_foo = torch.jit.trace(foo, x) 40 | 41 | #Initialize pyprof after the JIT step 42 | pyprof.init() 43 | 44 | #Assign a name to the object "traced_foo" 45 | traced_foo.__dict__['__name__'] = "foo" 46 | 47 | #Hook up the forward function to pyprof 48 | pyprof.nvtx.wrap(traced_foo, 'forward') 49 | 50 | with torch.autograd.profiler.emit_nvtx(): 51 | profiler.start() 52 | z = traced_foo(x) 53 | profiler.stop() 54 | print(z) 55 | -------------------------------------------------------------------------------- /pyprof/examples/jit/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -e 17 | 18 | SCRIPT=`realpath $0` 19 | SCRIPTPATH=`dirname $SCRIPT` 20 | PYPROF="$SCRIPTPATH/../.." 21 | 22 | parse="python $PYPROF/parse/parse.py" 23 | prof="python $PYPROF/prof/prof.py" 24 | 25 | for f in *.py 26 | do 27 | base=`basename $f .py` 28 | sql=$base.sql 29 | dict=$base.dict 30 | 31 | #NVprof 32 | echo "nvprof -fo $sql python $f" 33 | nvprof -fo $sql python $f 34 | 35 | #Parse 36 | echo $parse $sql 37 | $parse $sql > $dict 38 | 39 | #Prof 40 | echo $prof $dict 41 | $prof -w 130 $dict 42 | \rm $sql $dict 43 | done 44 | -------------------------------------------------------------------------------- /pyprof/examples/lenet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import torch 19 | import torch.nn as nn 20 | import torch.nn.functional as F 21 | import torch.cuda.profiler as profiler 22 | import torch.optim as optim 23 | 24 | import pyprof 25 | pyprof.init() 26 | 27 | 28 | class LeNet5(nn.Module): 29 | 30 | def __init__(self): 31 | super(LeNet5, self).__init__() 32 | # 1 input image channel, 6 output channels, 5x5 square convolution 33 | # kernel 34 | self.conv1 = nn.Conv2d(1, 6, 5) 35 | self.conv2 = nn.Conv2d(6, 16, 5) 36 | # an affine operation: y = Wx + b 37 | self.fc1 = nn.Linear(16 * 5 * 5, 120) 38 | self.fc2 = nn.Linear(120, 84) 39 | self.fc3 = nn.Linear(84, 10) 40 | 41 | def forward(self, x): 42 | # Max pooling over a (2, 2) window 43 | x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2)) 44 | # If the size is a square you can only specify a single number 45 | x = F.max_pool2d(F.relu(self.conv2(x)), 2) 46 | x = x.view(-1, self.num_flat_features(x)) 47 | x = F.relu(self.fc1(x)) 48 | x = F.relu(self.fc2(x)) 49 | x = self.fc3(x) 50 | return x 51 | 52 | def num_flat_features(self, x): 53 | size = x.size()[1:] # all dimensions except the batch dimension 54 | num_features = 1 55 | for s in size: 56 | num_features *= s 57 | return num_features 58 | 59 | 60 | with torch.autograd.profiler.emit_nvtx(): 61 | 62 | net = LeNet5().cuda() 63 | 64 | input = torch.randn(1, 1, 32, 32).cuda() 65 | out = net(input) 66 | 67 | target = torch.randn(10) # a dummy target, for example 68 | target = target.view(1, -1).cuda() # make it the same shape as output 69 | criterion = nn.MSELoss() 70 | 71 | # create your optimizer 72 | optimizer = optim.SGD(net.parameters(), lr=0.01) 73 | 74 | # in your training loop: 75 | optimizer.zero_grad() # zero the gradient buffers 76 | 77 | profiler.start() 78 | output = net(input) 79 | loss = criterion(output, target) 80 | loss.backward() 81 | optimizer.step() # Does the update 82 | profiler.stop() 83 | -------------------------------------------------------------------------------- /pyprof/examples/operators.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | """ 18 | This file checks all Python operators. 
19 | """ 20 | 21 | import sys 22 | import torch 23 | import torch.cuda.profiler as profiler 24 | import operator 25 | import inspect 26 | 27 | #Import and initialize pyprof 28 | import pyprof 29 | pyprof.init() 30 | 31 | X = 1024 32 | Y = 1024 33 | 34 | fa = torch.rand(X, Y).cuda() 35 | fb = torch.rand(X, Y).cuda() 36 | fc = torch.rand(X, Y).cuda() 37 | 38 | ia = torch.randint(0, 100, (X, Y)).cuda() 39 | ib = torch.randint(0, 100, (X, Y)).cuda() 40 | 41 | sa = torch.ones(1, 1).cuda() 42 | sb = torch.ones(1, 1).cuda() 43 | 44 | ba = fa.byte() 45 | 46 | unaryOps = [ 47 | "abs", 48 | "__abs__", 49 | "neg", 50 | "__neg__", 51 | ] 52 | invertOps = [ 53 | "inv", 54 | "invert", 55 | "__inv__", 56 | "__invert__", 57 | ] #imlemented only for byte tensors 58 | #pos, __pos__ is not implemented for tensors 59 | 60 | binaryOps = [] 61 | binaryOps += ["lt", "__lt__", "le", "__le__", "eq", "__eq__", "ne", "__ne__", "ge", "__ge__", "gt", "__gt__"] 62 | binaryOps += [ 63 | "add", "__add__", "sub", "__sub__", "mul", "__mul__", "floordiv", "__floordiv__", "truediv", "__truediv__", "pow", 64 | "__pow__", "mod", "__mod__" 65 | ] 66 | binaryOps += ["and_", "__and__", "or_", "__or__", "xor", "__xor__", "lshift", "__lshift__", "rshift", "__rshift__"] 67 | 68 | inplaceOps = [] 69 | inplaceOps += [ 70 | "iadd", 71 | "__iadd__", 72 | "isub", 73 | "__isub__", 74 | "imul", 75 | "__imul__", 76 | "ifloordiv", 77 | "__ifloordiv__", 78 | "itruediv", 79 | "__itruediv__", 80 | "imod", 81 | "__imod__", 82 | ] 83 | #ipow, __ipow__ is not implemented in pytorch 84 | inplaceOps += [ 85 | "iand", 86 | "__iand__", 87 | "ior", 88 | "__ior__", 89 | "ixor", 90 | "__ixor__", 91 | "ilshift", 92 | "__ilshift__", 93 | "irshift", 94 | "__irshift__", 95 | ] 96 | 97 | matmulOps = ["matmul", "__matmul__"] 98 | inplacematmulOps = ["imatmul", "__imatmul__"] 99 | 100 | reverseIntBinaryOps = [ 101 | "__radd__", 102 | "__rsub__", 103 | "__rmul__", 104 | "__rfloordiv__", 105 | "__rpow__", 106 | ] 107 | reverseFloatBinaryOps = [ 108 | "__radd__", 109 | "__rsub__", 110 | "__rmul__", 111 | "__rdiv__", 112 | "__rtruediv__", 113 | "__rfloordiv__", 114 | "__rpow__", 115 | ] 116 | ''' 117 | TODO 118 | .concat(a, b) 119 | .__concat__(a, b) 120 | .contains(a, b) 121 | .__contains__(a, b) 122 | .countOf(a, b) 123 | .delitem(a, b) 124 | .__delitem__(a, b) 125 | .getitem(a, b) 126 | .__getitem__(a, b) 127 | .indexOf(a, b) 128 | .setitem(a, b, c) 129 | .__setitem__(a, b, c) 130 | .length_hint(obj, default=0) 131 | .iconcat(a, b) 132 | .__iconcat__(a, b) 133 | .index(a) 134 | .__index__(a) 135 | ''' 136 | 137 | #Context manager 138 | with torch.autograd.profiler.emit_nvtx(): 139 | 140 | #Start profiler 141 | profiler.start() 142 | 143 | for op in unaryOps: 144 | assert hasattr(operator, op) 145 | f = getattr(operator, op) 146 | assert inspect.isbuiltin(f) 147 | c = f(ia) 148 | 149 | for op in invertOps: 150 | assert hasattr(operator, op) 151 | f = getattr(operator, op) 152 | assert inspect.isbuiltin(f) 153 | c = f(ba) 154 | 155 | for op in binaryOps: 156 | assert hasattr(operator, op) 157 | f = getattr(operator, op) 158 | assert inspect.isbuiltin(f) 159 | c = f(ia, ib) 160 | c = f(ia, 2) 161 | 162 | for op in inplaceOps: 163 | assert hasattr(operator, op) 164 | f = getattr(operator, op) 165 | assert inspect.isbuiltin(f) 166 | ia = f(ia, ib) 167 | ia = f(ia, 2) 168 | 169 | for op in matmulOps: 170 | assert hasattr(operator, op) 171 | f = getattr(operator, op) 172 | assert inspect.isbuiltin(f) 173 | c = f(fa, fb) 174 | 175 | for op in inplacematmulOps: 176 | 
assert hasattr(operator, op) 177 | f = getattr(operator, op) 178 | assert inspect.isbuiltin(f) 179 | fa = f(fa, fb) 180 | 181 | for op in reverseIntBinaryOps: 182 | assert hasattr(torch.Tensor, op) 183 | f = getattr(torch.Tensor, op) 184 | ia = f(ia, ib) 185 | 186 | for op in reverseFloatBinaryOps: 187 | assert hasattr(torch.Tensor, op) 188 | f = getattr(torch.Tensor, op) 189 | fa = f(fa, fb) 190 | ''' 191 | #c = fa[3] 192 | #c = fa[3][3] 193 | #c = torch.min(fa, 3) 194 | c = torch.sum(fa) 195 | c = torch.max(fa) 196 | c = -fa 197 | #fc[2][2] = fa[2][2] 198 | 199 | c = a_scalar and b_scalar 200 | c = a_scalar or b_scalar 201 | c = not a_scalar 202 | 203 | c = a is b 204 | c = a is not b 205 | ''' 206 | 207 | #Stop profiler 208 | profiler.stop() 209 | -------------------------------------------------------------------------------- /pyprof/examples/simple.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | """ 18 | This simple file provides an example of how to 19 | - import the pyprof library and initialize it 20 | - use the emit_nvtx context manager 21 | - start and stop the profiler 22 | 23 | Only kernels within profiler.start and profiler.stop calls are profiled. 24 | To profile 25 | $ nvprof -f -o simple.sql --profile-from-start off ./simple.py 26 | """ 27 | 28 | import sys 29 | import torch 30 | import torch.cuda.profiler as profiler 31 | 32 | #Import and initialize pyprof 33 | import pyprof 34 | pyprof.init() 35 | 36 | a = torch.randn(5, 5).cuda() 37 | b = torch.randn(5, 5).cuda() 38 | 39 | #Context manager 40 | with torch.autograd.profiler.emit_nvtx(): 41 | 42 | #Start profiler 43 | profiler.start() 44 | 45 | c = a + b 46 | c = torch.mul(a, b) 47 | c = torch.matmul(a, b) 48 | c = torch.argmax(a, dim=1) 49 | c = torch.nn.functional.pad(a, (1, 1)) 50 | 51 | #Stop profiler 52 | profiler.stop() 53 | -------------------------------------------------------------------------------- /pyprof/examples/user_annotation/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | Nvidia NVTX range markers (https://docs.nvidia.com/gameworks/content/gameworkslibrary/nvtx/nvidia_tools_extension_library_nvtx.htm) 18 | are a useful tool to capture and observe events and code ranges etc. 19 | Using PyTorch APIs e.g, `torch.cuda.nvtx.range_push("xxx")` and `torch.cuda.nvtx.range_pop()` users can easily add their own NVTX range markers. These markers can then be observed in the Nvidia Visual Profiler (NVVP). 20 | 21 | While inserting NVTX markers (strings), if the users follow a specific string pattern `"layer:your_string_here"` e.g. 
`"layer:conv1"` or `"layer:encoder_layer_3_self_attention`, then `pyprof` will display the strings `conv1` and `encoder_layer_3_self_attention` next to the associated kernels in the output of `prof.py` when used with the `-c layer` option. 22 | 23 | NVTX range markers can be nested and if users follow the above string pattern, the output of `prof.py` will show all the markers associated with a kernel. 24 | 25 | The file `resnet.py` (a simplified version of the torchvision model) shows an example of how users can add (nested) NVTX markers with information which can greatly aid in understanding and analysis of networks. 26 | 27 | Note that the pattern `"layer:your_string_here"` was chosen to aid information extraction by `pyprof`. The tool will work seamlessly even if there are other markers or no markers at all. 28 | 29 | ### To run 30 | 31 | ```sh 32 | nvprof -fo resnet.sql --profile-from-start off python resnet.py 33 | parse.py resnet.sql > resnet.dict 34 | prof.py --csv -c idx,layer,dir,mod,op,kernel,params,sil resnet.dict 35 | ``` 36 | 37 | The file `resnet.sql` can also be opened with NVVP as usual. 38 | -------------------------------------------------------------------------------- /pyprof/examples/user_annotation/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | set -e 17 | 18 | SCRIPT=`realpath $0` 19 | SCRIPTPATH=`dirname $SCRIPT` 20 | PYPROF="$SCRIPTPATH/../.." 21 | 22 | parse="python $PYPROF/parse/parse.py" 23 | prof="python $PYPROF/prof/prof.py" 24 | 25 | for f in *.py 26 | do 27 | base=`basename $f .py` 28 | sql=$base.sql 29 | dict=$base.dict 30 | 31 | #NVprof 32 | echo "nvprof -fo --profile-from-start off $sql python $f" 33 | nvprof -fo $sql --profile-from-start off python $f 34 | 35 | #Parse 36 | echo $parse $sql 37 | $parse $sql > $dict 38 | 39 | #Prof 40 | echo $prof $dict 41 | #$prof -w 130 $dict 42 | $prof --csv -c idx,layer,dir,mod,op,kernel,params,sil $dict 43 | \rm $sql $dict 44 | done 45 | -------------------------------------------------------------------------------- /pyprof/nvtx/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from .nvmarker import init 19 | from .nvmarker import add_wrapper as wrap 20 | -------------------------------------------------------------------------------- /pyprof/parse/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | -------------------------------------------------------------------------------- /pyprof/parse/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from .parse import main 19 | 20 | if __name__ == '__main__': 21 | main() 22 | -------------------------------------------------------------------------------- /pyprof/parse/db.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import sys, sqlite3 19 | 20 | 21 | class DB(object): 22 | """ 23 | This class provides functions for DB operations 24 | with exception handling. 
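    A minimal usage sketch (the database file name and the query are
    illustrative only):

        db = DB("net.sql")
        rows = db.select("SELECT * FROM sqlite_master")
        db.close()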
25 | """ 26 | 27 | def __init__(self, dbFile): 28 | try: 29 | conn = sqlite3.connect(dbFile) 30 | conn.row_factory = sqlite3.Row 31 | c = conn.cursor() 32 | except: 33 | print("Error opening {}".format(dbFile)) 34 | sys.exit(1) 35 | 36 | self.conn = conn 37 | self.c = c 38 | 39 | def select(self, cmd): 40 | try: 41 | self.c.execute(cmd) 42 | #rows = self.c.fetchall() 43 | rows = [dict(row) for row in self.c.fetchall()] 44 | except sqlite3.Error as e: 45 | print(e) 46 | sys.exit(1) 47 | except: 48 | print("Uncaught error in SQLite access while executing {}".format(cmd)) 49 | sys.exit(1) 50 | 51 | #print(rows) 52 | return rows 53 | 54 | def insert(self, cmd, data): 55 | try: 56 | self.c.execute(cmd, data) 57 | except sqlite3.Error as e: 58 | print(e) 59 | sys.exit(1) 60 | except: 61 | print("Uncaught error in SQLite access while executing {}".format(cmd)) 62 | sys.exit(1) 63 | 64 | def execute(self, cmd): 65 | try: 66 | self.c.execute(cmd) 67 | except sqlite3.Error as e: 68 | print(e) 69 | sys.exit(1) 70 | except: 71 | print("Uncaught error in SQLite access while executing {}".format(cmd)) 72 | sys.exit(1) 73 | 74 | def commit(self): 75 | self.conn.commit() 76 | 77 | def close(self): 78 | self.c.close() 79 | self.conn.close() 80 | -------------------------------------------------------------------------------- /pyprof/parse/kernel.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import cxxfilt, struct, binascii 19 | 20 | #Helper functions 21 | 22 | 23 | def demangle(name): 24 | """ 25 | Demangle a C++ string 26 | """ 27 | result = name 28 | try: 29 | result = cxxfilt.demangle(name) 30 | except: 31 | pass 32 | return result 33 | 34 | 35 | def getShortName(name): 36 | """ 37 | Returns a shorter kernel name 38 | """ 39 | sname = name.split("<")[0] \ 40 | .replace("void ", "") \ 41 | .replace("at::","") \ 42 | .replace("cuda::", "") \ 43 | .replace("native::","") \ 44 | .replace("(anonymous namespace)::", "") 45 | sname = sname.split("(")[0] 46 | return sname 47 | 48 | 49 | class Kernel(object): 50 | """ 51 | This class stores information about a kernel. 
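    The fields below are populated by setKernelInfo(), setMarkerInfo(),
    setDirection() and setOp(); print() then emits them as a plain
    dictionary, which is what prof.py consumes.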
52 | """ 53 | 54 | kernels = [] 55 | profStart = 0 56 | 57 | def __init__(self): 58 | self.kNameId = None 59 | self.kShortName = None 60 | self.kLongName = None 61 | self.kStartTime = None #GPU start time 62 | self.kEndTime = None #GPU end time 63 | self.kDuration = None 64 | self.device = None 65 | self.stream = None 66 | self.grid = () 67 | self.block = () 68 | self.corrId = None 69 | self.rStartTime = None #CPU start time 70 | self.rEndTime = None #CPU end time 71 | self.rDuration = None 72 | self.tid = None 73 | self.pid = None 74 | self.objId = None 75 | self.timeOffset = None 76 | 77 | self.layerMarkers = [] 78 | self.traceMarkers = [] 79 | self.reprMarkers = [] 80 | self.pyprofMarkers = [] 81 | self.seqMarkers = [] 82 | self.otherMarkers = [] 83 | self.altMarkers = [] 84 | self.seqId = [] 85 | self.altSeqId = [] 86 | self.layer = [] 87 | 88 | self.subSeqId = None 89 | self.dir = None 90 | self.mod = [] 91 | self.op = [] 92 | 93 | def setKernelInfo(self, info): 94 | self.kNameId = info['kNameId'] 95 | self.corrId = int(info['correlationId']) 96 | start = int(info['start']) 97 | end = int(info['end']) 98 | assert end > start, "This assertion can fail for very large profiles. It usually fails when start = end = 0." 99 | self.kStartTime = start 100 | self.kEndTime = end 101 | self.kDuration = end - start 102 | assert (start > Kernel.profStart) 103 | self.device = int(info['deviceId']) 104 | self.stream = int(info['streamId']) 105 | self.grid = (info['gridX'], info['gridY'], info['gridZ']) 106 | self.block = (info['blockX'], info['blockY'], info['blockZ']) 107 | self.timeOffset = Kernel.profStart 108 | self.setKernelName(info['name']) 109 | self.setRunTimeInfo(info) 110 | 111 | def setKernelName(self, name): 112 | cadena = demangle(name) 113 | self.kLongName = cadena 114 | self.kShortName = getShortName(cadena) 115 | 116 | def setRunTimeInfo(self, info): 117 | self.rStartTime = info['rStart'] 118 | self.rEndTime = info['rEnd'] 119 | self.rDuration = info['rEnd'] - info['rStart'] 120 | self.pid = info['pid'] 121 | self.tid = info['tid'] 122 | self.objId = info['objId'] 123 | assert (self.rStartTime < self.rEndTime) 124 | assert (self.rStartTime < self.kStartTime) 125 | 126 | def setMarkerInfo(self, info): 127 | self.layerMarkers, self.traceMarkers, self.reprMarkers, self.pyprofMarkers, self.seqMarkers, self.otherMarkers, self.altMarkers, self.seqId, self.altSeqId, self.layer = info 128 | self.subSeqId = 0 129 | 130 | def setDirection(self): 131 | """ 132 | Set direction (fprop, bprop) based on PyTorch sequence markers. 133 | It is a heuristic and not a foolproof method. 134 | """ 135 | if any("Backward, seq = " in x for x in self.seqMarkers) or \ 136 | any("backward, seq = " in x for x in self.seqMarkers) or \ 137 | any("Backward0, seq = " in x for x in self.seqMarkers): 138 | self.dir = "bprop" 139 | else: 140 | self.dir = "fprop" 141 | 142 | def setOp(self): 143 | """ 144 | Detect and set the class/module (mod) and operation (op) 145 | of the kernel e.g. torch.nn.functional / linear, torch / sigmoid. 146 | The lookup sequence we use is 147 | NVTX markers inserted by pyprof 148 | NVTX markers inserted by PyTorch in bprop 149 | NVTX markers inserted by PyTorch in fprop 150 | It is a heuristic and not a foolproof method. 
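    Note that mod and op are lists: a kernel covered by several pyprof
    markers records one (mod, op) pair per marker.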
151 | """ 152 | 153 | def sanitize(name): 154 | name = name.replace("torch","") \ 155 | .replace("autograd","") \ 156 | .replace("_backward","") \ 157 | .replace("::","") \ 158 | .replace("jit","") \ 159 | .replace("(anonymous namespace)","") 160 | head, sep, tail = name.partition("Backward") 161 | return head 162 | 163 | #Check pyprof markers 164 | for m in self.pyprofMarkers: 165 | assert ("mod" in m) and ("op" in m) and ("args" in m) 166 | t = eval(m) 167 | self.op.append(t['op']) 168 | self.mod.append(t['mod']) 169 | 170 | if len(self.op): 171 | return 172 | 173 | #Check bprop kernel markers 174 | for m in self.seqMarkers: 175 | if ("backward, seq = " in m) or ("Backward, seq = " in m): 176 | op = m.split(",")[0] 177 | op = sanitize(op) 178 | self.op.append(op) 179 | self.mod.append('na') 180 | 181 | if len(self.op): 182 | return 183 | 184 | #Check markers with "seq = " 185 | for m in self.seqMarkers: 186 | if ", seq = " in m: 187 | op = m.split(",")[0] 188 | self.op.append(op) 189 | self.mod.append('na') 190 | 191 | if len(self.op): 192 | return 193 | 194 | #If nothing else 195 | if len(self.otherMarkers): 196 | self.op.append(self.otherMarkers[0]) 197 | self.mod.append('na') 198 | 199 | def print(self): 200 | """ 201 | Print kernel information. This is used by prof.py. 202 | """ 203 | 204 | a = lambda: None 205 | a.kShortName = self.kShortName 206 | a.kDuration = self.kDuration 207 | #a.layerMarkers = self.layerMarkers 208 | a.layer = self.layer 209 | a.trace = self.traceMarkers 210 | a.reprMarkers = self.reprMarkers 211 | a.marker = self.pyprofMarkers 212 | a.seqMarker = self.seqMarkers 213 | 214 | a.seqId = self.seqId 215 | a.subSeqId = self.subSeqId 216 | a.altSeqId = self.altSeqId 217 | 218 | a.dir = self.dir 219 | a.mod = self.mod 220 | a.op = self.op 221 | 222 | a.tid = self.tid 223 | a.device = self.device 224 | a.stream = self.stream 225 | a.grid = self.grid 226 | a.block = self.block 227 | a.kLongName = self.kLongName 228 | 229 | print(a.__dict__) 230 | -------------------------------------------------------------------------------- /pyprof/parse/parse.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | """ 18 | Parse the SQLite3 database from NVprof or Nsight and print a dictionary for every kernel. 
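
Typical usage (the database name is illustrative):

    python -m pyprof.parse net.sql > net.dict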
19 | """ 20 | 21 | import sys 22 | import os 23 | import argparse 24 | from tqdm import tqdm 25 | 26 | from .db import DB 27 | from .kernel import Kernel 28 | from .nvvp import NVVP 29 | from .nsight import Nsight 30 | 31 | 32 | def parseArgs(): 33 | parser = argparse.ArgumentParser(prog=sys.argv[0], description="Parse SQLite3 DB from NVprof or Nsight.") 34 | parser.add_argument("file", type=str, default=None, help="SQLite3 database.") 35 | 36 | args = parser.parse_args() 37 | 38 | if not os.path.isfile(args.file): 39 | raise parser.error("No such file '{}'.".format(args.file)) 40 | 41 | return args 42 | 43 | 44 | def dbIsNvvp(db): 45 | cmd = "SELECT * FROM sqlite_master where type='table' AND name='StringTable'" 46 | result = db.select(cmd) 47 | return True if len(result) == 1 else False 48 | 49 | 50 | def main(): 51 | args = parseArgs() 52 | 53 | db = DB(args.file) 54 | nvvp = None 55 | if dbIsNvvp(db): 56 | nvvp = NVVP(db) 57 | else: 58 | nvvp = Nsight(db) 59 | 60 | kInfo = nvvp.getKernelInfo() 61 | if len(kInfo) == 0: 62 | print("Found 0 kernels. Exiting.", file=sys.stderr) 63 | db.close() 64 | sys.exit(0) 65 | else: 66 | print("Found {} kernels. Getting info for each kernel.".format(len(kInfo)), file=sys.stderr) 67 | 68 | nvvp.createMarkerTable() 69 | 70 | prevSeqId = -1 71 | prevSubSeqId = -1 72 | prevOp = "na" 73 | 74 | Kernel.profStart = nvvp.getProfileStart() 75 | 76 | for i in tqdm(range(len(kInfo)), ascii=True): 77 | info = kInfo[i] 78 | k = Kernel() 79 | 80 | #Calculate/encode object ID 81 | nvvp.encode_object_id(info) 82 | 83 | #Set kernel info 84 | k.setKernelInfo(info) 85 | 86 | #Get and set marker and seqid info 87 | info = nvvp.getMarkerInfo(k.objId, k.rStartTime, k.rEndTime) 88 | k.setMarkerInfo(info) 89 | 90 | #If the seqId contains both 0 and non zero integers, remove 0. 91 | if any(seq != 0 for seq in k.seqId) and (0 in k.seqId): 92 | k.seqId.remove(0) 93 | 94 | #Set direction (it uses seq id) 95 | k.setDirection() 96 | 97 | #Set op 98 | k.setOp() 99 | 100 | #The following code is based on heuristics. 101 | #TODO: Refactor. 102 | #Assign subSeqId, adjust seqId and altSeqId 103 | #seqId can be 0. 104 | #A kernel can have multiple seqIds both in fprop and bprop. 105 | #In bprop, seqIds might not decrease monotonically. I have observed a few blips. 106 | if len(k.seqId): 107 | assert (k.dir in ["fprop", "bprop"]) 108 | if (k.dir == "fprop"): 109 | #Check if there is a sequence id larger than the previous 110 | inc = (k.seqId[-1] > prevSeqId) 111 | if inc: 112 | currSeqId = [x for x in k.seqId if x > prevSeqId][0] 113 | else: 114 | currSeqId = prevSeqId 115 | else: 116 | currSeqId = k.seqId[0] 117 | 118 | #if ((currSeqId == prevSeqId) and (k.op == prevOp)): 119 | if ((currSeqId == prevSeqId) and (k.op == prevOp)) or ((k.op[0] == "forward") and (k.op == prevOp) and 120 | (k.mod[0] in ["LSTMCell", "GRUCell", "RNNCell"])): 121 | #The second condition is to trap cases when pytorch does not use cudnn for a LSTMCell. 
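                    #Consecutive kernels that share the same seq id and op are
                    #disambiguated by bumping subSeqId; prevSeqId, prevSubSeqId and
                    #prevOp carry that state from one kernel to the next.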
122 | k.subSeqId = prevSubSeqId + 1 123 | 124 | prevSeqId = currSeqId 125 | prevSubSeqId = k.subSeqId 126 | prevOp = k.op 127 | 128 | #Keep currSeqId in k.seqId, move everything else to k.altSeqId 129 | for s in k.seqId: 130 | if s != currSeqId: 131 | k.seqId.remove(s) 132 | k.altSeqId.append(s) 133 | 134 | for s in k.altSeqId: 135 | if s == currSeqId: 136 | k.altSeqId.remove(s) 137 | 138 | k.altSeqId = list(set(k.altSeqId)) 139 | if (len(k.altSeqId)): 140 | (k.altSeqId).sort() 141 | 142 | k.print() 143 | 144 | db.close() 145 | 146 | 147 | if __name__ == '__main__': 148 | main() 149 | -------------------------------------------------------------------------------- /pyprof/prof/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | -------------------------------------------------------------------------------- /pyprof/prof/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from .prof import main 19 | 20 | if __name__ == '__main__': 21 | main() 22 | -------------------------------------------------------------------------------- /pyprof/prof/activation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | 18 | from .base import OperatorLayerBase 19 | from .tensor import Tensor 20 | 21 | 22 | class Activation(OperatorLayerBase): 23 | """ 24 | This class handles the various activation functions. 25 | """ 26 | 27 | ops = [ 28 | "celu", "elu", "elu_", "hardshrink", "hardtanh", "hardtanh_", "leaky_relu", "leaky_relu_", "logsigmoid", 29 | "prelu", "relu", "relu_", "relu6", "rrelu", "rrelu_", "selu", "sigmoid", "softplus", "softshrink", "softsign", 30 | "tanh", "tanhshrink", "threshold", "threshold_" 31 | ] 32 | 33 | def __init__(self, d): 34 | marker = eval(d.argMarker[0]) 35 | mod = marker['mod'] 36 | op = marker['op'] 37 | args = marker['args'] 38 | 39 | self.mod_ = mod 40 | self.op_ = op 41 | 42 | assert (mod in ["torch.nn.functional", "torch", "Tensor"]) 43 | 44 | #Filter out named parameters 45 | args = list(filter(lambda x: x['name'] == '', args)) 46 | 47 | assert (len(args) >= 1) 48 | arg = args[0] 49 | assert (arg['type'] == "tensor") 50 | 51 | self.input = Tensor(arg['shape'], arg['dtype']) 52 | self.dir = d.dir 53 | 54 | def params(self): 55 | return str(self.input) 56 | 57 | def flops(self): 58 | # TODO: revise based on op 59 | return self.input.size 60 | 61 | def bytes(self): 62 | # TODO: revise based on op 63 | direction = self.dir 64 | b = self.input.bytes 65 | # fprop is 1 read, 1 write 66 | # bprop is 2 reads, 1 write 67 | b *= 2 if direction == "fprop" else 3 68 | return b 69 | 70 | def tc(self): 71 | return "-" 72 | 73 | def op(self): 74 | return self.op_ 75 | 76 | def mod(self): 77 | return self.mod_ 78 | -------------------------------------------------------------------------------- /pyprof/prof/base.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from abc import ABC, abstractmethod 19 | 20 | 21 | class OperatorLayerBase(ABC): 22 | """ 23 | Base class for all layers and operators. 24 | Every derived class should have the following functions. 25 | """ 26 | 27 | @abstractmethod 28 | def tc(self): 29 | """ 30 | Tensor core usage by the kernel. 31 | Return "1" (yes), "0" (no, but possible), "-" (not applicable) 32 | """ 33 | pass 34 | 35 | @abstractmethod 36 | def params(self): 37 | """ 38 | Kernel parameters to be printed. 39 | """ 40 | pass 41 | 42 | @abstractmethod 43 | def flops(self): 44 | """ 45 | Note that 1 FMA = 2 flops. 46 | """ 47 | pass 48 | 49 | @abstractmethod 50 | def bytes(self): 51 | pass 52 | 53 | @abstractmethod 54 | def mod(self): 55 | """ 56 | Name of the module/class e.g. torch.nn.functional. 57 | """ 58 | pass 59 | 60 | @abstractmethod 61 | def op(self): 62 | """ 63 | Name of the operator e.g. sigmoid. 
64 | """ 65 | pass 66 | -------------------------------------------------------------------------------- /pyprof/prof/convert.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from .base import OperatorLayerBase 19 | from .tensor import Tensor 20 | 21 | 22 | class Convert(OperatorLayerBase): 23 | """ 24 | Class to handle convert operations. 25 | """ 26 | ops = ["byte", "char", "double", "float", "half", "int", "long", "short", "to"] 27 | 28 | def __init__(self, d): 29 | marker = eval(d.argMarker[0]) 30 | mod = marker['mod'] 31 | op = marker['op'] 32 | args = marker['args'] 33 | 34 | self.mod_ = mod 35 | self.op_ = op 36 | 37 | assert (mod == "Tensor") 38 | assert (op in Convert.ops) 39 | assert (len(args) == 1) 40 | 41 | t = args[0] 42 | if t['type'] == "tensor": 43 | self.input = Tensor(t['shape'], t['dtype']) 44 | else: # scalar 45 | self.input = Tensor([], t['type']) 46 | 47 | if op == "to": 48 | # the output dtype is unknown 49 | self.output = self.input 50 | else: 51 | self.output = Tensor(self.input.shape, op) 52 | 53 | def params(self): 54 | return str(self.input) 55 | 56 | def op(self): 57 | return self.op_ 58 | 59 | def mod(self): 60 | return self.mod_ 61 | 62 | def tc(self): 63 | return "-" 64 | 65 | def flops(self): 66 | return 0 67 | 68 | def bytes(self): 69 | return self.input.bytes + self.output.bytes 70 | -------------------------------------------------------------------------------- /pyprof/prof/data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from .utility import Utility 19 | 20 | 21 | class Data(object): 22 | """ 23 | Class to store all the data for every kernel e.g. name, bytes, flops, device, stream etc. 
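    Each instance is built from one kernel dictionary emitted by pyprof.parse;
    mod, op, params, tc, flops and bytes start out as placeholders and are
    filled in once the operator has been identified (e.g. via setParams below).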
24 | """ 25 | 26 | def __init__(self, kernel): 27 | #Available from NVprof 28 | self.tid = kernel['tid'] 29 | self.device = kernel['device'] 30 | self.stream = kernel['stream'] 31 | self.grid = str(kernel['grid']).replace(" ", "").replace("(", "").replace(")", "") 32 | self.block = str(kernel['block']).replace(" ", "").replace("(", "").replace(")", "") 33 | self.name = kernel['kShortName'].replace(" ", "_") 34 | self.lName = kernel['kLongName'] 35 | self.sil = kernel['kDuration'] #units ns 36 | 37 | self.index = None 38 | 39 | #Markers 40 | self.argMarker = kernel['marker'] 41 | self.modMarker = kernel['reprMarkers'] 42 | self.seqMarker = kernel['seqMarker'] 43 | 44 | self.layer = kernel['layer'] 45 | self.trace = kernel['trace'] 46 | 47 | self.seqId = kernel['seqId'] 48 | self.altSeqId = kernel['altSeqId'] 49 | 50 | self.dir = kernel['dir'] 51 | self.sub = kernel['subSeqId'] 52 | 53 | self.mod = "na" 54 | self.op = "na" 55 | self.params = {"na": "na"} 56 | self.tc = "na" 57 | self.flops = 0 58 | self.bytes = 0 59 | 60 | def setParams(self, params): 61 | # TODO: Remove the else block after refactoring. 62 | if type(params) == str: 63 | self.params = params 64 | else: 65 | #Remove space from params 66 | qaz = "" 67 | for key, value in params.items(): 68 | if "type" not in key: 69 | qaz += "{}={},".format(key, value) 70 | else: 71 | if type(value) is str: 72 | qaz += "{},".format(Utility.typeToString(value)) 73 | else: 74 | qaz += "{}".format(value) 75 | 76 | self.params = qaz.replace(" ", "") 77 | -------------------------------------------------------------------------------- /pyprof/prof/dropout.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | 18 | from .base import OperatorLayerBase 19 | from .tensor import Tensor 20 | 21 | class Dropout(OperatorLayerBase): 22 | 23 | def __init__(self, d): 24 | marker = eval(d.argMarker[0]) 25 | mod = marker['mod'] 26 | op = marker['op'] 27 | args = marker['args'] 28 | 29 | self.marker = marker 30 | self.mod_ = mod 31 | self.op_ = op 32 | self.args = args 33 | 34 | assert (mod == "torch.nn.functional") 35 | assert (op == "dropout") 36 | 37 | self.inp = Tensor(args[0]['shape'], args[0]['dtype']) 38 | self.dir = d.dir 39 | 40 | return 41 | 42 | def params(self): 43 | return str(self.inp) 44 | 45 | def op(self): 46 | return self.op_ 47 | 48 | def mod(self): 49 | return self.mod_ 50 | 51 | def tc(self): 52 | return "-" 53 | 54 | def bytes(self): 55 | #Ignoring the cost of writing and reading the mask 56 | return self.inp.bytes * 2 57 | 58 | def flops(self): 59 | # Note: This is approximate and depends on the RNG 60 | return 5 * self.inp.size 61 | -------------------------------------------------------------------------------- /pyprof/prof/dtype.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2020, Aditya Agrawal. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | class Dtype(object): 18 | 19 | _types = { 20 | "uint8" : (1, "uint8"), 21 | "int8" : (1, "int8"), 22 | "byte" : (1, "byte"), 23 | "char" : (1, "char"), 24 | "bool" : (1, "bool"), 25 | 26 | "float16" : (2, "fp16"), 27 | "half" : (2, "fp16"), 28 | "int16" : (2, "int16"), 29 | "short" : (2, "int16"), 30 | 31 | "float32" : (4, "fp32"), 32 | "float" : (4, "fp32"), 33 | "int32" : (4, "int32"), 34 | "int" : (4, "int32"), 35 | 36 | "int64" : (8, "int64"), 37 | "long" : (8, "int64"), 38 | "float64" : (8, "fp64"), 39 | "double" : (8, "fp64"), 40 | } 41 | 42 | @staticmethod 43 | def types(): 44 | t = Dtype._types.keys() 45 | return list(t) 46 | 47 | def __init__(self, dtype): 48 | assert dtype in Dtype.types() 49 | size, name = Dtype._types[dtype] 50 | self._itemsize = size 51 | self._name = name 52 | 53 | def __str__(self): 54 | return self._name 55 | 56 | @property 57 | def itemsize(self): 58 | return self._itemsize 59 | 60 | def main(): 61 | print(Dtype.types()) 62 | for i in Dtype.types(): 63 | dt = Dtype(i) 64 | print(i, dt, dt.itemsize) 65 | 66 | if __name__ == '__main__': 67 | main() 68 | -------------------------------------------------------------------------------- /pyprof/prof/embedding.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from .base import OperatorLayerBase 19 | from .tensor import Tensor 20 | 21 | 22 | class Embedding(OperatorLayerBase): 23 | 24 | def __init__(self, d): 25 | marker = eval(d.argMarker[0]) 26 | mod = marker['mod'] 27 | op = marker['op'] 28 | args = marker['args'] 29 | 30 | self.mod_ = mod 31 | self.op_ = op 32 | 33 | assert (mod == "torch.nn.functional") 34 | assert (op == "embedding") 35 | 36 | input = args[0] 37 | embedding = args[1] 38 | 39 | self.input = Tensor(input['shape'], input['dtype']) 40 | self.embedding = Tensor(embedding['shape'], embedding['dtype']) 41 | 42 | assert (len(self.embedding.shape) == 2) 43 | 44 | self.dir = d.dir 45 | self.sub = d.sub 46 | return 47 | 48 | def params(self): 49 | return str(self.input) + ";" + str(self.embedding) 50 | 51 | def op(self): 52 | return self.op_ 53 | 54 | def mod(self): 55 | return self.mod_ 56 | 57 | def tc(self): 58 | return "-" 59 | 60 | def bytes(self): 61 | b = 0 62 | if self.dir == "fprop": 63 | # read indices 64 | b += self.input.bytes 65 | # read and write the embedding values 66 | b += 2 * self.input.size * self.embedding.shape[1] * self.embedding.itemsize 67 | else: 68 | # 3 times the size of the incoming gradient 69 | b = 3 * self.input.size * self.embedding.shape[1] * self.embedding.itemsize 70 | 71 | if self.sub > 0: 72 | b = 0 73 | 74 | return b 75 | 76 | def flops(self): 77 | # Note: not implemented yet 78 | return 0 79 | -------------------------------------------------------------------------------- /pyprof/prof/linear.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from collections import OrderedDict 19 | from .tc import TC_Whitelist 20 | from .utility import Utility 21 | from .base import OperatorLayerBase 22 | 23 | 24 | class Linear(OperatorLayerBase): 25 | ''' 26 | Notes: 27 | If the bias occurs before the GEMM, then its 1 write (bias expansion). 28 | If the bias occurs after, then its 1 read and 1 write. 29 | bias in bprop is a reduction and hence is 1 read. 
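    In GEMM terms (see setXWBMNK below): M is the weight's output dimension,
    K is the last dimension of the input, and N collects the remaining
    input dimensions.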
30 | ''' 31 | 32 | gemmKernels = [ 33 | "gemm", "gemv", "dot_kernel", "splitKreduce_kernel", 34 | "reduce_1Block_kernel", "cutlass" 35 | ] 36 | 37 | biasKernels = [ 38 | "kernelReduceContigDim", "kernelReduceNoncontigDim_shared", 39 | "elementwise_kernel", "reduce_kernel", "kernelPointwiseApply2", 40 | "2d_grouped_direct_kernel" 41 | ] 42 | 43 | def setXWBMNK(self, args): 44 | x = None 45 | w = None 46 | b = None 47 | if (len(args) == 2): 48 | x, w = args 49 | elif (len(args) == 3): 50 | x, w, b = args 51 | assert (x['type'] == w['type'] == "tensor") 52 | if (b['type'] == "tensor"): 53 | assert (len(b['shape']) == 1) 54 | elif (b['type'] == "NoneType"): 55 | assert b['value'] is None 56 | b = None 57 | else: 58 | assert False 59 | else: 60 | assert False 61 | 62 | assert (len(w['shape']) == 2) 63 | k1 = x['shape'][-1] 64 | n, k2 = w['shape'] 65 | assert (k1 == k2) 66 | if b is not None: 67 | assert (b['shape'][0] == n) 68 | t1 = x['dtype'] 69 | t2 = w['dtype'] 70 | assert (t1 == t2) 71 | 72 | # X, W, B 73 | self.x = x['shape'] 74 | self.w = w['shape'] 75 | self.b = b['shape'] if b is not None else None 76 | self.type = t1 77 | 78 | # M, N, K 79 | #n = Utility.numElems(x[0:-1]) 80 | n = self.x[0:-1] 81 | k = self.x[-1] 82 | m, k1 = self.w 83 | assert (k == k1) 84 | 85 | self.m = m 86 | self.n = n 87 | self.k = k 88 | 89 | def tc(self): 90 | if self.op() == "linear": 91 | if self.name in TC_Whitelist(): 92 | return 1 93 | return 0 94 | else: 95 | return "-" 96 | 97 | def __init__(self, d): 98 | self.name = d.name 99 | self.dir = d.dir 100 | self.sub = d.sub 101 | 102 | marker = eval(d.argMarker[0]) 103 | mod = marker['mod'] 104 | op = marker['op'] 105 | args = marker['args'] 106 | 107 | assert (mod == "torch.nn.functional") 108 | assert (op == "linear") 109 | 110 | self.setXWBMNK(args) 111 | 112 | if any(x in d.name for x in Linear.gemmKernels): 113 | self.op_ = "linear" 114 | else: 115 | assert any(x in d.name for x in Linear.biasKernels), f"Kernel name: {d.name}" 116 | self.op_ = "bias" 117 | ''' 118 | elif (("kernelPointwiseApply2" in d.name) or ("kernelReduceContigDim" in d.name) or ("kernelReduceNoncontigDim_shared" in d.name)): 119 | #bias expansion was before the gemm 120 | self.op_ = "bias" 121 | 122 | elif ("elementwise_kernel" in d.name): 123 | #Bias addition happens later with a broadcast tensor 124 | self.op_ = "bias" 125 | assert (len(d.argMarker) == 2) 126 | marker = eval(d.argMarker[1]) 127 | mod = marker['mod'] 128 | op = marker['op'] 129 | args = marker['args'] 130 | 131 | assert (mod == "Tensor") 132 | assert (op == "__iadd__") 133 | assert (len(args) == 2) 134 | mn = args[0]['shape'] 135 | b = args[1]['shape'] 136 | assert (len(b) == 1) 137 | 138 | assert (mn == (self.n + (self.m,))) 139 | assert (b == self.b) 140 | 141 | else: 142 | assert False 143 | ''' 144 | 145 | def params(self): 146 | #p = OrderedDict([('X', self.x), ('W', self.w), ('B', self.b), ('type', self.type)]) 147 | 148 | m, n, k, x, w, t = self.m, self.n, self.k, self.x, self.w, self.type 149 | if len(n) == 1: 150 | n = n[0] 151 | 152 | if self.op_ == "linear": 153 | if self.dir == "fprop": 154 | p = OrderedDict([('M', m), ('N', n), ('K', k), ('type', t)]) 155 | elif self.dir == "bprop": 156 | if self.sub == 0: #dgrad (most likely) 157 | p = OrderedDict([('M', k), ('N', n), ('K', m), ('type', t)]) 158 | elif self.sub == 1: #wgrad (most likely) 159 | p = OrderedDict([('M', k), ('N', m), ('K', n), ('type', t)]) 160 | else: 161 | #This happens when there are additional kernels for reduction 162 | p = 
OrderedDict([('X', x), ('W', w), ('type', t)]) 163 | else: 164 | assert False 165 | 166 | elif self.op_ == "bias": 167 | p = OrderedDict([('M', m), ('N', n), ('type', t)]) 168 | else: 169 | assert False 170 | return p 171 | 172 | def op(self): 173 | return self.op_ 174 | 175 | def bytesFlops(self): 176 | 177 | m = self.m 178 | n = Utility.numElems(self.n) 179 | k = self.k 180 | 181 | if self.op_ == "linear": 182 | if self.dir == "fprop": 183 | f = m * n * k * 2 184 | b = m * n + m * k + n * k * Utility.typeToBytes(self.type) 185 | elif self.dir == "bprop": 186 | if self.sub == 0: #dgrad (most likely) 187 | f = m * n * k * 2 188 | b = m * n + m * k + n * k * Utility.typeToBytes(self.type) 189 | elif self.sub == 1: #wgrad (most likely) 190 | f = m * n * k * 2 191 | b = m * n + m * k + n * k * Utility.typeToBytes(self.type) 192 | else: 193 | #This happens when there are additional kernels for reduction 194 | f = 0 195 | b = 0 196 | else: 197 | assert False 198 | 199 | elif self.op_ == "bias": 200 | f = m * n 201 | b = 2 * m * n * Utility.typeToBytes(self.type) 202 | else: 203 | assert False 204 | return b, f 205 | 206 | # TODO: Fix bytes and flops with CUTLASS kernels. 207 | def bytes(self): 208 | b, f = self.bytesFlops() 209 | return b 210 | 211 | def flops(self): 212 | b, f = self.bytesFlops() 213 | return f 214 | 215 | def mod(self): 216 | return self.mod_ 217 | -------------------------------------------------------------------------------- /pyprof/prof/loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from collections import OrderedDict 19 | from .utility import Utility 20 | from .base import OperatorLayerBase 21 | 22 | #TODO: Add support for additional loss functions. 
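#Only mse_loss is handled in this file; other losses are presumably picked up
#by the generic Foo handler for unsupported operators (see misc.py).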
23 | 24 | 25 | class MSELoss(OperatorLayerBase): 26 | 27 | def __init__(self, d): 28 | marker = eval(d.argMarker[0]) 29 | mod = marker['mod'] 30 | op = marker['op'] 31 | args = marker['args'] 32 | 33 | self.marker = marker 34 | self.mod_ = mod 35 | self.op_ = op 36 | self.args = args 37 | 38 | assert (mod == "torch.nn.functional") 39 | assert (op == "mse_loss") 40 | assert (len(args) == 3) 41 | 42 | #Get input, target and reduction 43 | if (args[0]['name'] == ""): 44 | x = args[0] 45 | else: 46 | x = list(filter(lambda x: x['name'] == "input", args))[0] 47 | 48 | if (args[1]['name'] == ""): 49 | y = args[1] 50 | else: 51 | y = list(filter(lambda x: x['name'] == "target", args))[0] 52 | 53 | if (args[2]['name'] == ""): 54 | r = args[2] 55 | else: 56 | r = list(filter(lambda x: x['name'] == "reduction", args))[0] 57 | 58 | assert (x['type'] == y['type'] == "tensor") 59 | assert (x['shape'] == y['shape']) 60 | assert (x['dtype'] == y['dtype']) 61 | assert (r['type'] == "str") 62 | assert (r['value'] in ["none", "mean", "sum"]) 63 | 64 | self.shape = x['shape'] 65 | self.type = x['dtype'] 66 | self.red = r['value'] 67 | self.dir = d.dir 68 | 69 | def params(self): 70 | p = OrderedDict([('T', self.shape), ('type', self.type), ('red', self.red)]) 71 | return p 72 | 73 | def elems(self): 74 | red = self.red 75 | e = Utility.numElems(self.shape) 76 | 77 | if self.dir == "fprop": 78 | if red == "none": 79 | e *= 3 80 | else: 81 | e *= 2 82 | else: 83 | if red == "none": 84 | e *= 4 85 | else: 86 | e *= 3 87 | return e 88 | 89 | def bytes(self): 90 | return self.elems() * Utility.typeToBytes(self.type) 91 | 92 | def flops(self): 93 | return self.elems() * 2 + 1 94 | 95 | def tc(self): 96 | return "-" 97 | 98 | def op(self): 99 | return self.op_ 100 | 101 | def mod(self): 102 | return self.mod_ 103 | -------------------------------------------------------------------------------- /pyprof/prof/memory.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2020, Aditya Agrawal. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | from .base import OperatorLayerBase 18 | from .tensor import Tensor 19 | 20 | def readMarker(d): 21 | marker = eval(d.argMarker[0]) 22 | return marker['mod'], marker['op'], marker['args'] 23 | 24 | class OneZero(OperatorLayerBase): 25 | """ 26 | Support for torch.ones, torch.zeros etc. 27 | Fill a tensor with ones or zeros. 28 | """ 29 | 30 | ops = ["ones", "ones_like", "zero_", "zeros", "zeros_like"] 31 | 32 | def __init__(self, d): 33 | mod, op, args = readMarker(d) 34 | assert mod in ["torch", "Tensor"] 35 | assert op in OneZero.ops 36 | 37 | self.mod_ = mod 38 | self.op_ = op 39 | 40 | # For ones_like, zero_, zeros_like, the input is a tensor. 
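        # e.g. torch.zeros_like(x) records a single tensor argument whose
        # shape and dtype become self.input below.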
41 | if op in ["ones_like", "zero_", "zeros_like"]: 42 | assert(len(args) == 1) 43 | arg = args[0] 44 | self.input = Tensor(arg['shape'], arg['dtype']) 45 | 46 | # For ones and zeros, the input can be a list, tuple, sequence of integers. 47 | # E.g. torch.ones((3,5,6)) or torch.ones([3,5,6]) or torch.ones(3,5,6) 48 | else: 49 | assert op in ["ones", "zeros"] 50 | # TODO: Assume the output dtype is float 51 | if args[0]['type'] in ['list', 'tuple']: 52 | assert(len(args) == 1) 53 | self.input = Tensor(args[0]['value'], "float") 54 | elif args[0]['type'] == "int": 55 | # Get all unnamed arguments of type int 56 | args = list(filter(lambda x: x['name'] == "" and x['type'] == "int", args)) 57 | shape = [x['value'] for x in args] 58 | self.input = Tensor(shape, "float") 59 | else: 60 | assert False 61 | 62 | def params(self): 63 | return str(self.input) 64 | 65 | def tc(self): 66 | return "-" 67 | 68 | def op(self): 69 | return self.op_ 70 | 71 | def mod(self): 72 | return self.mod_ 73 | 74 | def bytes(self): 75 | return self.input.bytes 76 | 77 | def flops(self): 78 | return 0 79 | 80 | class Fill(OperatorLayerBase): 81 | """ 82 | Support for torch.fill_. 83 | Fill a tensor with a specific value. 84 | """ 85 | 86 | def __init__(self, d): 87 | mod, op, args = readMarker(d) 88 | assert mod == "Tensor" 89 | assert op == "fill_" 90 | 91 | self.mod_ = mod 92 | self.op_ = op 93 | 94 | assert(len(args) == 2) 95 | arg = args[0] 96 | self.input = Tensor(arg['shape'], arg['dtype']) 97 | 98 | def params(self): 99 | return str(self.input) 100 | 101 | def tc(self): 102 | return "-" 103 | 104 | def op(self): 105 | return self.op_ 106 | 107 | def mod(self): 108 | return self.mod_ 109 | 110 | def bytes(self): 111 | return self.input.bytes 112 | 113 | def flops(self): 114 | return 0 115 | 116 | class Full(OperatorLayerBase): 117 | """ 118 | Support for torch.full. 119 | Create a tensor of specified size and filled with a specified value. 120 | """ 121 | 122 | def __init__(self, d): 123 | mod, op, args = readMarker(d) 124 | assert mod == "torch" 125 | assert op == "full" 126 | 127 | self.mod_ = mod 128 | self.op_ = op 129 | 130 | assert(len(args) == 2) 131 | arg1, arg2 = args 132 | assert arg1['type'] in ['list', 'tuple'] 133 | # TODO: Add more types for arg2 134 | assert arg2['type'] in ['float', 'int'] 135 | self.output = Tensor(arg1['value'], arg2['type']) 136 | 137 | def params(self): 138 | return str(self.output) 139 | 140 | def tc(self): 141 | return "-" 142 | 143 | def op(self): 144 | return self.op_ 145 | 146 | def mod(self): 147 | return self.mod_ 148 | 149 | def bytes(self): 150 | return self.output.bytes 151 | 152 | def flops(self): 153 | return 0 154 | -------------------------------------------------------------------------------- /pyprof/prof/misc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from collections import OrderedDict 19 | from .utility import Utility 20 | from .base import OperatorLayerBase 21 | 22 | 23 | class Foo(OperatorLayerBase): 24 | """ 25 | An object of Foo is instantiated when we detect an unsupported operator. 26 | """ 27 | 28 | def __init__(self, d): 29 | marker = eval(d.argMarker[0]) 30 | mod = marker['mod'] 31 | op = marker['op'] 32 | args = marker['args'] 33 | 34 | self.marker = marker 35 | self.mod_ = mod 36 | self.op_ = op 37 | self.args = args 38 | 39 | shapes = [] 40 | types = [] 41 | 42 | for arg in args: 43 | if arg['type'] == "tensor": 44 | shapes.append(arg['shape']) 45 | types.append(arg['dtype']) 46 | 47 | self.shape = shapes 48 | self.type = types 49 | 50 | def params(self): 51 | p = OrderedDict([('T', self.shape), ('type', self.type)]) 52 | return p 53 | 54 | def tc(self): 55 | return "-" 56 | 57 | def op(self): 58 | return self.op_ 59 | 60 | def mod(self): 61 | return self.mod_ 62 | 63 | def flops(self): 64 | return 0 65 | 66 | def bytes(self): 67 | return 0 68 | 69 | 70 | class Copy(OperatorLayerBase): 71 | 72 | def __init__(self, d): 73 | marker = eval(d.argMarker[0]) 74 | mod = marker['mod'] 75 | op = marker['op'] 76 | args = marker['args'] 77 | 78 | self.marker = marker 79 | self.mod_ = mod 80 | self.op_ = op 81 | self.args = args 82 | 83 | assert (mod == "Tensor") 84 | assert (op == "copy_") 85 | assert (len(args) == 2) 86 | 87 | dst, src = args 88 | assert (src['type'] == dst['type']) 89 | assert (src['shape'] == dst['shape']) 90 | 91 | self.shape = src['shape'] 92 | self.stype = src['dtype'] 93 | self.dtype = dst['dtype'] 94 | 95 | def params(self): 96 | #The data type might be different 97 | p = OrderedDict([('T', self.shape), ('stype', self.stype), ('dtype', self.dtype)]) 98 | return p 99 | 100 | def tc(self): 101 | return "-" 102 | 103 | def op(self): 104 | return self.op_ 105 | 106 | def mod(self): 107 | return self.mod_ 108 | 109 | def flops(self): 110 | return 0 111 | 112 | def elems(self): 113 | return Utility.numElems(self.shape) 114 | 115 | def bytes(self): 116 | return self.elems() * (Utility.typeToBytes(self.stype) + Utility.typeToBytes(self.dtype)) 117 | 118 | 119 | class Clone(OperatorLayerBase): 120 | 121 | def __init__(self, d): 122 | marker = eval(d.argMarker[0]) 123 | mod = marker['mod'] 124 | op = marker['op'] 125 | args = marker['args'] 126 | 127 | self.marker = marker 128 | self.mod_ = mod 129 | self.op_ = op 130 | self.args = args 131 | 132 | assert (mod == "Tensor") 133 | assert (op == "clone") 134 | assert (len(args) == 1) 135 | t = args[0] 136 | self.shape = t['shape'] 137 | self.type = t['dtype'] 138 | 139 | def params(self): 140 | p = OrderedDict([('T', self.shape), ('type', self.type)]) 141 | return p 142 | 143 | def flops(self): 144 | return 0 145 | 146 | def tc(self): 147 | return "-" 148 | 149 | def op(self): 150 | return self.op_ 151 | 152 | def mod(self): 153 | return self.mod_ 154 | 155 | def elems(self): 156 | return Utility.numElems(self.shape) 157 | 158 | def bytes(self): 159 | return 2 * self.elems() * Utility.typeToBytes(self.type) 160 | 161 | 162 | class Contiguous(OperatorLayerBase): 163 | 164 | def __init__(self, d): 165 | marker = eval(d.argMarker[0]) 166 | mod = marker['mod'] 167 | op = marker['op'] 168 | args = marker['args'] 169 | 170 | self.marker = marker 171 | self.mod_ = mod 172 | self.op_ = op 173 | self.args = args 174 | 175 | assert (mod == "Tensor") 176 | 
assert (op == "contiguous") 177 | assert (len(args) == 1) 178 | t = args[0] 179 | self.shape = t['shape'] 180 | self.type = t['dtype'] 181 | 182 | def params(self): 183 | p = OrderedDict([('T', self.shape), ('type', self.type)]) 184 | return p 185 | 186 | def flops(self): 187 | return 0 188 | 189 | def bytes(self): 190 | return 2 * Utility.numElems(self.shape) * Utility.typeToBytes(self.type) 191 | 192 | def tc(self): 193 | return "-" 194 | 195 | def op(self): 196 | return self.op_ 197 | 198 | def mod(self): 199 | return self.mod_ 200 | 201 | 202 | class Any(OperatorLayerBase): 203 | 204 | def __init__(self, d): 205 | marker = eval(d.argMarker[0]) 206 | mod = marker['mod'] 207 | op = marker['op'] 208 | args = marker['args'] 209 | 210 | self.marker = marker 211 | self.mod_ = mod 212 | self.op_ = op 213 | self.args = args 214 | 215 | assert (mod == "Tensor") 216 | assert (op == "any") 217 | assert (len(args) in [1,2]) 218 | t = args[0] 219 | # The input can be a tensor or scalar 220 | assert (t['type'] in ["tensor", "bool"]) 221 | 222 | if t['type'] == "tensor": 223 | self.shape = t['shape'] 224 | self.type = t['dtype'] 225 | else: 226 | self.shape = (1,) 227 | self.type = t['type'] 228 | 229 | self.sub = d.sub 230 | return 231 | 232 | def params(self): 233 | p = OrderedDict([('T', self.shape), ('type', self.type)]) 234 | return p 235 | 236 | def op(self): 237 | return self.op_ 238 | 239 | def mod(self): 240 | return self.mod_ 241 | 242 | def tc(self): 243 | return "-" 244 | 245 | def flops(self): 246 | return 0 247 | 248 | def bytes(self): 249 | return Utility.numElems(self.shape) * Utility.typeToBytes(self.type) 250 | -------------------------------------------------------------------------------- /pyprof/prof/normalization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from .base import OperatorLayerBase 19 | from .tensor import Tensor 20 | 21 | 22 | class BatchNorm(OperatorLayerBase): 23 | 24 | def __init__(self, d): 25 | marker = eval(d.argMarker[0]) 26 | mod = marker['mod'] 27 | op = marker['op'] 28 | args = marker['args'] 29 | 30 | self.mod_ = mod 31 | self.op_ = op 32 | 33 | assert (op == "batch_norm") 34 | assert (len(args) >= 1) 35 | i = args[0] 36 | assert (i['type'] == "tensor") 37 | 38 | self.input = Tensor(i['shape'], i['dtype']) 39 | self.dir = d.dir 40 | self.sub = d.sub 41 | 42 | def params(self): 43 | return str(self.input) 44 | 45 | def tc(self): 46 | return "-" 47 | 48 | def op(self): 49 | return self.op_ 50 | 51 | def mod(self): 52 | return self.mod_ 53 | 54 | def flops(self): 55 | # Variance algo-dependent, but this is a reasonable value. 
56 | return self.input.size * 8 57 | 58 | def bytes(self): 59 | b = self.input.bytes 60 | # fprop is 2 reads, 2 writes 61 | # bprop is 4 reads, 1 write 62 | if self.dir == "fprop": 63 | b *= 4 64 | else: 65 | b *= 5 66 | 67 | if self.sub > 0: 68 | return 0 69 | else: 70 | return b 71 | -------------------------------------------------------------------------------- /pyprof/prof/optim.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from collections import OrderedDict 19 | from .utility import Utility 20 | from .base import OperatorLayerBase 21 | 22 | #TODO: Add support for other optimizers. 23 | 24 | 25 | class Adam(OperatorLayerBase): 26 | 27 | def __init__(self, d): 28 | marker = eval(d.argMarker[0]) 29 | mod = marker['mod'] 30 | op = marker['op'] 31 | args = marker['args'] 32 | 33 | self.marker = marker 34 | self.mod_ = mod 35 | self.op_ = op 36 | self.args = args 37 | self.sub = d.sub 38 | 39 | assert (op == "adam") 40 | assert (len(args) == 12) or (len(args) == 14) 41 | w, hw, m, v, g = args[0:5] 42 | assert (w['shape'] == m['shape'] == v['shape'] == g['shape']) 43 | assert (hw['shape'] == w['shape']) or (hw['shape'] == (0, )) #hw could be null 44 | assert (w['type'] == m['type'] == v['type'] == g['type'] == hw['type'] == "tensor") 45 | assert (w['dtype'] == m['dtype'] == v['dtype'] == "float32") 46 | 47 | self.w = w 48 | self.g = g 49 | 50 | def params(self): 51 | p = OrderedDict([('T', self.w['shape']), ('wtype', self.w['dtype']), ('gtype', self.g['dtype'])]) 52 | return p 53 | 54 | def flops(self): 55 | return 0 56 | 57 | def bytes(self): 58 | wshape = self.w['shape'] 59 | wtype = self.w['dtype'] 60 | gtype = self.g['dtype'] 61 | b = 0 62 | 63 | elems = Utility.numElems(wshape) 64 | 65 | #Get time to stream read/write w, m, v 66 | b += 6 * elems * Utility.typeToBytes(wtype) 67 | 68 | #Get time to read "g" 69 | b += elems * Utility.typeToBytes(gtype) 70 | 71 | if wtype != gtype: #mixed precision 72 | #Get time to write "hw 73 | b += elems * Utility.typeToBytes(gtype) 74 | 75 | return b if (self.sub == 0) else 0 76 | 77 | def tc(self): 78 | return "-" 79 | 80 | def op(self): 81 | return self.op_ 82 | 83 | def mod(self): 84 | return self.mod_ 85 | -------------------------------------------------------------------------------- /pyprof/prof/output.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import errno, os, sys 19 | 20 | 21 | class Output(): 22 | """ 23 | This class handles printing of a columed output and a CSV. 24 | """ 25 | 26 | # The table below is organized as 27 | # user_option: [output_header, attribute_in_Data_class, type, min_width_in_columed_output] 28 | table = { 29 | "idx": ["Idx", "index", int, 7], 30 | "seq": ["SeqId", "seqId", str, 7], 31 | "altseq": ["AltSeqId", "altSeqId", str, 7], 32 | "tid": ["TId", "tid", int, 12], 33 | "layer": ["Layer", "layer", str, 10], 34 | "trace": ["Trace", "trace", str, 25], 35 | "dir": ["Direction", "dir", str, 5], 36 | "sub": ["Sub", "sub", int, 3], 37 | "mod": ["Module", "mod", str, 15], 38 | "op": ["Op", "op", str, 15], 39 | "kernel": ["Kernel", "name", str, 0], 40 | "params": ["Params", "params", str, 0], 41 | "sil": ["Sil(ns)", "sil", int, 10], 42 | "tc": ["TC", "tc", str, 2], 43 | "device": ["Device", "device", int, 3], 44 | "stream": ["Stream", "stream", int, 3], 45 | "grid": ["Grid", "grid", str, 12], 46 | "block": ["Block", "block", str, 12], 47 | "flops": ["FLOPs", "flops", int, 12], 48 | "bytes": ["Bytes", "bytes", int, 12] 49 | } 50 | 51 | def __init__(self, args): 52 | self.cols = args.c 53 | self.csv = args.csv 54 | self.col = True if (args.w > 0) else False 55 | self.width = args.w 56 | 57 | w = 0 58 | for col in self.cols: 59 | assert col in Output.table.keys() 60 | w += Output.table[col][3] 61 | 62 | if ((self.col) and (w > self.width)): 63 | print("Minimum width required to print {} = {}. 
Exiting.".format(",".join(self.cols), w)) 64 | sys.exit(1) 65 | 66 | remainder = self.width - w 67 | 68 | if ("kernel" in self.cols) and ("params" in self.cols): 69 | Output.table["kernel"][3] = int(remainder / 2) 70 | Output.table["params"][3] = int(remainder / 2) 71 | elif ("kernel" in self.cols): 72 | Output.table["kernel"][3] = remainder 73 | elif ("params" in self.cols): 74 | Output.table["params"][3] = remainder 75 | 76 | #header format 77 | cadena = "" 78 | for col in self.cols: 79 | _, _, t, w = Output.table[col] 80 | cadena += "%-{}.{}s ".format(w, w) 81 | 82 | self.hFormat = cadena 83 | 84 | #data format 85 | cadena = "" 86 | for col in self.cols: 87 | _, _, t, w = Output.table[col] 88 | if (t == str): 89 | cadena += "%-{}.{}s ".format(w, w) 90 | elif (t == int): 91 | cadena += "%{}d ".format(w) 92 | 93 | self.dFormat = cadena 94 | 95 | def foo(self, cadena, pformat): 96 | if self.csv: 97 | cadena = ",".join(map(lambda x: '"' + str(x) + '"', cadena)) 98 | elif self.col: 99 | cadena = pformat % cadena 100 | else: 101 | cadena = " ".join(map(str, cadena)) 102 | 103 | try: 104 | print(cadena) 105 | except IOError as e: 106 | #gracefully handle pipes 107 | if e.errno == errno.EPIPE: 108 | # Python flushes standard streams on exit; redirect remaining output 109 | # to devnull to avoid another BrokenPipeError at shutdown 110 | 111 | devnull = os.open(os.devnull, os.O_WRONLY) 112 | os.dup2(devnull, sys.stdout.fileno()) 113 | sys.exit(0) 114 | else: 115 | sys.exit(-1) 116 | 117 | def header(self): 118 | cadena = () 119 | for col in self.cols: 120 | h = Output.table[col][0] 121 | cadena = cadena + (h, ) 122 | 123 | self.foo(cadena, self.hFormat) 124 | 125 | def data(self, a): 126 | if a.dir == "": 127 | direc = "na" 128 | else: 129 | direc = a.dir 130 | 131 | if a.op == "": 132 | op = "na" 133 | else: 134 | op = a.op 135 | 136 | if a.mod == "": 137 | mod = "na" 138 | else: 139 | mod = a.mod 140 | 141 | cadena = () 142 | for col in self.cols: 143 | attr = Output.table[col][1] 144 | val = getattr(a, attr) 145 | 146 | if col == "layer": 147 | assert (type(val) == list) 148 | val = ":".join(val) 149 | val = "-" if val == "" else val 150 | 151 | if col == "trace": 152 | assert (type(val) == list) 153 | if self.col and len(val): 154 | val = val[-1] 155 | val = val.split("/")[-1] 156 | else: 157 | val = ",".join(val) 158 | val = "-" if val == "" else val 159 | 160 | if col in ["seq", "altseq"]: 161 | assert (type(val) == list) 162 | val = ",".join(map(str, val)) 163 | val = "-" if val == "" else val 164 | 165 | cadena = cadena + (val, ) 166 | 167 | self.foo(cadena, self.dFormat) 168 | -------------------------------------------------------------------------------- /pyprof/prof/pointwise.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | 18 | import numpy as np 19 | from .base import OperatorLayerBase 20 | from .tensor import Tensor 21 | from functools import reduce 22 | import operator 23 | 24 | class Pointwise(OperatorLayerBase): 25 | 26 | # TODO: Add more operators. 27 | # TODO: Determining the output dtype is tricky. 28 | # TODO: Refine calculations based on direction. 29 | # TODO: Refine calculations for non-arithmetic ops. 30 | 31 | # Unary 32 | unary = ["abs", "abs_", "neg", "neg_", "reciprocal", "reciprocal_"] 33 | unary += ["__abs__", "__neg__"] 34 | 35 | # Unary bitwise 36 | unary += ["__invert__"] 37 | 38 | # Exponential and log (unary) 39 | exp_log = ["exp", "exp_", "exp1m", "exp1m_", "log", "log_", 40 | "log10", "log10_", "log1p", "log1p_", "log2", "log2_"] 41 | 42 | # Sqrt (unary) 43 | sqrt = ["rsqrt", "rsqrt_", "sqrt", "sqrt_"] 44 | 45 | # Representation (unary) 46 | representation = ["ceil", "ceil_", "clamp", "clamp_", "floor", "floor_", 47 | "frac", "frac_", "round", "round_", "sign", "sign_", 48 | "trunc", "trunc_"] 49 | 50 | # Trigonometric and transcendental (unary) 51 | trig_trans = ["acos", "acos_", "asin", "asin_", "atan", "atan_", 52 | "atan2", "atan2_", "cos", "cos_", "cosh", "cosh_", 53 | "sin", "sin_", "sinh", "sinh_", "tan", "tan_", 54 | "sigmoid", "sigmoid_", "tanh", "tanh_"] 55 | 56 | # Error (unary) 57 | error = ["erf", "erf_", "erfc", "erfc_", "erfinv", "erfinv_"] 58 | 59 | # Binary 60 | binary = ["add", "add_", "div", "div_", "mul", "mul_", 61 | "remainder", "remainder_", "sub", "sub_"] 62 | binary += ["__add__", "__sub__", "__mul__", "__floordiv__", 63 | "__truediv__", "__mod__"] 64 | binary += ["__radd__", "__rsub__", "__rmul__", "__rdiv__", 65 | "__rtruediv__", "__rfloordiv__"] 66 | binary += ["fmod", "fmod_"] 67 | 68 | # Binary inplace 69 | ibinary = ["__iadd__", "__isub__", "__imul__", "__itruediv__"] 70 | 71 | # Power (binary) 72 | power = ["pow", "pow_", "__pow__", "__rpow__"] 73 | 74 | # Comparison (binary) 75 | comp = ["lt", "lt_", "gt", "gt_", "ge", "ge_", "le", "le_", 76 | "eq", "eq_", "ne", "ne_"] 77 | comp += ["__lt__", "__gt__", "__ge__", "__le__", "__eq__", "__ne__"] 78 | 79 | # Logical (binary) 80 | logical = ["__and__", "__or__", "__xor__", "__lshift__", "__rshift__"] 81 | 82 | # Logical inplace (binary) 83 | ilogical = ["__iand__", "__ior__", "__ixor__", "__ilshift__", "__irshift__"] 84 | 85 | # Ternary 86 | ternary = ["addcdiv", "addcdiv_", "addcmul", "addcmul_"] 87 | 88 | # Misc 89 | misc = ["digamma", "lerp", "lerp_", "mvlgamma"] 90 | 91 | ops = unary + binary + ibinary + comp + logical + ilogical + \ 92 | ternary + exp_log + power + sqrt + representation + trig_trans + \ 93 | error + misc 94 | 95 | def __init__(self, d): 96 | marker = eval(d.argMarker[0]) 97 | mod = marker['mod'] 98 | op = marker['op'] 99 | args = marker['args'] 100 | 101 | self.marker = marker 102 | self.mod_ = mod 103 | self.op_ = op 104 | self.args = args 105 | 106 | self.dir = d.dir 107 | assert (d.dir in ["fprop", "bprop"]) 108 | assert (op in Pointwise.ops) 109 | 110 | # Filter out all named parameters (kwargs). 111 | # This might require revisiting in future. 
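        # Each surviving positional argument is wrapped below: real tensors keep their shape and dtype, while bare Python floats/ints become zero-dim Tensors, so bytes_flops() can broadcast them uniformly.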
112 | args = list(filter(lambda x: x['name'] == "", args)) 113 | 114 | # Filter out non tensors 115 | #args = list(filter(lambda x: x['type'] == "tensor", args)) 116 | 117 | assert (len(args) <= 4) 118 | self.input = [] 119 | 120 | for arg in args: 121 | t = arg['type'] 122 | if (t == "tensor"): 123 | tensor = Tensor(arg['shape'], arg['dtype']) 124 | elif t in ['float', 'int']: 125 | tensor = Tensor([], t) 126 | else: 127 | assert False 128 | 129 | self.input.append(tensor) 130 | 131 | def params(self): 132 | return ";".join([str(t) for t in self.input]) 133 | 134 | def tc(self): 135 | return "-" 136 | 137 | def op(self): 138 | return self.op_ 139 | 140 | def mod(self): 141 | return self.mod_ 142 | 143 | def bytes_flops(self): 144 | b = f = 0 145 | 146 | # Unary 147 | if self.op() in Pointwise.unary + Pointwise.representation: 148 | # Relaxing assert. clamp has > 1 input arguments. 149 | assert (len(self.input) >= 1) 150 | b = 2 * self.input[0].bytes 151 | f = self.input[0].size 152 | 153 | elif self.op() in Pointwise.exp_log + Pointwise.trig_trans + \ 154 | Pointwise.sqrt + Pointwise.error: 155 | assert (len(self.input) == 1) 156 | b = 2 * self.input[0].bytes 157 | f = self.input[0].size * 20 # estimate 158 | 159 | # Binary 160 | elif self.op() in Pointwise.comp + \ 161 | Pointwise.binary + Pointwise.ibinary + \ 162 | Pointwise.logical + Pointwise.ilogical: 163 | 164 | assert (len(self.input) == 2) 165 | out = Tensor.broadcast(self.input) 166 | 167 | if self.dir == "fprop": 168 | b = reduce(operator.add, [t.bytes for t in self.input]) 169 | # The output of comparison is bool 170 | if self.op() in Pointwise.comp: 171 | out = Tensor(out.shape, "bool") 172 | b += out.bytes 173 | f = out.size 174 | else: 175 | if (self.op() in ["add", "__add__", "sub", "__sub__", "__isub__"]): 176 | b = 2 * out.bytes 177 | f = 0 178 | elif (self.op() in ["__mul__", "__imul__", "__rmul__", "div", "__truediv__"]): 179 | b = 3 * out.bytes 180 | f = out.size 181 | else: 182 | e = f'{self.op()} bprop not supported yet. Please file a bug.' 183 | assert False, e 184 | 185 | elif self.op() in Pointwise.power: 186 | assert (len(self.input) == 2) 187 | out = Tensor.broadcast(self.input) 188 | b = reduce(operator.add, [t.bytes for t in self.input]) 189 | b += out.bytes 190 | f = out.size * 20 # estimate 191 | 192 | # Ternary 193 | elif self.op() in Pointwise.ternary: 194 | # Remove scalars 195 | tensors = list(filter(lambda x: x.shape != [], self.input)) 196 | assert len(tensors) == 3 197 | out = Tensor.broadcast(tensors) 198 | b = reduce(operator.add, [t.bytes for t in tensors]) 199 | b += out.bytes 200 | f = 3 * out.size 201 | 202 | else: 203 | e = f'{self.op()} not supported yet. Please file a bug.' 204 | assert False, e 205 | 206 | return b, f 207 | 208 | def bytes(self): 209 | b, f = self.bytes_flops() 210 | return b 211 | 212 | def flops(self): 213 | b, f = self.bytes_flops() 214 | return f 215 | -------------------------------------------------------------------------------- /pyprof/prof/pooling.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from collections import OrderedDict 19 | from .utility import Utility 20 | 21 | # Work in progress. 22 | 23 | 24 | #poolFuncs = ["max_pool2d_with_indices_forward", "max_pool2d_with_indices"] 25 | class MaxPool2d(object): 26 | 27 | def parse(marker): 28 | 29 | def convert2Tuple(arg): 30 | assert (arg['type'] in ["int", "tuple"]) 31 | if arg['type'] == "int": 32 | return (arg['value'], arg['value']) 33 | else: 34 | return arg['value'] 35 | 36 | mod = marker['mod'] 37 | op = marker['op'] 38 | args = marker['args'] 39 | assert (mod == "torch.nn.functional") 40 | assert (op == "max_pool2d") 41 | assert (len(args) >= 2) 42 | 43 | #input 44 | assert (args[0]['name'] == "") 45 | inp = args[0] 46 | assert (inp['type'] == "tensor") 47 | i = inp['shape'] 48 | t = inp['dtype'] 49 | assert (len(i) == 4) #nchw tensor 50 | 51 | #kernel 52 | if (args[1]['name'] == ""): 53 | k = args[1] 54 | else: 55 | k = list(filter(lambda x: x['name'] == "kernel_size", args))[0] 56 | k = convert2Tuple(k) 57 | 58 | #stride 59 | s = k #default value 60 | if ((len(args) >= 3) and args[2] == ""): 61 | s = args[2] 62 | s = convert2Tuple(s) 63 | elif any(x['name'] == "stride" for x in args): 64 | s = list(filter(lambda x: x['name'] == "stride", args))[0] 65 | s = convert2Tuple(s) 66 | 67 | #padding 68 | p = (0, 0) 69 | if ((len(args) >= 4) and args[3] == ""): 70 | p = args[3] 71 | p = convert2Tuple(p) 72 | elif any(x['name'] == "padding" for x in args): 73 | p = list(filter(lambda x: x['name'] == "padding", args))[0] 74 | p = convert2Tuple(p) 75 | 76 | params = OrderedDict([('T', i), ('K', k), ('s', s), ('p', p), ('type', t)]) 77 | return params 78 | -------------------------------------------------------------------------------- /pyprof/prof/prof.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | """ 18 | This script reads the output (Python dictionary) created by parse.py. 19 | For every kernel (line) in the input it determines 20 | module / class name e.g. torch.nn.functional 21 | operator name e.g. linear 22 | kernel parameters e.g. GEMM M, N, K, datatype 23 | bytes 24 | flops 25 | tensor core usage 26 | direction (fprop, bprop) 27 | and other things. Please see the tool usage. 
28 | """ 29 | 30 | from .usage import parseArgs 31 | from .output import Output 32 | from .utility import Utility 33 | from .pointwise import Pointwise 34 | from .convert import Convert 35 | from .blas import * 36 | from .embedding import Embedding 37 | from .reduction import * 38 | from .dropout import Dropout 39 | from .softmax import * 40 | #from pooling import * # work in progress 41 | from .linear import Linear 42 | from .optim import Adam 43 | from .misc import * 44 | from .conv import Conv 45 | from .activation import Activation 46 | from .index_slice_join_mutate import Cat, Reshape, MaskedScatter, Gather, Nonzero, IndexSelect, MaskedSelect 47 | from .recurrentCell import RNNCell 48 | from .normalization import BatchNorm 49 | from .randomSample import RandPerm 50 | from .loss import MSELoss 51 | from .data import Data 52 | from .memory import OneZero, Fill, Full 53 | 54 | 55 | def findFpropKernel(seq): 56 | #Find the last fprop kernel with the same seqId 57 | #First look at seqId and then at altSeqId 58 | for idx in reversed(range(len(kernels))): 59 | k = kernels[idx] 60 | if (seq in k['seqId']) and (k['dir'] == "fprop"): 61 | return idx 62 | 63 | for idx in reversed(range(len(kernels))): 64 | k = kernels[idx] 65 | if (seq in k['altSeqId']) and (k['dir'] == "fprop"): 66 | return idx 67 | 68 | return -1 69 | #print("Error: seqId {} not found.".format(seq), file=sys.stderr) 70 | #assert False 71 | 72 | 73 | def foo(mod, op, d): 74 | if (op[0] == "linear"): 75 | xx = Linear(d) 76 | 77 | # rnncell, lstmcell, grucell 78 | elif (mod[0] in ["LSTMCell", "GRUCell"]) and (op[0] == "forward"): 79 | xx = RNNCell(d) 80 | 81 | elif op[0] in [ 82 | "conv1d", 83 | "conv2d", 84 | ]: 85 | xx = Conv(d) 86 | 87 | elif (op[0] in Pointwise.ops): 88 | xx = Pointwise(d) 89 | 90 | elif (op[0] in Convert.ops): 91 | xx = Convert(d) 92 | 93 | elif op[0] in ["__matmul__", "matmul"]: 94 | xx = Matmul(d) 95 | 96 | elif op[0] == "embedding": 97 | xx = Embedding(d) 98 | 99 | #reduction 100 | elif op[0] == "sum": 101 | xx = Sum(d) 102 | 103 | elif op[0] == "mean": 104 | xx = Mean(d) 105 | 106 | elif op[0] == "norm": 107 | xx = Norm(d) 108 | 109 | elif op[0] == "dropout": 110 | xx = Dropout(d) 111 | 112 | #Index, Slice, Join, Mutate 113 | elif (op[0] == "cat"): 114 | xx = Cat(d) 115 | 116 | elif (op[0] == "reshape"): 117 | xx = Reshape(d) 118 | 119 | elif (op[0] == "masked_scatter_"): 120 | xx = MaskedScatter(d) 121 | 122 | elif (op[0] == "gather"): 123 | xx = Gather(d) 124 | 125 | elif (op[0] == "nonzero"): 126 | xx = Nonzero(d) 127 | 128 | elif (op[0] == "index_select"): 129 | xx = IndexSelect(d) 130 | 131 | elif (op[0] == "masked_select"): 132 | xx = MaskedSelect(d) 133 | 134 | #blas 135 | elif op[0] in ["addmm", "addmm_"]: 136 | xx = Addmm(d) 137 | 138 | elif op[0] == "mm": 139 | xx = Mm(d) 140 | 141 | elif op[0] == "bmm": 142 | xx = Bmm(d) 143 | 144 | #softmax 145 | elif op[0] == "softmax": 146 | xx = Softmax(d) 147 | 148 | elif op[0] == "log_softmax": 149 | xx = LogSoftmax(d) 150 | 151 | #loss 152 | elif op[0] == "mse_loss": 153 | xx = MSELoss(d) 154 | 155 | #optimizers 156 | elif op[0] == "adam": 157 | xx = Adam(d) 158 | 159 | #normalization 160 | elif op[0] == "batch_norm": 161 | xx = BatchNorm(d) 162 | 163 | #random 164 | elif op[0] == "randperm": 165 | xx = RandPerm(d) 166 | 167 | #memory 168 | elif op[0] in OneZero.ops: 169 | xx = OneZero(d) 170 | 171 | elif op[0] == "fill_": 172 | xx = Fill(d) 173 | 174 | elif op[0] == "full": 175 | xx = Full(d) 176 | 177 | #misc 178 | elif op[0] == "copy_": 179 | xx = 
Copy(d) 180 | 181 | elif op[0] == "clone": 182 | xx = Clone(d) 183 | 184 | elif op[0] == "contiguous": 185 | xx = Contiguous(d) 186 | 187 | elif op[0] == "any": 188 | xx = Any(d) 189 | 190 | elif (op[0] in Activation.ops): 191 | xx = Activation(d) 192 | 193 | elif op[0] == "to": 194 | xx = Convert(d) 195 | 196 | else: 197 | xx = Foo(d) 198 | 199 | return xx 200 | 201 | 202 | def main(): 203 | #Read cmd line arguments 204 | cmdArgs = parseArgs() 205 | 206 | output = Output(cmdArgs) 207 | output.header() 208 | 209 | idx = -1 210 | #Read in all the kernel info 211 | for line in cmdArgs.file: 212 | idx += 1 213 | kernel = eval(line) 214 | assert (kernel) 215 | kernels.append(kernel) 216 | 217 | k = kernel 218 | d = Data(k) 219 | 220 | mod = k['mod'] 221 | op = k['op'] 222 | 223 | flops = 0 224 | params = {"na": "na"} 225 | tc = "na" 226 | bytes = 0 227 | 228 | if (d.dir == "bprop"): 229 | d.seqMarker = k['seqMarker'] 230 | seq = k['seqId'] 231 | if len(seq) > 1: 232 | pass 233 | seq = k['seqId'][:1] 234 | assert (len(seq) == 1), seq 235 | #assert (seq[0] != 0) 236 | assert (len(d.seqMarker) > 0) 237 | #If there is no useful marker associated, use the 238 | #sequence number to find the kernel from fprop 239 | if len(d.argMarker) == 0: 240 | index = findFpropKernel(seq[0]) 241 | if index >= 0: 242 | d.argMarker = kernels[index]['marker'] 243 | d.modMarker = kernels[index]['reprMarkers'] 244 | mod = kernels[index]['mod'] 245 | op = kernels[index]['op'] 246 | 247 | d.layer = kernels[index]['layer'] 248 | d.trace = kernels[index]['trace'] 249 | 250 | # Check if marker has our annotations 251 | if len(d.argMarker) and Utility.hasNVTX(d.argMarker[0]): 252 | 253 | xx = foo(mod, op, d) 254 | 255 | bytes = xx.bytes() 256 | flops = xx.flops() 257 | op = xx.op() 258 | params = xx.params() 259 | tc = xx.tc() 260 | 261 | if type(op) is list: 262 | if len(op): 263 | op = op[0] 264 | else: 265 | op = "" 266 | 267 | if type(mod) is list: 268 | if len(mod): 269 | mod = mod[0] 270 | else: 271 | mod = "" 272 | 273 | d.index = idx + 1 274 | 275 | # The following 8 come from operator class functions. 276 | d.setParams(params) 277 | d.tc = tc 278 | d.flops = flops 279 | d.bytes = bytes 280 | d.mod = mod 281 | d.op = op 282 | 283 | output.data(d) 284 | 285 | 286 | kernels = [] 287 | if __name__ == '__main__': 288 | main() 289 | -------------------------------------------------------------------------------- /pyprof/prof/randomSample.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
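prof.py's foo() above maps each (module, op) marker onto one of the operator classes in this directory, and anything unrecognized falls back to Foo, which records tensor shapes but reports zero flops and bytes. Extending coverage therefore means one more OperatorLayerBase subclass plus one more elif branch. Below is a hedged sketch of such a class; the chosen op ("flip"), its cost model, and the class name are illustrative assumptions, not part of PyProf.

```python
from collections import OrderedDict
from .base import OperatorLayerBase
from .utility import Utility


class Flip(OperatorLayerBase):
    """Hypothetical handler for Tensor.flip, treated as pure data movement."""

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        self.mod_ = marker['mod']
        self.op_ = marker['op']
        t = marker['args'][0]
        assert t['type'] == "tensor"
        self.shape = t['shape']
        self.type = t['dtype']

    def params(self):
        return OrderedDict([('T', self.shape), ('type', self.type)])

    def tc(self):
        return "-"

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

    def bytes(self):
        # read the input once, write the output once
        return 2 * Utility.numElems(self.shape) * Utility.typeToBytes(self.type)

    def flops(self):
        # index shuffling only, no arithmetic
        return 0

# ...and in prof.py's foo():  elif op[0] == "flip": xx = Flip(d)
```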
17 | 18 | from collections import OrderedDict 19 | from .utility import Utility 20 | from .base import OperatorLayerBase 21 | 22 | 23 | class RandPerm(OperatorLayerBase): 24 | 25 | def __init__(self, d): 26 | marker = eval(d.argMarker[0]) 27 | mod = marker['mod'] 28 | op = marker['op'] 29 | args = marker['args'] 30 | 31 | self.marker = marker 32 | self.mod_ = mod 33 | self.op_ = op 34 | self.args = args 35 | 36 | assert (mod == "torch") 37 | assert (op == "randperm") 38 | assert (len(args) == 1) 39 | n = args[0] 40 | assert n['type'] == "int" 41 | self.n = n['value'] 42 | 43 | def params(self): 44 | p = OrderedDict([('N', self.n)]) 45 | return p 46 | 47 | def tc(self): 48 | return "-" 49 | 50 | def op(self): 51 | return self.op_ 52 | 53 | def mod(self): 54 | return self.mod_ 55 | 56 | def bytes(self): 57 | return self.n * Utility.typeToBytes("int64") 58 | 59 | def flops(self): 60 | # Depends on RNG but this is probably a reasonable assumption. 61 | return self.n * 3 62 | -------------------------------------------------------------------------------- /pyprof/prof/recurrentCell.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from collections import OrderedDict 19 | from .tc import TC_Whitelist 20 | from .utility import Utility 21 | from .base import OperatorLayerBase 22 | 23 | 24 | def hasTileSize(name): 25 | if ("sgemm" in name) or ("884gemm" in name) or ("hgemm" in name): 26 | return True 27 | else: 28 | return False 29 | 30 | 31 | def ctaTile(name): 32 | name = name.split("_") 33 | name = list(filter(lambda x: "x" in x, name)) 34 | name = list(filter(lambda x: "slice" not in x, name)) 35 | assert (len(name) == 1) 36 | name = name[0].split("x") 37 | assert (len(name) == 2) 38 | name = list(map(int, name)) 39 | return name[0], name[1] 40 | 41 | 42 | class RNNCell(OperatorLayerBase): 43 | """ 44 | This class supports RNNCell, LSTMCell and GRUCell. 
45 | """ 46 | 47 | def __init__(self, d): 48 | marker = eval(d.argMarker[0]) 49 | mod = marker['mod'] 50 | op = marker['op'] 51 | args = marker['args'] 52 | 53 | self.marker = marker 54 | self.mod_ = mod 55 | self.op_ = op 56 | self.args = args 57 | 58 | self.name = d.name 59 | self.dir = d.dir 60 | self.sub = d.sub 61 | self.grid = d.grid 62 | 63 | assert (op == "forward") 64 | assert (mod in ["LSTMCell", "GRUCell", "RNNCell"]) 65 | assert (len(args) in [2, 3]) 66 | 67 | x, h = args[0], args[1] 68 | b1, ii = x['shape'] 69 | b2, hh = h['shape'] 70 | assert b1 == b2 71 | assert x['dtype'] == h['dtype'] 72 | t = x['dtype'] 73 | 74 | self.cell = mod 75 | self.inp = ii 76 | self.hid = hh 77 | self.b = b1 78 | self.type = t 79 | 80 | self.multiple = 1 81 | if self.cell == "LSTMCell": 82 | self.multiple = 4 83 | elif self.cell == "GRUCell": 84 | self.multiple = 3 85 | 86 | self.gemm = None 87 | self.m = None 88 | self.n = None 89 | self.k = None 90 | self.elems = 0 91 | 92 | self.bar() 93 | 94 | def params(self): 95 | if self.gemm is None: 96 | p = OrderedDict([('cell', self.cell), ('X', self.inp), ('H', self.hid), ('B', self.b), ('type', self.type)]) 97 | else: 98 | assert self.m is not None 99 | assert self.n is not None 100 | assert self.k is not None 101 | p = OrderedDict([('gemm', self.gemm), ('M', self.m), ('N', self.n), ('K', self.k), ('type', self.type)]) 102 | return p 103 | 104 | def tc(self): 105 | if "gemm" in self.name: 106 | if self.name in TC_Whitelist(): 107 | return 1 108 | return 0 109 | else: 110 | return "-" 111 | 112 | def op(self): 113 | return self.op_ 114 | 115 | def mod(self): 116 | return self.mod_ 117 | 118 | def bytes(self): 119 | if self.gemm is not None: 120 | m, n, k, t = self.m, self.n, self.k, self.type 121 | b = (m * k + k * n + m * n) * Utility.typeToBytes(t) 122 | elif self.elems != 0: 123 | b = self.elems * Utility.typeToBytes(self.type) 124 | else: 125 | b = 0 126 | return b 127 | 128 | def flops(self): 129 | if self.gemm is not None: 130 | m, n, k = self.m, self.n, self.k 131 | f = 2 * m * n * k 132 | elif self.elems != 0: 133 | f = 0 #TODO 134 | else: 135 | f = 0 136 | return f 137 | 138 | def bar(self): 139 | cell = self.cell 140 | X = self.inp 141 | H = self.hid 142 | B = self.b 143 | t = self.type 144 | subseqId = self.sub 145 | direc = self.dir 146 | name = self.name 147 | grid = self.grid 148 | multiple = self.multiple 149 | 150 | if direc == "fprop": 151 | subseqId = subseqId % 3 152 | if subseqId == 0: #layer gemm 153 | self.gemm = "layer" 154 | self.m = multiple * H 155 | self.n = B 156 | self.k = X 157 | elif subseqId == 1: #recurrent gemm 158 | self.gemm = "recur" 159 | self.m = multiple * H 160 | self.n = B 161 | self.k = H 162 | else: 163 | layerGemmElems = multiple * H * B 164 | recurGemmElems = multiple * H * B 165 | cElems = H * B 166 | hElems = H * B 167 | totElems = layerGemmElems + recurGemmElems + 2 * cElems + hElems 168 | self.elems = totElems 169 | 170 | else: 171 | if ("gemm" in name) and hasTileSize(name): #gemm 172 | #Get cta tile size 173 | tileX, tileY = ctaTile(name) 174 | #Get grid dimensions 175 | grid = grid.split(",") 176 | gridX, gridY, gridZ = map(lambda x: int(x), grid) 177 | 178 | gemmM = tileX * gridX 179 | gemmN = tileY * gridY 180 | 181 | if name[-3:] == "_nn": # dgrad 182 | if (gemmM == H): # recurrent dgrad 183 | #Ideally gemmN = B, but we have a limited set of tile sizes. 
184 | gemmN = B 185 | gemmK = multiple * H 186 | 187 | self.gemm = "recur" 188 | self.m = gemmM 189 | self.n = gemmN 190 | self.k = gemmK 191 | 192 | elif (gemmM == X): # layer dgrad 193 | #assert(gemmN % B == 0) 194 | gemmK = multiple * H 195 | 196 | self.gemm = "layer" 197 | self.m = gemmM 198 | self.n = gemmN 199 | self.k = gemmK 200 | 201 | else: 202 | pass 203 | 204 | elif name[-3:] == "_nt": #wgrad 205 | if (gemmM == H): #recurrent wgrad 206 | assert (gemmN == multiple * H) 207 | gemmK = B 208 | 209 | self.gemm = "recur" 210 | self.m = gemmM 211 | self.n = gemmN 212 | self.k = gemmK 213 | 214 | elif (gemmM == X): #layer wgrad 215 | assert (gemmN == multiple * H) 216 | gemmK = B 217 | 218 | self.gemm = "layer" 219 | self.m = gemmM 220 | self.n = gemmN 221 | self.k = gemmK 222 | 223 | else: 224 | pass 225 | else: 226 | pass 227 | else: 228 | pass 229 | 230 | return 231 | -------------------------------------------------------------------------------- /pyprof/prof/reduction.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
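On the bprop side, RNNCell.bar() above cannot rely on the marker for GEMM shapes, so it reconstructs them from the kernel name and launch grid: the CTA tile parsed out of the name (via ctaTile) times the grid dimensions gives M and N, and the _nn/_nt suffix separates dgrad from wgrad. Below is a small worked example of that reconstruction; the kernel name and grid are made up for illustration.

```python
name = "volta_sgemm_128x64_nn"    # hypothetical recurrent dgrad kernel name
grid = "8,16,1"                   # hypothetical launch grid "x,y,z"

tile = [p for p in name.split("_") if "x" in p and "slice" not in p][0]
tileX, tileY = map(int, tile.split("x"))           # 128, 64
gridX, gridY, gridZ = map(int, grid.split(","))    # 8, 16, 1

gemmM = tileX * gridX   # 1024
gemmN = tileY * gridY   # 1024
# If gemmM equals the hidden size H, this is the recurrent dgrad GEMM with
# K = multiple * H (4*H for LSTMCell, 3*H for GRUCell); if it equals the
# input size X, it is the layer dgrad GEMM.
```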
17 | 18 | from collections import OrderedDict 19 | from .utility import Utility 20 | from .base import OperatorLayerBase 21 | from .tensor import Tensor 22 | 23 | 24 | class Mean(OperatorLayerBase): 25 | 26 | def __init__(self, d): 27 | marker = eval(d.argMarker[0]) 28 | mod = marker['mod'] 29 | op = marker['op'] 30 | args = marker['args'] 31 | 32 | self.mod_ = mod 33 | self.op_ = op 34 | 35 | assert (mod in ["torch", "Tensor"]) 36 | assert (op == "mean") 37 | 38 | #Filter out named parameters 39 | args = list(filter(lambda x: x['name'] == '', args)) 40 | 41 | assert (len(args) <= 2) 42 | i = args[0] 43 | 44 | # The input can be a scalar or a tensor 45 | if 'shape' in i: # tensor 46 | self.input = Tensor(i['shape'], i['dtype']) 47 | else: # scalar 48 | assert ('value' in i) 49 | self.input = Tensor([], i['type']) 50 | 51 | self.dir = d.dir 52 | self.sub = d.sub 53 | 54 | def params(self): 55 | return str(self.input) 56 | 57 | def tc(self): 58 | return "-" 59 | 60 | def op(self): 61 | return self.op_ 62 | 63 | def mod(self): 64 | return self.mod_ 65 | 66 | def bytes(self): 67 | if self.sub == 0: 68 | return self.input.bytes + self.input.itemsize 69 | else: 70 | return 0 71 | 72 | def flops(self): 73 | if self.sub == 0: 74 | return self.input.size + 1 75 | else: 76 | return 0 77 | 78 | 79 | class Sum(OperatorLayerBase): 80 | 81 | def __init__(self, d): 82 | marker = eval(d.argMarker[0]) 83 | mod = marker['mod'] 84 | op = marker['op'] 85 | args = marker['args'] 86 | 87 | self.marker = marker 88 | self.mod_ = mod 89 | self.op_ = op 90 | self.args = args 91 | 92 | assert (mod in ["torch", "Tensor"]) 93 | assert (op == "sum") 94 | assert (len(args) >= 1) 95 | 96 | #Get input 97 | if (args[0]['name'] == ""): 98 | i = args[0] 99 | else: 100 | i = list(filter(lambda x: x['name'] == "input", args))[0] 101 | 102 | self.shape = i['shape'] 103 | self.type = i['dtype'] 104 | self.sub = d.sub 105 | 106 | def params(self): 107 | p = OrderedDict([('T', self.shape), ('type', self.type)]) 108 | return p 109 | 110 | def tc(self): 111 | return "-" 112 | 113 | def op(self): 114 | return self.op_ 115 | 116 | def mod(self): 117 | return self.mod_ 118 | 119 | def elems(self): 120 | return Utility.numElems(self.shape) 121 | 122 | def flops(self): 123 | # Note: This is incorrect, need to calculate actual flops (say via nvprof) 124 | return self.elems() 125 | 126 | def bytes(self): 127 | b = self.elems() * Utility.typeToBytes(self.type) 128 | if self.sub == 0: 129 | return b 130 | else: 131 | return 0 132 | 133 | 134 | class Norm(OperatorLayerBase): 135 | 136 | def __init__(self, d): 137 | marker = eval(d.argMarker[0]) 138 | mod = marker['mod'] 139 | op = marker['op'] 140 | args = marker['args'] 141 | 142 | self.marker = marker 143 | self.mod_ = mod 144 | self.op_ = op 145 | self.args = args 146 | 147 | assert (mod in ["torch", "Tensor"]) 148 | assert (op == "norm") 149 | #assert (len(args) == 1) 150 | i = args[0] 151 | self.shape = i['shape'] 152 | self.type = i['dtype'] 153 | self.sub = d.sub 154 | 155 | def params(self): 156 | p = OrderedDict([('T', self.shape), ('type', self.type)]) 157 | return p 158 | 159 | def elems(self): 160 | return Utility.numElems(self.shape) 161 | 162 | def bytes(self): 163 | b = self.elems() * Utility.typeToBytes(self.type) 164 | if self.sub == 0: 165 | return b 166 | else: 167 | return 0 168 | 169 | def flops(self): 170 | # square and add plus sqrt 171 | f = 2 * self.elems() + 1 172 | if self.sub == 0: 173 | return f 174 | else: 175 | return 0 176 | 177 | def tc(self): 178 | return "-" 
179 | 180 | def op(self): 181 | return self.op_ 182 | 183 | def mod(self): 184 | return self.mod_ 185 | -------------------------------------------------------------------------------- /pyprof/prof/softmax.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | from .base import OperatorLayerBase 19 | from .tensor import Tensor 20 | 21 | 22 | class Softmax(OperatorLayerBase): 23 | 24 | def __init__(self, d): 25 | marker = eval(d.argMarker[0]) 26 | mod = marker['mod'] 27 | op = marker['op'] 28 | args = marker['args'] 29 | 30 | self.mod_ = mod 31 | self.op_ = op 32 | 33 | assert (mod == "torch.nn.functional") 34 | assert (op == "softmax") 35 | 36 | #Filter out named parameters 37 | args = list(filter(lambda x: x['name'] == '', args)) 38 | 39 | assert (len(args) <= 2) 40 | arg = args[0] 41 | self.input = Tensor(arg['shape'], arg['dtype']) 42 | self.dir = d.dir 43 | return 44 | 45 | def op(self): 46 | return self.op_ 47 | 48 | def mod(self): 49 | return self.mod_ 50 | 51 | def tc(self): 52 | return "-" 53 | 54 | def params(self): 55 | return str(self.input) 56 | 57 | def flops(self): 58 | # An approximation 59 | # http://ai.stanford.edu/~paskin/slam/javadoc/javaslam/util/Flops.html#exp() 60 | # TODO: consider direction 61 | e = self.input.size 62 | f = e * 20 # denominator, exp all elements and reduce 63 | f += e * 20 # numerator, exp all elements and divide 64 | return f 65 | 66 | def bytes(self): 67 | # TODO: verify 68 | b = self.input.bytes 69 | # fprop is 2 reads, 1 write 70 | # bprop is 4 reads, 1 write 71 | b *= 3 if self.dir == "fprop" else 5 72 | return b 73 | 74 | 75 | class LogSoftmax(OperatorLayerBase): 76 | 77 | def __init__(self, d): 78 | marker = eval(d.argMarker[0]) 79 | mod = marker['mod'] 80 | op = marker['op'] 81 | args = marker['args'] 82 | 83 | self.mod_ = mod 84 | self.op_ = op 85 | 86 | assert (mod in ["torch", "Tensor", "torch.nn.functional"]) 87 | assert (op == "log_softmax") 88 | 89 | #Filter out named parameters 90 | args = list(filter(lambda x: x['name'] == '', args)) 91 | 92 | assert (len(args) <= 2) 93 | 94 | #Get input 95 | if (args[0]['name'] == ""): 96 | i = args[0] 97 | else: 98 | i = list(filter(lambda x: x['name'] == "input", args))[0] 99 | 100 | self.input = Tensor(i['shape'], i['dtype']) 101 | self.dir = d.dir 102 | return 103 | 104 | def op(self): 105 | return self.op_ 106 | 107 | def mod(self): 108 | return self.mod_ 109 | 110 | def tc(self): 111 | return "-" 112 | 113 | def params(self): 114 | return str(self.input) 115 | 116 | def flops(self): 117 | # An approximation 118 | # http://ai.stanford.edu/~paskin/slam/javadoc/javaslam/util/Flops.html#exp() 119 | # TODO: consider direction 120 | e = self.input.size 121 | f = e * 20 # denominator, exp all elements and reduce 122 | f += e # numerator, just a 
subtraction 123 | return f 124 | 125 | def bytes(self): 126 | # TODO: verify 127 | b = self.input.bytes 128 | # fprop is 2 reads, 1 write 129 | # bprop is 4 reads, 1 write 130 | b *= 3 if self.dir == "fprop" else 5 131 | return b 132 | -------------------------------------------------------------------------------- /pyprof/prof/tc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | 19 | class TC_Whitelist: 20 | whitelist = ['h884', 's884', 'h1688', 's1688', 'hmma', 'i8816', '16816', 21 | 'dgrad_1x1_stride_2x2', 'first_layer_wgrad_kernel', 'conv1x1', 22 | 'conv2d_c1_k1', 'direct_group', 'xmma_implicit_gemm', 23 | 'xmma_sparse_conv', 'xmma_warp_specialized_implicit_gemm', 24 | 'xmma_gemm', 'xmma_sparse_gemm', 'c1688'] 25 | def __contains__(self, item): 26 | for pattern in self.whitelist: 27 | if pattern in item: 28 | return True 29 | return False 30 | -------------------------------------------------------------------------------- /pyprof/prof/tensor.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2020, Aditya Agrawal. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | from functools import reduce 18 | import numpy as np 19 | from .dtype import Dtype 20 | 21 | class Tensor(object): 22 | def __init__(self, shape, dtype): 23 | assert type(shape) in [tuple, list] 24 | assert dtype in Dtype.types() 25 | self._shape = list(shape) 26 | self._dtype = dtype 27 | 28 | def __str__(self): 29 | t = Dtype(self.dtype) 30 | return str(self.shape).replace(" ", "") + str(t) 31 | 32 | @property 33 | def ndim(self): 34 | # can be 0 for scalars 35 | return len(self._shape) 36 | 37 | @property 38 | def shape(self): 39 | # can be () for scalars 40 | return self._shape 41 | 42 | @property 43 | def size(self): 44 | # number of elements 45 | return reduce(lambda x, y: x * y, self.shape, 1) 46 | 47 | @property 48 | def dtype(self): 49 | return self._dtype 50 | 51 | @property 52 | def itemsize(self): 53 | return Dtype(self.dtype).itemsize 54 | 55 | @property 56 | def bytes(self): 57 | return self.size * self.itemsize 58 | 59 | @staticmethod 60 | def broadcast(tensors): 61 | r''' 62 | The input is a list of Tensors. 
63 | The output is a Tensor. 64 | ''' 65 | 66 | assert len(tensors) > 1 67 | shape = tensors[0].shape 68 | # TODO: Assume the output dtype is the same as the first arg 69 | dt = tensors[0].dtype 70 | 71 | # Check if shapes are different 72 | if any(t.shape != shape for t in tensors): 73 | x = [np.empty(t.shape, t.dtype) for t in tensors] 74 | try: 75 | out = np.broadcast(*x) 76 | except: 77 | assert False # not broadcastable 78 | return Tensor(out.shape, dt) 79 | else: 80 | return Tensor(shape, dt) 81 | 82 | def main(): 83 | for shape in [(), (1,), (3,7), (3,7,11)]: 84 | for dt in Dtype.types(): 85 | t = Tensor(shape, dt) 86 | print(t.ndim, str(t.shape).replace(" ", ""), \ 87 | t.size, t.dtype, t.itemsize, t.bytes, t) 88 | 89 | # Broadcast test 90 | a = Tensor([1,3], "int") 91 | b = Tensor([3,1], "float") 92 | c = Tensor([1,3], "float64") 93 | d = np.ones([], "float64") 94 | out = Tensor.broadcast([a,b,c,d]) 95 | print(out.shape) 96 | 97 | if __name__ == '__main__': 98 | main() 99 | -------------------------------------------------------------------------------- /pyprof/prof/usage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import sys 19 | import argparse 20 | 21 | 22 | def parseArgs(): 23 | """ 24 | Print usage and parse arguments. 25 | """ 26 | 27 | def check_cols(value): 28 | valid = [ 29 | "idx", "seq", "altseq", "tid", "layer", "trace", "dir", "sub", "mod", "op", "kernel", "params", "sil", "tc", 30 | "device", "stream", "grid", "block", "flops", "bytes" 31 | ] 32 | cols = value.split(",") 33 | for col in cols: 34 | if col not in valid: 35 | raise argparse.ArgumentTypeError( 36 | "{} is not a valid column name. Valid column names are {}.".format(col, ",".join(valid)) 37 | ) 38 | return cols 39 | 40 | def openFile(f): 41 | try: 42 | d = open(f, "r") 43 | return d 44 | except IOError: 45 | print("Error opening file {}. Exiting.".format(f), file=sys.stderr) 46 | sys.exit(1) 47 | 48 | parser = argparse.ArgumentParser( 49 | prog=sys.argv[0], description="PyTorch Profiler", formatter_class=argparse.RawTextHelpFormatter 50 | ) 51 | parser.add_argument("file", nargs='?', type=str, default=None, help="Output of parse.py (Python dictionary).") 52 | 53 | parser.add_argument( 54 | "-c", type=check_cols, default="idx,dir,sub,mod,op,kernel,params,sil", 55 | help='''Comma seperated names of columns to print. 
56 | idx: Index 57 | seq: PyTorch Sequence Id 58 | altseq: PyTorch Alternate Sequence Id 59 | tid: Thread Id 60 | layer: User annotated NVTX string (can be nested) 61 | trace: Function Call Trace 62 | dir: Direction 63 | sub: Sub Sequence Id 64 | mod: Module 65 | op: Operation 66 | kernel: Kernel Name 67 | params: Parameters 68 | sil: Silicon Time (in ns) 69 | tc: Tensor Core Usage 70 | device: GPU Device Id 71 | stream: Stream Id 72 | grid: Grid Dimensions 73 | block: Block Dimensions 74 | flops: Floating point ops (FMA = 2 FLOPs) 75 | bytes: Number of bytes in and out of DRAM 76 | e.g. -c idx,kernel,sil''' 77 | ) 78 | 79 | group = parser.add_mutually_exclusive_group() 80 | group.add_argument("--csv", action="store_true", default=False, help="Print a CSV output.") 81 | group.add_argument("-w", type=int, default=0, help="Width of columnar output.") 82 | 83 | args = parser.parse_args() 84 | if args.file is None: 85 | args.file = sys.stdin 86 | else: 87 | args.file = openFile(args.file) 88 | return args 89 | -------------------------------------------------------------------------------- /pyprof/prof/utility.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License.
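# Shape/dtype helper routines. Illustrative examples, derived from the code below: numElems((3, 7)) == 21, typeToBytes("float16") == 2, typeToString("half") == "fp16".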
17 | 18 | from functools import reduce 19 | 20 | 21 | class Utility(object): 22 | 23 | @staticmethod 24 | def numElems(shape): 25 | assert (type(shape) == tuple) 26 | return reduce(lambda x, y: x * y, shape, 1) 27 | 28 | @staticmethod 29 | def typeToBytes(t): 30 | if (t in ["uint8", "int8", "byte", "char", "bool"]): 31 | return 1 32 | elif (t in ["float16", "half", "int16", "short"]): 33 | return 2 34 | elif (t in ["float32", "float", "int32", "int"]): 35 | return 4 36 | elif (t in ["int64", "long", "float64", "double"]): 37 | return 8 38 | assert False 39 | 40 | @staticmethod 41 | def typeToString(t): 42 | if (t in ["uint8", "byte", "char"]): 43 | return "uint8" 44 | elif (t in [ 45 | "int8", 46 | ]): 47 | return "int8" 48 | elif (t in [ 49 | "int16", 50 | "short", 51 | ]): 52 | return "int16" 53 | elif (t in ["float16", "half"]): 54 | return "fp16" 55 | elif (t in ["float32", "float"]): 56 | return "fp32" 57 | elif (t in [ 58 | "int32", 59 | "int", 60 | ]): 61 | return "int32" 62 | elif (t in ["int64", "long"]): 63 | return "int64" 64 | elif (t in [ 65 | "float64", 66 | "double", 67 | ]): 68 | return "fp64" 69 | elif (t in [ 70 | "bool", 71 | ]): 72 | return "bool" 73 | assert False 74 | 75 | @staticmethod 76 | def hasNVTX(marker): 77 | if type(marker) is str: 78 | try: 79 | marker = eval(marker) 80 | except: 81 | return False 82 | 83 | if type(marker) is dict: 84 | keys = marker.keys() 85 | return ("mod" in keys) and ("op" in keys) and ("args" in keys) 86 | else: 87 | return False 88 | 89 | @staticmethod 90 | def isscalar(t): 91 | return (t in ["float", "int"]) 92 | -------------------------------------------------------------------------------- /qa/L0_docs/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | TEST_LOG="./docs.log" 17 | 18 | rm -f $TEST_LOG 19 | RET=0 20 | 21 | apt-get update && \ 22 | apt-get install -y --no-install-recommends python3-pip zip doxygen && \ 23 | rm -rf /root/.cache/pip && \ 24 | pip uninstall -y Sphinx && \ 25 | pip3 install --upgrade setuptools wheel && \ 26 | pip3 install --upgrade sphinx==2.4.4 sphinx-rtd-theme==0.4.3 \ 27 | nbsphinx==0.6.0 breathe==4.14.1 28 | 29 | set +e 30 | 31 | # Set visitor script to be included on every HTML page 32 | export VISITS_COUNTING_SCRIPT=//assets.adobedtm.com/b92787824f2e0e9b68dc2e993f9bd995339fe417/satelliteLib-7ba51e58dc61bcb0e9311aadd02a0108ab24cc6c.js 33 | 34 | (cd docs && rm -f pyprof_docs.zip && \ 35 | make BUILDDIR=/opt/pytorch/pyprof/qa/L0_docs/build clean html) > $TEST_LOG 2>&1 36 | if [ $? -ne 0 ]; then 37 | RET=1 38 | fi 39 | 40 | (cd build && zip -r ../pyprof_docs.zip html) 41 | if [ $? 
-ne 0 ]; then 42 | RET=1 43 | fi 44 | 45 | set -e 46 | 47 | if [ $RET -eq 0 ]; then 48 | echo -e "\n***\n*** Test Passed\n***" 49 | else 50 | cat $TEST_LOG 51 | echo -e "\n***\n*** Test FAILED\n***" 52 | fi 53 | 54 | exit $RET 55 | -------------------------------------------------------------------------------- /qa/L0_lenet/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | TEST_LOG="./data.log" 17 | 18 | 19 | rm -f $TEST_LOG 20 | RET=0 21 | 22 | set +e 23 | 24 | ./test_lenet.py > $TEST_LOG 2>&1 25 | if [ $? -ne 0 ]; then 26 | RET=1 27 | fi 28 | 29 | set -e 30 | 31 | if [ $RET -eq 0 ]; then 32 | echo -e "\n***\n*** Test Passed\n***" 33 | else 34 | cat $TEST_LOG 35 | echo -e "\n***\n*** Test FAILED\n***" 36 | fi 37 | 38 | exit $RET 39 | -------------------------------------------------------------------------------- /qa/L0_lenet/test_lenet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ''' 18 | This test runs lenet through the 3 steps on pyprof. 
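(The steps are: nsys profile to produce an SQLite database, python -m pyprof.parse to build the dict, and python -m pyprof.prof to emit the CSV, as invoked by the test methods below.)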
19 | It ensures: 20 | - A database is created from nsys 21 | - A dict is created from pyprof.parse 22 | - A csv with valid data is created from pyprof.prof 23 | ''' 24 | 25 | import subprocess 26 | from pathlib import Path 27 | import unittest 28 | import csv 29 | 30 | unittest.TestLoader.sortTestMethodsUsing = None 31 | 32 | 33 | class TestPyprofWithLenet(unittest.TestCase): 34 | 35 | @classmethod 36 | def setUpClass(cls): 37 | cls.pyprof_path = Path("/opt/pytorch/pyprof/pyprof/examples") 38 | 39 | def test_run_nsys(self): 40 | # Print a blank line to make the test output more readable 41 | print() 42 | command = "nsys profile -f true -o lenet --export sqlite python " + self.pyprof_path.as_posix() + "/lenet.py" 43 | command_tokens = command.split() 44 | 45 | ret_val = subprocess.run(command_tokens) 46 | 47 | self.assertEqual(ret_val.returncode, 0) 48 | db_path = Path('./lenet.sqlite') 49 | self.assertTrue(db_path.exists()) 50 | 51 | def test_run_parse(self): 52 | command = "python -m pyprof.parse lenet.sqlite" 53 | command_tokens = command.split() 54 | 55 | with open("lenet.dict", "w") as f: 56 | ret_val = subprocess.run(command_tokens, stdout=f) 57 | 58 | self.assertEqual(ret_val.returncode, 0) 59 | dict_path = Path('./lenet.dict') 60 | self.assertTrue(dict_path.exists()) 61 | 62 | def test_run_profile(self): 63 | lenet_csv = "./lenet.csv" 64 | command = "python -m pyprof.prof --csv lenet.dict" 65 | command_tokens = command.split() 66 | with open(lenet_csv, "w") as f: 67 | ret_val = subprocess.run(command_tokens, stdout=f) 68 | 69 | self.assertEqual(ret_val.returncode, 0) 70 | csv_path = Path(lenet_csv) 71 | self.assertTrue(csv_path.exists()) 72 | 73 | directions = ["bprop", "fprop"] 74 | ops = [ 75 | "", # covers the "reduce_kernel" kernel, op will be an empty string in the report 76 | "add_", 77 | "backward", 78 | "bias", 79 | "conv2d", 80 | "linear", 81 | "max_pool2d", 82 | "mse_loss", 83 | "relu", 84 | "sum", 85 | ] 86 | 87 | with open("lenet.csv", "r") as csvfile: 88 | reader = csv.DictReader(csvfile) 89 | for row in reader: 90 | # verify direction 91 | self.assertTrue(row['Direction'] in directions, f"Row direction: {row['Direction']}") 92 | # verify op 93 | self.assertTrue(row['Op'] in ops, f"Row op: {row['Op']}") 94 | # verify final id is in the range 95 | # Which kernel cuDNN uses is nondeterministic. 96 | # While the exact number of kernels is not clear, for this network the final Idx should fall within the range checked below. 97 | self.assertTrue(int(row['Idx']) in range(65, 75), f"Final Idx: {row['Idx']}") 98 | 99 | 100 | if __name__ == '__main__': 101 | unittest.main(verbosity=2) 102 | -------------------------------------------------------------------------------- /qa/L0_nvtx/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | from test_pyprof_nvtx import TestPyProfNvtx 16 | -------------------------------------------------------------------------------- /qa/L0_nvtx/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | TEST_LOG="./nvtx.log" 17 | 18 | 19 | apt-get update && \ 20 | apt-get install -y --no-install-recommends python3 21 | 22 | rm -f $TEST_LOG 23 | RET=0 24 | 25 | ./test_pyprof_nvtx.py > $TEST_LOG 2>&1 26 | if [ $? -ne 0 ]; then 27 | RET=1 28 | fi 29 | 30 | set -e 31 | 32 | if [ $RET -eq 0 ]; then 33 | echo -e "\n***\n*** Test Passed\n***" 34 | else 35 | cat $TEST_LOG 36 | echo -e "\n***\n*** Test FAILED\n***" 37 | fi 38 | 39 | exit $RET -------------------------------------------------------------------------------- /qa/L0_pyprof_data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA/PyProf/218dcc183bf7fdf97dbfc648878a3d09aea3b199/qa/L0_pyprof_data/__init__.py -------------------------------------------------------------------------------- /qa/L0_pyprof_data/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | TEST_LOG="./data.log" 17 | 18 | 19 | apt-get update && \ 20 | apt-get install -y --no-install-recommends python 21 | 22 | rm -f $TEST_LOG 23 | RET=0 24 | 25 | ./test_pyprof_data.py > $TEST_LOG 2>&1 26 | if [ $? -ne 0 ]; then 27 | RET=1 28 | fi 29 | 30 | set -e 31 | 32 | if [ $RET -eq 0 ]; then 33 | echo -e "\n***\n*** Test Passed\n***" 34 | else 35 | cat $TEST_LOG 36 | echo -e "\n***\n*** Test FAILED\n***" 37 | fi 38 | 39 | exit $RET -------------------------------------------------------------------------------- /qa/L0_pyprof_data/test_pyprof_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ''' 18 | This test creates 2 kernels and exercises the pyprof code for generating their representation. 19 | ''' 20 | import inspect 21 | import unittest 22 | 23 | from pyprof.prof.data import Data 24 | from pyprof.prof.prof import foo 25 | 26 | 27 | class TestPyProfData(unittest.TestCase): 28 | 29 | def __init__(self, testName): 30 | super().__init__(testName) 31 | 32 | def setUp(self): 33 | pass 34 | 35 | def tearDown(self): 36 | pass 37 | 38 | def test_data(self): 39 | kernels = [ 40 | { 41 | 'kShortName': 42 | 'elementwise_kernel', 43 | 'kDuration': 44 | 2848, 45 | 'layer': [], 46 | 'trace': [], 47 | 'reprMarkers': [], 48 | 'marker': 49 | [ 50 | "{'mod': 'Tensor', 'op': 'float', 'args': [{'name': '', 'type': 'tensor', 'shape': (18, 104, 160), 'dtype': 'bool'}]}" 51 | ], 52 | 'seqMarker': ['to, seq = 60471'], 53 | 'seqId': [60471], 54 | 'subSeqId': 55 | 0, 56 | 'altSeqId': [], 57 | 'dir': 58 | 'fprop', 59 | 'mod': ['Tensor'], 60 | 'op': ['float'], 61 | 'tid': 62 | 1431533376, 63 | 'device': 64 | 0, 65 | 'stream': 66 | 7, 67 | 'grid': (585, 1, 1), 68 | 'block': (512, 1, 1), 69 | 'kLongName': 70 | 'void at::native::elementwise_kernel<512, 1, void at::native::gpu_kernel_impl(at::TensorIterator&)::{lambda(bool)#1}>(at::TensorIterator&, void at::native::copy_kernel_impl(at::TensorIterator&)::{lambda(bool)#1} const&)::{lambda(int)#1}>(int, void at::native::gpu_kernel_impl(at::TensorIterator&)::{lambda(bool)#1}>(at::TensorIterator&, void at::native::copy_kernel_impl(at::TensorIterator&)::{lambda(bool)#1} const&)::{lambda(int)#1})' 71 | }, 72 | { 73 | 'kShortName': 74 | 'elementwise_kernel', 75 | 'kDuration': 76 | 201182, 77 | 'layer': [], 78 | 'trace': [], 79 | 'reprMarkers': [], 80 | 'marker': 81 | [ 82 | "{'mod': 'Tensor', 'op': 'clone', 'args': [{'name': '', 'type': 'tensor', 'shape': (18, 4, 416, 640), 'dtype': 'float32'}]}" 83 | ], 84 | 'seqMarker': ['clone, seq = 60161'], 85 | 'seqId': [60161], 86 | 'subSeqId': 87 | 0, 88 | 'altSeqId': [], 89 | 'dir': 90 | 'fprop', 91 | 'mod': ['Tensor'], 92 | 'op': ['clone'], 93 | 'tid': 94 | 1431533376, 95 | 'device': 96 | 0, 97 | 'stream': 98 | 7, 99 | 'grid': (37440, 1, 1), 100 | 'block': (128, 1, 1), 101 | 'kLongName': 102 | 'void at::native::elementwise_kernel<128, 4, void at::native::gpu_kernel_impl(at::TensorIterator&)::{lambda(float)#1}>(at::TensorIterator&, void at::native::copy_kernel_impl(at::TensorIterator&)::{lambda(float)#1} const&)::{lambda(int)#2}>(int, void at::native::gpu_kernel_impl(at::TensorIterator&)::{lambda(float)#1}>(at::TensorIterator&, void at::native::copy_kernel_impl(at::TensorIterator&)::{lambda(float)#1} const&)::{lambda(int)#2})' 103 | }, 104 | ] 105 | 106 | for k in kernels: 107 | d = Data(k) 108 | mod = k['mod'] 109 | op = k['op'] 110 | xx = foo(mod, op, d) 111 | d.setParams(xx.params()) 112 | 113 | 114 | def run_tests(test_name): 115 | dummy = TestPyProfData(test_name) 116 | test_cases = list( 117 | filter(lambda x: 'test_' in x, map(lambda x: x[0], inspect.getmembers(dummy, predicate=inspect.ismethod))) 118 | ) 119 | print(f'Running tests for {test_name}') 120 | suite = 
unittest.TestSuite() 121 | for test_case in test_cases: 122 | suite.addTest(TestPyProfData(test_case)) 123 | result = unittest.TextTestRunner(verbosity=2).run(suite) 124 | if result.wasSuccessful(): 125 | exit(0) 126 | else: 127 | exit(1) 128 | 129 | 130 | if __name__ == '__main__': 131 | run_tests('test_data') 132 | -------------------------------------------------------------------------------- /qa/common/check_copyright.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import argparse 18 | import os 19 | import re 20 | 21 | FLAGS = None 22 | SKIP_EXTS = ('jpeg', 'jpg', 'pgm', 'png', 23 | 'log', 'serverlog', 24 | 'preprocessed', 'jmx', 'gz', 25 | 'caffemodel', 'json') 26 | SKIP_PATHS = ('requirements.txt', 27 | 'requirements/requirements_nsys.txt', 28 | 'requirements/requirements.txt', 29 | 'qa/L0_docs/VERSION', 30 | 'LICENSE', 31 | 'VERSION', 32 | 'MANIFEST.in', 33 | 'build/', 34 | 'dist/', 35 | 'nvidia_pyprof.egg-info/') 36 | 37 | COPYRIGHT_YEAR_RE0 = 'Copyright \\(c\\) (20[0-9][0-9]),' 38 | COPYRIGHT_YEAR_RE1 = 'Copyright \\(c\\) (20[0-9][0-9])-(20[0-9][0-9]),' 39 | 40 | COPYRIGHT =''' 41 | 42 | Licensed under the Apache License, Version 2.0 (the "License"); 43 | you may not use this file except in compliance with the License. 44 | You may obtain a copy of the License at 45 | 46 | http://www.apache.org/licenses/LICENSE-2.0 47 | 48 | Unless required by applicable law or agreed to in writing, software 49 | distributed under the License is distributed on an "AS IS" BASIS, 50 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 51 | See the License for the specific language governing permissions and 52 | limitations under the License. 53 | ''' 54 | 55 | single_re = re.compile(COPYRIGHT_YEAR_RE0) 56 | range_re = re.compile(COPYRIGHT_YEAR_RE1) 57 | 58 | def visit(path): 59 | if FLAGS.verbose: 60 | print("visiting " + path) 61 | 62 | for skip in SKIP_EXTS: 63 | if path.endswith('.' + skip): 64 | if FLAGS.verbose: 65 | print("skipping due to extension: " + path) 66 | return True 67 | 68 | for skip in SKIP_PATHS: 69 | if path.startswith(skip): 70 | if FLAGS.verbose: 71 | print("skipping due to path prefix: " + path) 72 | return True 73 | 74 | with open(path, 'r') as f: 75 | first_line = True 76 | second_line = True 77 | line = None 78 | try: 79 | for fline in f: 80 | line = fline 81 | 82 | # Skip any '#!', '..', '