├── .dockerignore
├── .gitignore
├── CONTRIBUTING.md
├── Dockerfile
├── LICENSE
├── MANIFEST.in
├── README.rst
├── VERSION
├── docs
│   ├── Makefile
│   ├── _templates
│   │   └── layout.html
│   ├── advanced.rst
│   ├── conf.py
│   ├── examples.rst
│   ├── faqs.rst
│   ├── index.rst
│   ├── install.rst
│   ├── profile.rst
│   └── quickstart.rst
├── pyprof
│   ├── __init__.py
│   ├── examples
│   │   ├── .gitignore
│   │   ├── custom_func_module
│   │   │   ├── README.md
│   │   │   ├── custom_function.py
│   │   │   ├── custom_module.py
│   │   │   └── test.sh
│   │   ├── imagenet
│   │   │   ├── imagenet.py
│   │   │   └── test.sh
│   │   ├── jit
│   │   │   ├── README.md
│   │   │   ├── jit_script_function.py
│   │   │   ├── jit_script_method.py
│   │   │   ├── jit_trace_function.py
│   │   │   ├── jit_trace_method.py
│   │   │   └── test.sh
│   │   ├── lenet.py
│   │   ├── operators.py
│   │   ├── simple.py
│   │   └── user_annotation
│   │       ├── README.md
│   │       ├── resnet.py
│   │       └── test.sh
│   ├── nvtx
│   │   ├── __init__.py
│   │   └── nvmarker.py
│   ├── parse
│   │   ├── __init__.py
│   │   ├── __main__.py
│   │   ├── db.py
│   │   ├── kernel.py
│   │   ├── nsight.py
│   │   ├── nvvp.py
│   │   └── parse.py
│   └── prof
│       ├── __init__.py
│       ├── __main__.py
│       ├── activation.py
│       ├── base.py
│       ├── blas.py
│       ├── conv.py
│       ├── convert.py
│       ├── data.py
│       ├── dropout.py
│       ├── dtype.py
│       ├── embedding.py
│       ├── index_slice_join_mutate.py
│       ├── linear.py
│       ├── loss.py
│       ├── memory.py
│       ├── misc.py
│       ├── normalization.py
│       ├── optim.py
│       ├── output.py
│       ├── pointwise.py
│       ├── pooling.py
│       ├── prof.py
│       ├── randomSample.py
│       ├── recurrentCell.py
│       ├── reduction.py
│       ├── softmax.py
│       ├── tc.py
│       ├── tensor.py
│       ├── usage.py
│       └── utility.py
├── qa
│   ├── L0_docs
│   │   └── test.sh
│   ├── L0_lenet
│   │   ├── test.sh
│   │   └── test_lenet.py
│   ├── L0_nvtx
│   │   ├── __init__.py
│   │   ├── test.sh
│   │   └── test_pyprof_nvtx.py
│   ├── L0_pyprof_data
│   │   ├── __init__.py
│   │   ├── test.sh
│   │   └── test_pyprof_data.py
│   └── common
│       ├── check_copyright.py
│       └── run_test.py
├── requirements
│   ├── requirements.txt
│   └── requirements_nsys.txt
├── setup.cfg
└── setup.py
/.dockerignore:
--------------------------------------------------------------------------------
1 | .git*
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | *.sql
3 | *.sqlite
4 | *.qdrep
5 | *.dict
6 | *.csv
7 | *.log
8 | *.pyc
9 |
10 | build/
11 | dist/
12 | nvidia_pyprof.egg-info/
13 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 |
16 |
17 | # Contribution Rules
18 |
19 | - The code style convention is enforced by clang-format. See the
20 | Developer Guide for instructions on how to ensure your contributions
21 | conform. In general, please follow the existing conventions in the
22 | relevant file, submodule, module, and project when you add new code
23 | or when you extend/fix existing functionality.
24 |
25 | - Avoid introducing unnecessary complexity into existing code so that
26 | maintainability and readability are preserved.
27 |
28 | - Try to keep pull requests (PRs) as concise as possible:
29 |
30 | - Avoid committing commented-out code.
31 |
32 | - Wherever possible, each PR should address a single concern. If
33 | there are several otherwise-unrelated things that should be fixed
34 | to reach a desired endpoint, it is perfectly fine to open several
35 | PRs and state in the description which PR depends on another
36 | PR. The more complex the changes are in a single PR, the more time
37 | it will take to review those changes.
38 |
39 | - Make sure that the build log is clean, meaning no warnings or
40 | errors should be present.
41 |
42 | - Make sure all `L0_*` tests pass:
43 |
44 | - In the `qa/` directory, there are basic sanity tests scripted in
45 | directories named `L0_...`. See the Testing section in the
46 | Developer Guide for instructions on running these tests.
47 |
48 | - PyProf's default build assumes recent versions of
49 | dependencies (CUDA, PyTorch, Nsight Systems, etc.). Contributions
50 | that add compatibility with older versions of those dependencies
51 | will be considered, but NVIDIA cannot guarantee that all possible
52 | build configurations work, are not broken by future contributions,
53 | and retain highest performance.
54 |
55 | - Make sure that you can contribute your work to open source (no
56 | license and/or patent conflict is introduced by your code). You need
57 | to [`sign`](#Sign) your commit.
58 |
59 | - Thanks in advance for your patience as we review your contributions;
60 | we do appreciate them!
61 |
62 | Sign Your Work
63 | --------------
64 |
65 | We require that all contributors "sign-off" on their commits. This
66 | certifies that the contribution is your original work, or you have
67 | rights to submit it under the same license, or a compatible license.
68 |
69 | Any contribution which contains commits that are not Signed-Off will
70 | not be accepted.
71 |
72 | To sign off on a commit you simply use the `--signoff` (or `-s`)
73 | option when committing your changes:
74 |
75 | $ git commit -s -m "Add cool feature."
76 |
77 | This will append the following to your commit message:
78 |
79 | Signed-off-by: Your Name
80 |
81 | By doing this you certify the below:
82 |
83 | Developer Certificate of Origin
84 | Version 1.1
85 |
86 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
87 | 1 Letterman Drive
88 | Suite D4700
89 | San Francisco, CA, 94129
90 |
91 | Everyone is permitted to copy and distribute verbatim copies of
92 | this license document, but changing it is not allowed.
93 |
94 |
95 | Developer's Certificate of Origin 1.1
96 |
97 | By making a contribution to this project, I certify that:
98 |
99 | (a) The contribution was created in whole or in part by me and I
100 | have the right to submit it under the open source license
101 | indicated in the file; or
102 |
103 | (b) The contribution is based upon previous work that, to the best
104 | of my knowledge, is covered under an appropriate open source
105 | license and I have the right under that license to submit that
106 | work with modifications, whether created in whole or in part by
107 | me, under the same open source license (unless I am permitted to
108 | submit under a different license), as indicated in the file; or
109 |
110 | (c) The contribution was provided directly to me by some other
111 | person who certified (a), (b) or (c) and I have not modified it.
112 |
113 | (d) I understand and agree that this project and the contribution
114 | are public and that a record of the contribution (including all
115 | personal information I submit with it, including my sign-off) is
116 | maintained indefinitely and may be redistributed consistent with
117 | this project or the open source license(s) involved.
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:21.04-py3
16 |
17 | ############################################################################
18 | ## Install PyProf
19 | ############################################################################
20 | FROM $BASE_IMAGE
21 |
22 | ARG PYPROF_VERSION=3.11.0dev
23 | ARG PYPROF_CONTAINER_VERSION=21.06dev
24 |
25 | # Copy entire repo into container even though some is not needed for the
26 | # build itself... because we want to be able to copyright check on
27 | # files that aren't directly needed for build.
28 | WORKDIR /opt/pytorch/pyprof
29 | RUN rm -fr *
30 | COPY . .
31 |
32 | RUN pip uninstall -y pyprof
33 | RUN pip install --no-cache-dir .
34 |
35 | # Generating the docs requires the docs source so copy that into the L0_docs so
36 | # that it is available when the test runs.
37 | RUN cp VERSION qa/L0_docs/. && \
38 | cp README.rst qa/L0_docs/. && \
39 | cp -r docs qa/L0_docs/.
40 |
41 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include *.py
2 | recursive-include pyprof *
3 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | ..
2 | # Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | |License|
17 |
18 | PyProf - PyTorch Profiling tool
19 | ===============================
20 |
21 | **ANNOUNCEMENT:**
22 | On June 30th 2021, NVIDIA will no longer make contributions to the PyProf repository.
23 |
24 | To profile models in PyTorch, please use `NVIDIA Deep Learning Profiler (DLProf) `_
25 |
26 | DLProf helps data scientists, engineers, and researchers understand and improve the performance of their models by analyzing text reports or by visualizing the reports in a web browser with the DLProf Viewer.
27 |
28 | DLProf is available on NGC or as a Python pip wheel.
29 |
30 | For continued development of PyProf, see https://github.com/adityaiitb/PyProf.
31 |
32 | .. overview-begin-marker-do-not-remove
33 |
34 | PyProf is a tool that profiles and analyzes the GPU performance of PyTorch
35 | models. PyProf aggregates kernel performance from `Nsight Systems
36 | `_ or `NvProf
37 | `_ and provides the
38 | following additional features:
39 |
40 | * Identifies the layer that launched a kernel: e.g. the association of
41 | `ComputeOffsetsKernel` with a concrete PyTorch layer or API is not obvious.
42 |
43 | * Identifies the tensor dimensions and precision: without knowing the tensor
44 | dimensions and precision, it's impossible to reason about whether the actual
45 | (silicon) kernel time is close to maximum performance of such a kernel on
46 | the GPU. Knowing the tensor dimensions and precision, we can figure out the
47 | FLOPs and bandwidth required by a layer, and then determine how close to
48 | maximum performance the kernel is for that operation.
49 |
50 | * Forward-backward correlation: PyProf determines which forward-pass step
51 | produced the particular weight and data gradients (wgrad, dgrad), which
52 | makes it possible to determine the tensor dimensions required by these
53 | backprop steps to assess their performance.
54 |
55 | * Determines Tensor Core usage: PyProf can highlight the kernels that use
56 | `Tensor Cores `_.
57 |
58 | * Correlates a kernel with the line in the user's code that launched it (program trace).
59 |
60 | .. overview-end-marker-do-not-remove
61 |
62 | The current release of PyProf is 3.10.0 and is available in the 21.04 release of
63 | the PyTorch container on `NVIDIA GPU Cloud (NGC) `_. The
64 | branch for this release is `r21.04
65 | `_.
66 |
67 | Quick Installation Instructions
68 | -------------------------------
69 |
70 | .. quick-install-start-marker-do-not-remove
71 |
72 | * Clone the git repository ::
73 |
74 | $ git clone https://github.com/NVIDIA/PyProf.git
75 |
76 | * Navigate to the top level PyProf directory
77 |
78 | * Install PyProf ::
79 |
80 | $ pip install .
81 |
82 | * Verify installation is complete with pip list ::
83 |
84 | $ pip list | grep pyprof
85 |
86 | * Should display ::
87 |
88 | pyprof 3.11.0.dev0
89 |
90 | .. quick-install-end-marker-do-not-remove
91 |
92 | Quick Start Instructions
93 | ------------------------
94 |
95 | .. quick-start-start-marker-do-not-remove
96 |
97 | * Add the following lines to the PyTorch network you want to profile: ::
98 |
99 | import torch.cuda.profiler as profiler
100 | import pyprof
101 | pyprof.init()
102 |
103 | * Profile with NVProf or Nsight Systems to generate a SQL file. ::
104 |
105 | $ nsys profile -f true -o net --export sqlite python net.py
106 |
107 | * Run the parse.py script to generate the dictionary. ::
108 |
109 | $ python -m pyprof.parse net.sqlite > net.dict
110 |
111 | * Run the prof.py script to generate the reports. ::
112 |
113 | $ python -m pyprof.prof --csv net.dict
114 |
115 | .. quick-start-end-marker-do-not-remove
116 |
117 | Documentation
118 | -------------
119 |
120 | The User Guide can be found in the
121 | `documentation for current release
122 | `_, and
123 | provides instructions on how to install and profile with PyProf.
124 |
125 | A complete `Quick Start Guide `_
126 | provides step-by-step instructions to get you quickly started using PyProf.
127 |
128 | An `FAQ `_ provides
129 | answers for frequently asked questions.
130 |
131 | The `Release Notes
132 | `_
133 | indicate the required versions of the NVIDIA Driver and CUDA, and also describe
134 | which GPUs are supported by PyProf.
135 |
136 | Presentation and Papers
137 | ^^^^^^^^^^^^^^^^^^^^^^^
138 |
139 | * `Automating End-to-End PyTorch Profiling `_.
140 | * `Presentation slides `_.
141 |
142 | Contributing
143 | ------------
144 |
145 | Contributions to PyProf are more than welcome. To
146 | contribute, make a pull request and follow the guidelines outlined in
147 | the `Contributing `_ document.
148 |
149 | Reporting problems, asking questions
150 | ------------------------------------
151 |
152 | We appreciate any feedback, questions or bug reporting regarding this
153 | project. When help with code is needed, follow the process outlined in
154 | the Stack Overflow (https://stackoverflow.com/help/mcve)
155 | document. Ensure posted examples are:
156 |
157 | * minimal – use as little code as possible that still produces the
158 | same problem
159 |
160 | * complete – provide all parts needed to reproduce the problem. Check
161 | if you can strip external dependencies and still show the problem. The
162 | less time we spend on reproducing problems, the more time we have to
163 | fix them
164 |
165 | * verifiable – test the code you're about to provide to make sure it
166 | reproduces the problem. Remove all other problems that are not
167 | related to your request/question.
168 |
169 | .. |License| image:: https://img.shields.io/badge/License-Apache2-green.svg
170 | :target: http://www.apache.org/licenses/LICENSE-2.0
171 |
--------------------------------------------------------------------------------
/VERSION:
--------------------------------------------------------------------------------
1 | 3.11.0dev
2 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # Makefile for Sphinx documentation
16 | #
17 |
18 | # You can set these variables from the command line.
19 | SPHINXOPTS =
20 | SPHINXBUILD = sphinx-build
21 | SPHINXPROJ = PyProf
22 | SOURCEDIR = .
23 | BUILDDIR = build
24 |
25 | # Put it first so that "make" without argument is like "make help".
26 | help:
27 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(O)
28 |
29 | clean:
30 | @rm -fr $(BUILDDIR)
31 |
32 | # Catch-all target: route all unknown targets to Sphinx using the new
33 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
34 | %: Makefile
35 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
36 |
37 | .PHONY: help clean Makefile
38 |
--------------------------------------------------------------------------------
/docs/_templates/layout.html:
--------------------------------------------------------------------------------
1 |
16 | {% extends "!layout.html" %}
17 | {% block sidebartitle %} {{ super() }}
18 |
19 |
47 | {% endblock %}
48 |
49 | {% block footer %} {{ super() }}
50 |
51 |
66 | {% endblock %}
67 |
--------------------------------------------------------------------------------
/docs/advanced.rst:
--------------------------------------------------------------------------------
1 | ..
2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | Advanced PyProf Usage
17 | =====================
18 |
19 | This section demonstrates some advanced techniques to get even more from your
20 | PyProf profiles.
21 |
22 | .. _section-layer-annotation:
23 |
24 | Layer Annotation
25 | ----------------
26 |
27 | Adding custom NVTX ranges to the model layers will allow PyProf to aggregate
28 | profile results based on the ranges. ::
29 |
30 | # examples/user_annotation/resnet.py
31 | # Use the “layer:” prefix
32 |
33 | class Bottleneck(nn.Module):
34 | def forward(self, x):
35 | nvtx.range_push("layer:Bottleneck_{}".format(self.id)) # NVTX push marker
36 |
37 | nvtx.range_push("layer:Conv1") # Nested NVTX push/pop markers
38 | out = self.conv1(x)
39 | nvtx.range_pop()
40 |
41 | nvtx.range_push("layer:BN1") # Use the “layer:” prefix
42 | out = self.bn1(out)
43 | nvtx.range_pop()
44 |
45 | nvtx.range_push("layer:ReLU")
46 | out = self.relu(out)
47 | nvtx.range_pop()
48 |
49 | ...
50 |
51 |     nvtx.range_pop() # NVTX pop marker
         return out
52 |
53 | .. _section-custom-function:
54 |
55 | Custom Function
56 | ---------------
57 |
58 | The following is an example of how to enable Torch Autograd to profile a custom
59 | function. ::
60 |
61 | # examples/custom_func_module/custom_function.py
62 |
63 | import torch
64 | import pyprof
65 | pyprof.init()
66 |
67 | class Foo(torch.autograd.Function):
68 |     @staticmethod
         def forward(ctx, in1, in2):
69 | out = in1 + in2 # This could be a custom C++ function
70 | return out
71 | @staticmethod
72 | def backward(ctx, grad):
73 | in1_grad, in2_grad = grad, grad # This could be a custom C++ function
74 | return in1_grad, in2_grad
75 |
76 | # Hook the forward and backward functions to pyprof
77 | pyprof.wrap(Foo, 'forward')
78 | pyprof.wrap(Foo, 'backward')
79 |
80 | .. _section-custom-module:
81 |
82 | Custom Module
83 | ---------------
84 |
85 | The following is an example of how to enable Torch Autograd to profile a custom
86 | module. ::
87 |
88 | # examples/custom_func_module/custom_module.py
89 |
90 | import torch
91 | import pyprof
92 | pyprof.init()
93 |
94 | class Foo(torch.nn.Module):
95 | def __init__(self, size):
96 | super(Foo, self).__init__()
97 | self.n = torch.nn.Parameter(torch.ones(size))
98 | self.m = torch.nn.Parameter(torch.ones(size))
99 |
100 | def forward(self, input):
101 | return self.n*input + self.m # This could be a custom C++ function.
102 |
103 | # Hook the forward function to pyprof
104 | pyprof.wrap(Foo, 'forward')
105 |
106 | Extensibility
107 | -------------
108 |
109 | * For custom functions and modules, users can add flops and bytes calculations (see the sketch below)
110 |
111 | * Python code is easy to extend - no need to recompile, no need to change the
112 | PyTorch backend and resolve merge conflicts on every version upgrade
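
A minimal sketch of such an extension, assuming the abstract interface in
``pyprof/prof/base.py`` (the method names ``op``, ``mod``, ``tc``, ``params``,
``flops`` and ``bytes`` are assumptions and may differ between releases; wiring
the new class into the dispatch in ``pyprof/prof/prof.py`` is not shown)::

    from pyprof.prof.base import OperatorLayerBase

    class MyCustomOp(OperatorLayerBase):
        """Report flops/bytes for a custom elementwise op on two NxN fp32 tensors."""

        def __init__(self, n):
            self.n = n

        def op(self):
            return "my_custom_op"

        def mod(self):
            return "custom"

        def tc(self):
            return "-"                       # does not use Tensor Cores

        def params(self):
            return {"N": self.n}

        def flops(self):
            return self.n * self.n           # one add per element

        def bytes(self):
            return 3 * self.n * self.n * 4   # two reads + one write, fp32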
113 |
114 | Actionable Items
115 | ----------------
116 |
117 | The following list provides some common actionable items to consider when
118 | analyzing profile results and deciding on how best to improve the performance.
119 | For more customized and directed actionable items, consider using the `NVIDIA
120 | Deep Learning Profiler `_
121 | which provides direct *Expert Systems* feedback based on the profile.
122 |
123 | * NvProf / Nsight Systems tell us what the hotspots are, but not whether we can
124 | act on them.
125 |
126 | * If a kernel runs close to max perf based on FLOPs and bytes (and maximum FLOPs
127 | and bandwidth of the GPU), then there’s no point in optimizing it even if it’s
128 | a hotspot.
129 |
130 | * If the ideal timing based on FLOPs and bytes (max(compute_time,
131 | bandwidth_time)) is much shorter than the silicon time, there’s scope for
132 | improvement.
133 |
134 | * Tensor Core usage (conv): for Volta, convolutions should have the input
135 | channel count (C) and the output channel count (K) divisible by 8, in order to
136 | use tensor cores. For Turing, it’s optimal for C and K to be divisible by 16.
137 |
138 | * Tensor Core usage (GEMM): M, N and K divisible by 8 (Volta) or 16 (Turing); see the quick check below and https://docs.nvidia.com/deeplearning/sdk/dl-performance-guide/index.html
139 |
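As a quick sanity check of the divisibility guidelines above (a hand-rolled
helper, not part of PyProf)::

    # Check whether all dimensions meet the Tensor Core divisibility guideline.
    def tc_friendly(*dims, multiple=8):      # use multiple=16 for Turing
        return all(d % multiple == 0 for d in dims)

    print(tc_friendly(1024, 1024, 64))       # GEMM M, N, K on Volta -> True
    print(tc_friendly(3, 64))                # conv C=3, K=64        -> False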
--------------------------------------------------------------------------------
/docs/examples.rst:
--------------------------------------------------------------------------------
1 | ..
2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | .. _section-examples:
17 |
18 | Examples
19 | ========
20 |
21 | This section provides several real examples of how to profile with PyProf.
22 |
23 | Profile Lenet
24 | -------------
25 |
26 | Navigate to the lenet example. ::
27 |
28 | $ cd pyprof/examples
29 |
30 | Run Nsight Systems to profile the network. ::
31 |
32 | $ nsys profile -f true -o lenet --export sqlite python lenet.py
33 |
34 | Parse the resulting lenet.sqlite database. ::
35 |
36 | $ python -m pyprof.parse lenet.sqlite > lenet.dict
37 |
38 | Run the prof script on the resulting dictionary. ::
39 |
40 | $ python -m pyprof.prof --csv lenet.dict > lenet.csv
41 |
--------------------------------------------------------------------------------
/docs/faqs.rst:
--------------------------------------------------------------------------------
1 | ..
2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | .. _section-faqs:
17 |
18 | PyProf FAQs
19 | ===========
20 |
21 | **How do I intercept the Adam optimizer in APEX?** ::
22 |
23 | import pyprof
24 | import fused_adam_cuda
25 | pyprof.nvtx.wrap(fused_adam_cuda, 'adam')
26 |
27 | **What is the correct initialization if you are using JIT and/or AMP?**
28 |
29 | #. Let any JIT to finish.
30 | #. Initlialize pyprof ``pyprof.init()``.
31 | #. Initialize AMP.
32 |
33 | **How do I profile with ``torch.distributed.launch``?** ::
34 |
35 | nvprof -f -o net%p.sql --profile-from-start off --profile-child-processes \
36 | python -m torch.distributed.launch net.py
37 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | ..
2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | NVIDIA PyProf - PyTorch Profiler
17 | ================================
18 |
19 | .. include:: ../README.rst
20 | :start-after: overview-begin-marker-do-not-remove
21 | :end-before: overview-end-marker-do-not-remove
22 |
23 | .. toctree::
24 | :hidden:
25 |
26 | Documentation home
27 |
28 | .. toctree::
29 | :maxdepth: 2
30 | :caption: User Guide
31 |
32 | quickstart
33 | install
34 | profile
35 | advanced
36 | examples
37 | faqs
--------------------------------------------------------------------------------
/docs/install.rst:
--------------------------------------------------------------------------------
1 | ..
2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | .. _section-install:
17 |
18 | Installing PyProf
19 | =================
20 |
21 | PyProf is available from GitHub.
22 |
23 | .. _section-installing-from-github:
24 |
25 | Installing from GitHub
26 | ----------------------
27 |
28 | .. include:: ../README.rst
29 | :start-after: quick-install-start-marker-do-not-remove
30 | :end-before: quick-install-end-marker-do-not-remove
31 |
32 | .. _section-installing-from-ngc:
33 |
34 | Install from NGC Container
35 | --------------------------
36 |
37 | PyProf is available in the PyTorch container on the `NVIDIA GPU Cloud (NGC)
38 | `_.
39 |
40 | Before you can pull a container from the NGC container registry, you
41 | must have Docker and nvidia-docker installed. For DGX users, this is
42 | explained in `Preparing to use NVIDIA Containers Getting Started Guide
43 | `_.
44 | For users other than DGX, follow the `nvidia-docker installation
45 | documentation `_ to install
46 | the most recent version of CUDA, Docker, and nvidia-docker.
47 |
48 | After performing the above setup, you can pull the PyProf container
49 | using the following command::
50 |
51 | docker pull nvcr.io/nvidia/pytorch:20.12-py3
52 |
53 | Replace *20.12* with the version of PyTorch container that you want to pull.
54 |
--------------------------------------------------------------------------------
/docs/quickstart.rst:
--------------------------------------------------------------------------------
1 | ..
2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | .. _section-quickstart:
17 |
18 | Quickstart
19 | ==========
20 |
21 | PyProf is available in the following ways:
22 |
23 | * As :ref:`installable python code located in GitHub `.
24 |
25 | * As a pre-built Docker container available from the `NVIDIA GPU Cloud (NGC)
26 | `_. For more information, see :ref:`section-installing-from-ngc`.
27 |
28 | * As a buildable docker container. You can :ref:`build your
29 | own container using Docker `
30 |
31 | .. _section-quickstart-prerequisites:
32 |
33 | Prerequisites
34 | -------------
35 |
36 | * If you are installing directly from GitHub or building your own docker
37 | container, you will need to clone the PyProf GitHub repo. Go to
38 | https://github.com/NVIDIA/PyProf and then select the *clone* or *download*
39 | drop down button. After cloning the repo, be sure to select the r<xx.yy>
40 | release branch that corresponds to the version of PyProf you want to use::
41 |
42 | $ git checkout r20.12
43 |
44 | * If you are starting with a pre-built NGC container, you will need to install
45 | Docker and nvidia-docker. For DGX users, see `Preparing to use NVIDIA Containers
46 | `_.
47 | For users other than DGX, see the `nvidia-docker installation documentation
48 | `_.
49 |
50 | .. _section-quickstart-using-a-prebuilt-docker-container:
51 |
52 | Using a Prebuilt Docker Container
53 | ----------------------------------
54 |
55 | Use docker pull to get the PyTorch container from NGC::
56 |
57 | $ docker pull nvcr.io/nvidia/pytorch:<xx.yy>-py3
58 |
59 | Where <xx.yy> is the version of PyProf that you want to pull. Once you have the
60 | container, you can run the container with the following command::
61 |
62 | $ docker run --gpus=1 --rm --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -v/full/path/to/example/model/repository:/models <docker image>
63 |
64 | Where <docker image> is *nvcr.io/nvidia/pytorch:<xx.yy>-py3*.
65 |
66 | .. _section-quickstart-building-with-docker:
67 |
68 | Building With Docker
69 | --------------------
70 |
71 | Make sure you complete the steps in
72 | :ref:`section-quickstart-prerequisites` before attempting to build the PyProf
73 | container. To build PyProf from source, change to the root directory of
74 | the GitHub repo and checkout the release version of the branch that
75 | you want to build (or the `main` branch if you want to build the
76 | under-development version)::
77 |
78 | $ git checkout r20.12
79 |
80 | Then use docker to build::
81 |
82 | $ docker build --pull -t pyprof .
83 |
84 | After the build completes you can run the container with the following command::
85 |
86 | $ docker run --gpus=1 --rm --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -v/full/path/to/example/model/repository:/models <docker image>
87 |
88 | Where <docker image> is *pyprof*.
89 |
90 | .. _section-quickstart-profile-with-pyprof:
91 |
92 | Profile with PyProf
93 | -------------------
94 |
95 | .. include:: ../README.rst
96 | :start-after: quick-start-start-marker-do-not-remove
97 | :end-before: quick-start-end-marker-do-not-remove
98 |
--------------------------------------------------------------------------------
/pyprof/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | import warnings
19 |
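# Public API: init() sets up the NVTX monkey patching of PyTorch operations;
# wrap() (an alias of nvmarker.add_wrapper, also reachable as pyprof.nvtx.wrap)
# instruments additional functions or methods.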
20 | from .nvtx.nvmarker import init
21 | from .nvtx.nvmarker import add_wrapper as wrap
22 |
--------------------------------------------------------------------------------
/pyprof/examples/.gitignore:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | __pycache__
16 | *.sql
17 | *.dict
18 | *.csv
19 |
--------------------------------------------------------------------------------
/pyprof/examples/custom_func_module/README.md:
--------------------------------------------------------------------------------
1 |
16 |
17 | This directory has examples which show how to intercept (monkey patch) custom
18 | functions and modules with `pyprof`. No changes are required in `pyprof/parse`;
19 | however, users can add support for bytes and flops calculation for custom
20 | functions and modules in `pyprof/prof` by extending the `OperatorLayerBase`
21 | class.
22 |
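A condensed sketch of the intercept pattern (see `custom_function.py` and
`custom_module.py` in this directory for the complete, runnable scripts):

```python
import torch
import pyprof

pyprof.init()

class Foo(torch.autograd.Function):
    @staticmethod
    def forward(ctx, in1, in2):
        return in1 + in2            # could call a custom C/C++ kernel

    @staticmethod
    def backward(ctx, grad):
        return grad, grad

# Monkey patch: emit NVTX ranges for the custom forward/backward passes.
pyprof.nvtx.wrap(Foo, 'forward')
pyprof.nvtx.wrap(Foo, 'backward')
```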
--------------------------------------------------------------------------------
/pyprof/examples/custom_func_module/custom_function.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | import torch
19 | import torch.cuda.profiler as profiler
20 | import pyprof
21 | #Initialize pyprof
22 | pyprof.init()
23 |
24 |
25 | class Foo(torch.autograd.Function):
26 |
27 | @staticmethod
28 | def forward(ctx, in1, in2):
29 | out = in1 + in2 #This could be a custom C/C++ function.
30 | return out
31 |
32 | @staticmethod
33 | def backward(ctx, grad):
34 | in1_grad = grad #This could be a custom C/C++ function.
35 | in2_grad = grad #This could be a custom C/C++ function.
36 | return in1_grad, in2_grad
37 |
38 |
39 | #Hook the forward and backward functions to pyprof
40 | pyprof.nvtx.wrap(Foo, 'forward')
41 | pyprof.nvtx.wrap(Foo, 'backward')
42 |
43 | foo = Foo.apply
44 |
45 | x = torch.ones(4, 4).cuda()
46 | y = torch.ones(4, 4).cuda()
47 |
48 | with torch.autograd.profiler.emit_nvtx():
49 | profiler.start()
50 | z = foo(x, y)
51 | profiler.stop()
52 |
--------------------------------------------------------------------------------
/pyprof/examples/custom_func_module/custom_module.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | import torch
19 | import torch.cuda.profiler as profiler
20 | import pyprof
21 | pyprof.init()
22 |
23 |
24 | class Foo(torch.nn.Module):
25 |
26 | def __init__(self, size):
27 | super(Foo, self).__init__()
28 | self.n = torch.nn.Parameter(torch.ones(size))
29 | self.m = torch.nn.Parameter(torch.ones(size))
30 |
31 | def forward(self, input):
32 | return self.n * input + self.m
33 |
34 |
35 | # Hook the forward function to pyprof
36 | pyprof.nvtx.wrap(Foo, 'forward')
37 |
38 | foo = Foo(4)
39 | foo.cuda()
40 | x = torch.ones(4).cuda()
41 |
42 | with torch.autograd.profiler.emit_nvtx():
43 | profiler.start()
44 | z = foo(x)
45 | profiler.stop()
46 |
--------------------------------------------------------------------------------
/pyprof/examples/custom_func_module/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | set -e
17 |
18 | SCRIPT=`realpath $0`
19 | SCRIPTPATH=`dirname $SCRIPT`
20 | PYPROF="$SCRIPTPATH/../.."
21 |
22 | parse="python $PYPROF/parse/parse.py"
23 | prof="python $PYPROF/prof/prof.py"
24 |
25 | for f in *.py
26 | do
27 | base=`basename $f .py`
28 | sql=$base.sql
29 | dict=$base.dict
30 |
31 | #NVprof
32 | echo "nvprof -fo $sql python $f"
33 | nvprof -fo $sql python $f
34 |
35 | #Parse
36 | echo $parse $sql
37 | $parse $sql > $dict
38 |
39 | #Prof
40 | echo $prof $dict
41 | $prof -w 130 $dict
42 | \rm $sql $dict
43 | done
44 |
--------------------------------------------------------------------------------
/pyprof/examples/imagenet/imagenet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | """
18 | Example to run pyprof with imagenet models.
19 | """
20 |
21 | import sys
22 | import torch
23 | import torch.nn as nn
24 | import torchvision.models as models
25 | import torch.cuda.profiler as profiler
26 | import argparse
27 |
28 | import pyprof
29 | from apex.optimizers import FusedAdam
30 |
31 |
32 | def parseArgs():
33 | parser = argparse.ArgumentParser(prog=sys.argv[0], description="Run popular imagenet models.")
34 |
35 | parser.add_argument(
36 | "-m", type=str, default="resnet50", choices=[
37 | "alexnet", "densenet121", "densenet161", "densenet169", "densenet201", "googlenet", "mnasnet0_5",
38 | "mnasnet0_75", "mnasnet1_0", "mnasnet1_3", "mobilenet_v2", "resnet18", "resnet34", "resnet50", "resnet101",
39 | "resnet152", "resnext50_32x4d", "resnext101_32x8d", "wide_resnet50_2", "wide_resnet101_2",
40 | "shufflenet_v2_x0_5", "shufflenet_v2_x1_0", "shufflenet_v2_x1_5", "shufflenet_v2_x2_0", "squeezenet1_0",
41 | "squeezenet1_1", "vgg11", "vgg11_bn", "vgg13", "vgg13_bn", "vgg16", "vgg16_bn", "vgg19", "vgg19_bn",
42 | "inception_v3"
43 | ], help="Model."
44 | )
45 |
46 | parser.add_argument("-b", type=int, default=32, help="Batch size.")
47 |
48 | parser.add_argument("-o", type=str, default="adam", choices=["adam", "sgd"], help="Optimizer.")
49 |
50 | args = parser.parse_args()
51 | return args
52 |
53 |
54 | d = {
55 | "alexnet": {
56 | 'H': 224,
57 | 'W': 224,
58 | 'opts': {}
59 | },
60 | "densenet121": {
61 | 'H': 224,
62 | 'W': 224,
63 | 'opts': {}
64 | },
65 | "densenet161": {
66 | 'H': 224,
67 | 'W': 224,
68 | 'opts': {}
69 | },
70 | "densenet169": {
71 | 'H': 224,
72 | 'W': 224,
73 | 'opts': {}
74 | },
75 | "densenet201": {
76 | 'H': 224,
77 | 'W': 224,
78 | 'opts': {}
79 | },
80 | "googlenet": {
81 | 'H': 224,
82 | 'W': 224,
83 | 'opts': {
84 | 'aux_logits': False
85 | }
86 | },
87 | "mnasnet0_5": {
88 | 'H': 224,
89 | 'W': 224,
90 | 'opts': {}
91 | },
92 | "mnasnet0_75": {
93 | 'H': 224,
94 | 'W': 224,
95 | 'opts': {}
96 | },
97 | "mnasnet1_0": {
98 | 'H': 224,
99 | 'W': 224,
100 | 'opts': {}
101 | },
102 | "mnasnet1_3": {
103 | 'H': 224,
104 | 'W': 224,
105 | 'opts': {}
106 | },
107 | "mobilenet_v2": {
108 | 'H': 224,
109 | 'W': 224,
110 | 'opts': {}
111 | },
112 | "resnet18": {
113 | 'H': 224,
114 | 'W': 224,
115 | 'opts': {}
116 | },
117 | "resnet34": {
118 | 'H': 224,
119 | 'W': 224,
120 | 'opts': {}
121 | },
122 | "resnet50": {
123 | 'H': 224,
124 | 'W': 224,
125 | 'opts': {}
126 | },
127 | "resnet101": {
128 | 'H': 224,
129 | 'W': 224,
130 | 'opts': {}
131 | },
132 | "resnet152": {
133 | 'H': 224,
134 | 'W': 224,
135 | 'opts': {}
136 | },
137 | "resnext50_32x4d": {
138 | 'H': 224,
139 | 'W': 224,
140 | 'opts': {}
141 | },
142 | "resnext101_32x8d": {
143 | 'H': 224,
144 | 'W': 224,
145 | 'opts': {}
146 | },
147 | "wide_resnet50_2": {
148 | 'H': 224,
149 | 'W': 224,
150 | 'opts': {}
151 | },
152 | "wide_resnet101_2": {
153 | 'H': 224,
154 | 'W': 224,
155 | 'opts': {}
156 | },
157 | "shufflenet_v2_x0_5": {
158 | 'H': 224,
159 | 'W': 224,
160 | 'opts': {}
161 | },
162 | "shufflenet_v2_x1_0": {
163 | 'H': 224,
164 | 'W': 224,
165 | 'opts': {}
166 | },
167 | "shufflenet_v2_x1_5": {
168 | 'H': 224,
169 | 'W': 224,
170 | 'opts': {}
171 | },
172 | "shufflenet_v2_x2_0": {
173 | 'H': 224,
174 | 'W': 224,
175 | 'opts': {}
176 | },
177 | "squeezenet1_0": {
178 | 'H': 224,
179 | 'W': 224,
180 | 'opts': {}
181 | },
182 | "squeezenet1_1": {
183 | 'H': 224,
184 | 'W': 224,
185 | 'opts': {}
186 | },
187 | "vgg11": {
188 | 'H': 224,
189 | 'W': 224,
190 | 'opts': {}
191 | },
192 | "vgg11_bn": {
193 | 'H': 224,
194 | 'W': 224,
195 | 'opts': {}
196 | },
197 | "vgg13": {
198 | 'H': 224,
199 | 'W': 224,
200 | 'opts': {}
201 | },
202 | "vgg13_bn": {
203 | 'H': 224,
204 | 'W': 224,
205 | 'opts': {}
206 | },
207 | "vgg16": {
208 | 'H': 224,
209 | 'W': 224,
210 | 'opts': {}
211 | },
212 | "vgg16_bn": {
213 | 'H': 224,
214 | 'W': 224,
215 | 'opts': {}
216 | },
217 | "vgg19": {
218 | 'H': 224,
219 | 'W': 224,
220 | 'opts': {}
221 | },
222 | "vgg19_bn": {
223 | 'H': 224,
224 | 'W': 224,
225 | 'opts': {}
226 | },
227 | "inception_v3": {
228 | 'H': 299,
229 | 'W': 299,
230 | 'opts': {
231 | 'aux_logits': False
232 | }
233 | },
234 | }
235 |
236 |
237 | def main():
238 | args = parseArgs()
239 |
240 | pyprof.init()
241 |
242 | N = args.b
243 | C = 3
244 | H = d[args.m]['H']
245 | W = d[args.m]['W']
246 | opts = d[args.m]['opts']
247 | classes = 1000
248 |
249 | net = getattr(models, args.m)
250 | net = net(**opts).cuda().half()
251 | net.train()
252 |
253 | x = torch.rand(N, C, H, W).cuda().half()
254 | target = torch.empty(N, dtype=torch.long).random_(classes).cuda()
255 |
256 | criterion = nn.CrossEntropyLoss().cuda()
257 | if (args.o == "sgd"):
258 | optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
259 | elif (args.o == "adam"):
260 | optimizer = FusedAdam(net.parameters())
261 | else:
262 | assert False
263 |
264 | #Warm up without profiler
265 | for i in range(2):
266 | output = net(x)
267 | loss = criterion(output, target)
268 | optimizer.zero_grad()
269 | loss.backward()
270 | optimizer.step()
271 |
272 | with torch.autograd.profiler.emit_nvtx():
273 | profiler.start()
274 | output = net(x)
275 | loss = criterion(output, target)
276 | optimizer.zero_grad()
277 | loss.backward()
278 | optimizer.step()
279 | profiler.stop()
280 |
281 |
282 | if __name__ == "__main__":
283 | main()
284 |
--------------------------------------------------------------------------------
/pyprof/examples/imagenet/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | set -e
17 |
18 | SCRIPT=`realpath $0`
19 | SCRIPTPATH=`dirname $SCRIPT`
20 | PYPROF="$SCRIPTPATH/../.."
21 |
22 | parse="python $PYPROF/parse/parse.py"
23 | prof="python $PYPROF/prof/prof.py"
24 |
25 | for net in "resnet50"
26 | do
27 | for optim in adam sgd
28 | do
29 | for batch in 32 64
30 | do
31 | base="torchvision".$net.$optim.$batch
32 | sql=$base.sql
33 | dict=$base.dict
34 |
35 | #NVprof
36 | echo "nvprof -fo $sql --profile-from-start off python imagenet.py -m ${net} -o $optim -b $batch"
37 | sudo env "PATH=$PATH" nvprof -fo $sql --profile-from-start off python imagenet.py -m ${net} -o $optim -b $batch
38 |
39 | #Parse
40 | echo $parse $sql
41 | $parse $sql > $dict
42 |
43 | #Prof
44 | echo $prof $dict
45 | $prof -w 130 $dict
46 | # \rm $sql $dict
47 | done
48 | done
49 | done
50 |
--------------------------------------------------------------------------------
/pyprof/examples/jit/README.md:
--------------------------------------------------------------------------------
1 |
16 |
17 | *As of this writing, these examples do not work
18 | because of changes being proposed in PyTorch.*
19 |
20 | There are two ways to use PyTorch JIT
21 | - Scripting
22 | - Tracing
23 |
24 | In addition, we can JIT a
25 | - Stand alone function
26 | - Class / class method
27 |
28 | This directory has an example for each of the 4 cases.
29 | Intercepting (monkey patching) JITted code has a few extra steps,
30 | which are explained through comments.
31 |
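For example, the tracing cases boil down to the following steps (condensed from
`jit_trace_function.py` in this directory):

```python
import torch
import pyprof

def foo(x, y):
    return torch.sigmoid(x) + y

x = torch.zeros(4, 4).cuda()
y = torch.ones(4, 4).cuda()

# 1. JIT first: tracing returns a ScriptModule with a forward method.
traced_foo = torch.jit.trace(foo, (x, y))

# 2. Initialize pyprof only after the JIT step.
pyprof.init()

# 3. Give the traced object a name, then hook up its forward method.
traced_foo.__dict__['__name__'] = "foo"
pyprof.nvtx.wrap(traced_foo, 'forward')
```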
--------------------------------------------------------------------------------
/pyprof/examples/jit/jit_script_function.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | import torch
19 | import torch.cuda.profiler as profiler
20 | import pyprof
21 |
22 | #The following creates an object "foo" of type ScriptModule
23 | #The new object has a function called "forward"
24 |
25 |
26 | @torch.jit.script
27 | def foo(x, y):
28 | return torch.sigmoid(x) + y
29 |
30 |
31 | #Initialize pyprof after the JIT step
32 | pyprof.init()
33 |
34 | #Assign a name to the object "foo"
35 | foo.__name__ = "foo"
36 |
37 | #Hook up the forward function to pyprof
38 | pyprof.nvtx.wrap(foo, 'forward')
39 |
40 | x = torch.zeros(4, 4).cuda()
41 | y = torch.ones(4, 4).cuda()
42 |
43 | with torch.autograd.profiler.emit_nvtx():
44 | profiler.start()
45 | z = foo(x, y)
46 | profiler.stop()
47 | print(z)
48 |
--------------------------------------------------------------------------------
/pyprof/examples/jit/jit_script_method.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | import torch
19 | import torch.cuda.profiler as profiler
20 | import pyprof
21 |
22 |
23 | class Foo(torch.jit.ScriptModule):
24 |
25 | def __init__(self, size):
26 | super(Foo, self).__init__()
27 | self.n = torch.nn.Parameter(torch.ones(size))
28 | self.m = torch.nn.Parameter(torch.ones(size))
29 |
30 | @torch.jit.script_method
31 | def forward(self, input):
32 | return self.n * input + self.m
33 |
34 |
35 | #Initialize pyprof after the JIT step
36 | pyprof.init()
37 |
38 | #Hook up the forward function to pyprof
39 | pyprof.nvtx.wrap(Foo, 'forward')
40 |
41 | foo = Foo(4)
42 | foo.cuda()
43 | x = torch.ones(4).cuda()
44 |
45 | with torch.autograd.profiler.emit_nvtx():
46 | profiler.start()
47 | z = foo(x)
48 | profiler.stop()
49 | print(z)
50 |
--------------------------------------------------------------------------------
/pyprof/examples/jit/jit_trace_function.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | import torch
19 | import torch.cuda.profiler as profiler
20 | import pyprof
21 |
22 |
23 | def foo(x, y):
24 | return torch.sigmoid(x) + y
25 |
26 |
27 | x = torch.zeros(4, 4).cuda()
28 | y = torch.ones(4, 4).cuda()
29 |
30 | #JIT the function using tracing
31 | #This returns an object of type ScriptModule with a forward method.
32 | traced_foo = torch.jit.trace(foo, (x, y))
33 |
34 | #Initialize pyprof after the JIT step
35 | pyprof.init()
36 |
37 | #Assign a name to the object "traced_foo"
38 | traced_foo.__dict__['__name__'] = "foo"
39 |
40 | #Hook up the forward function to pyprof
41 | pyprof.nvtx.wrap(traced_foo, 'forward')
42 |
43 | with torch.autograd.profiler.emit_nvtx():
44 | profiler.start()
45 | z = traced_foo(x, y)
46 | profiler.stop()
47 | print(z)
48 |
--------------------------------------------------------------------------------
/pyprof/examples/jit/jit_trace_method.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | import torch
19 | import torch.cuda.profiler as profiler
20 | import pyprof
21 |
22 |
23 | class Foo(torch.nn.Module):
24 |
25 | def __init__(self, size):
26 | super(Foo, self).__init__()
27 | self.n = torch.nn.Parameter(torch.ones(size))
28 | self.m = torch.nn.Parameter(torch.ones(size))
29 |
30 | def forward(self, input):
31 | return self.n * input + self.m
32 |
33 |
34 | foo = Foo(4)
35 | foo.cuda()
36 | x = torch.ones(4).cuda()
37 |
38 | #JIT the class using tracing
39 | traced_foo = torch.jit.trace(foo, x)
40 |
41 | #Initialize pyprof after the JIT step
42 | pyprof.init()
43 |
44 | #Assign a name to the object "traced_foo"
45 | traced_foo.__dict__['__name__'] = "foo"
46 |
47 | #Hook up the forward function to pyprof
48 | pyprof.nvtx.wrap(traced_foo, 'forward')
49 |
50 | with torch.autograd.profiler.emit_nvtx():
51 | profiler.start()
52 | z = traced_foo(x)
53 | profiler.stop()
54 | print(z)
55 |
--------------------------------------------------------------------------------
/pyprof/examples/jit/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | set -e
17 |
18 | SCRIPT=`realpath $0`
19 | SCRIPTPATH=`dirname $SCRIPT`
20 | PYPROF="$SCRIPTPATH/../.."
21 |
22 | parse="python $PYPROF/parse/parse.py"
23 | prof="python $PYPROF/prof/prof.py"
24 |
25 | for f in *.py
26 | do
27 | base=`basename $f .py`
28 | sql=$base.sql
29 | dict=$base.dict
30 |
31 | #NVprof
32 | echo "nvprof -fo $sql python $f"
33 | nvprof -fo $sql python $f
34 |
35 | #Parse
36 | echo $parse $sql
37 | $parse $sql > $dict
38 |
39 | #Prof
40 | echo $prof $dict
41 | $prof -w 130 $dict
42 | \rm $sql $dict
43 | done
44 |
--------------------------------------------------------------------------------
/pyprof/examples/lenet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | import torch
19 | import torch.nn as nn
20 | import torch.nn.functional as F
21 | import torch.cuda.profiler as profiler
22 | import torch.optim as optim
23 |
24 | import pyprof
25 | pyprof.init()
26 |
27 |
28 | class LeNet5(nn.Module):
29 |
30 | def __init__(self):
31 | super(LeNet5, self).__init__()
32 | # 1 input image channel, 6 output channels, 5x5 square convolution
33 | # kernel
34 | self.conv1 = nn.Conv2d(1, 6, 5)
35 | self.conv2 = nn.Conv2d(6, 16, 5)
36 | # an affine operation: y = Wx + b
37 | self.fc1 = nn.Linear(16 * 5 * 5, 120)
38 | self.fc2 = nn.Linear(120, 84)
39 | self.fc3 = nn.Linear(84, 10)
40 |
41 | def forward(self, x):
42 | # Max pooling over a (2, 2) window
43 | x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
44 | # If the size is a square you can only specify a single number
45 | x = F.max_pool2d(F.relu(self.conv2(x)), 2)
46 | x = x.view(-1, self.num_flat_features(x))
47 | x = F.relu(self.fc1(x))
48 | x = F.relu(self.fc2(x))
49 | x = self.fc3(x)
50 | return x
51 |
52 | def num_flat_features(self, x):
53 | size = x.size()[1:] # all dimensions except the batch dimension
54 | num_features = 1
55 | for s in size:
56 | num_features *= s
57 | return num_features
58 |
59 |
60 | with torch.autograd.profiler.emit_nvtx():
61 |
62 | net = LeNet5().cuda()
63 |
64 | input = torch.randn(1, 1, 32, 32).cuda()
65 | out = net(input)
66 |
67 | target = torch.randn(10) # a dummy target, for example
68 | target = target.view(1, -1).cuda() # make it the same shape as output
69 | criterion = nn.MSELoss()
70 |
71 | # create your optimizer
72 | optimizer = optim.SGD(net.parameters(), lr=0.01)
73 |
74 | # in your training loop:
75 | optimizer.zero_grad() # zero the gradient buffers
76 |
77 | profiler.start()
78 | output = net(input)
79 | loss = criterion(output, target)
80 | loss.backward()
81 | optimizer.step() # Does the update
82 | profiler.stop()
83 |
--------------------------------------------------------------------------------
/pyprof/examples/operators.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | """
18 | This file checks all Python operators.
19 | """
20 |
21 | import sys
22 | import torch
23 | import torch.cuda.profiler as profiler
24 | import operator
25 | import inspect
26 |
27 | #Import and initialize pyprof
28 | import pyprof
29 | pyprof.init()
30 |
31 | X = 1024
32 | Y = 1024
33 |
34 | fa = torch.rand(X, Y).cuda()
35 | fb = torch.rand(X, Y).cuda()
36 | fc = torch.rand(X, Y).cuda()
37 |
38 | ia = torch.randint(0, 100, (X, Y)).cuda()
39 | ib = torch.randint(0, 100, (X, Y)).cuda()
40 |
41 | sa = torch.ones(1, 1).cuda()
42 | sb = torch.ones(1, 1).cuda()
43 |
44 | ba = fa.byte()
45 |
46 | unaryOps = [
47 | "abs",
48 | "__abs__",
49 | "neg",
50 | "__neg__",
51 | ]
52 | invertOps = [
53 | "inv",
54 | "invert",
55 | "__inv__",
56 | "__invert__",
57 | ] #implemented only for byte tensors
58 | #pos, __pos__ are not implemented for tensors
59 |
60 | binaryOps = []
61 | binaryOps += ["lt", "__lt__", "le", "__le__", "eq", "__eq__", "ne", "__ne__", "ge", "__ge__", "gt", "__gt__"]
62 | binaryOps += [
63 | "add", "__add__", "sub", "__sub__", "mul", "__mul__", "floordiv", "__floordiv__", "truediv", "__truediv__", "pow",
64 | "__pow__", "mod", "__mod__"
65 | ]
66 | binaryOps += ["and_", "__and__", "or_", "__or__", "xor", "__xor__", "lshift", "__lshift__", "rshift", "__rshift__"]
67 |
68 | inplaceOps = []
69 | inplaceOps += [
70 | "iadd",
71 | "__iadd__",
72 | "isub",
73 | "__isub__",
74 | "imul",
75 | "__imul__",
76 | "ifloordiv",
77 | "__ifloordiv__",
78 | "itruediv",
79 | "__itruediv__",
80 | "imod",
81 | "__imod__",
82 | ]
83 | #ipow, __ipow__ are not implemented in PyTorch
84 | inplaceOps += [
85 | "iand",
86 | "__iand__",
87 | "ior",
88 | "__ior__",
89 | "ixor",
90 | "__ixor__",
91 | "ilshift",
92 | "__ilshift__",
93 | "irshift",
94 | "__irshift__",
95 | ]
96 |
97 | matmulOps = ["matmul", "__matmul__"]
98 | inplacematmulOps = ["imatmul", "__imatmul__"]
99 |
100 | reverseIntBinaryOps = [
101 | "__radd__",
102 | "__rsub__",
103 | "__rmul__",
104 | "__rfloordiv__",
105 | "__rpow__",
106 | ]
107 | reverseFloatBinaryOps = [
108 | "__radd__",
109 | "__rsub__",
110 | "__rmul__",
111 | "__rdiv__",
112 | "__rtruediv__",
113 | "__rfloordiv__",
114 | "__rpow__",
115 | ]
116 | '''
117 | TODO
118 | .concat(a, b)
119 | .__concat__(a, b)
120 | .contains(a, b)
121 | .__contains__(a, b)
122 | .countOf(a, b)
123 | .delitem(a, b)
124 | .__delitem__(a, b)
125 | .getitem(a, b)
126 | .__getitem__(a, b)
127 | .indexOf(a, b)
128 | .setitem(a, b, c)
129 | .__setitem__(a, b, c)
130 | .length_hint(obj, default=0)
131 | .iconcat(a, b)
132 | .__iconcat__(a, b)
133 | .index(a)
134 | .__index__(a)
135 | '''
136 |
137 | #Context manager
138 | with torch.autograd.profiler.emit_nvtx():
139 |
140 | #Start profiler
141 | profiler.start()
142 |
143 | for op in unaryOps:
144 | assert hasattr(operator, op)
145 | f = getattr(operator, op)
146 | assert inspect.isbuiltin(f)
147 | c = f(ia)
148 |
149 | for op in invertOps:
150 | assert hasattr(operator, op)
151 | f = getattr(operator, op)
152 | assert inspect.isbuiltin(f)
153 | c = f(ba)
154 |
155 | for op in binaryOps:
156 | assert hasattr(operator, op)
157 | f = getattr(operator, op)
158 | assert inspect.isbuiltin(f)
159 | c = f(ia, ib)
160 | c = f(ia, 2)
161 |
162 | for op in inplaceOps:
163 | assert hasattr(operator, op)
164 | f = getattr(operator, op)
165 | assert inspect.isbuiltin(f)
166 | ia = f(ia, ib)
167 | ia = f(ia, 2)
168 |
169 | for op in matmulOps:
170 | assert hasattr(operator, op)
171 | f = getattr(operator, op)
172 | assert inspect.isbuiltin(f)
173 | c = f(fa, fb)
174 |
175 | for op in inplacematmulOps:
176 | assert hasattr(operator, op)
177 | f = getattr(operator, op)
178 | assert inspect.isbuiltin(f)
179 | fa = f(fa, fb)
180 |
181 | for op in reverseIntBinaryOps:
182 | assert hasattr(torch.Tensor, op)
183 | f = getattr(torch.Tensor, op)
184 | ia = f(ia, ib)
185 |
186 | for op in reverseFloatBinaryOps:
187 | assert hasattr(torch.Tensor, op)
188 | f = getattr(torch.Tensor, op)
189 | fa = f(fa, fb)
190 | '''
191 | #c = fa[3]
192 | #c = fa[3][3]
193 | #c = torch.min(fa, 3)
194 | c = torch.sum(fa)
195 | c = torch.max(fa)
196 | c = -fa
197 | #fc[2][2] = fa[2][2]
198 |
199 | c = a_scalar and b_scalar
200 | c = a_scalar or b_scalar
201 | c = not a_scalar
202 |
203 | c = a is b
204 | c = a is not b
205 | '''
206 |
207 | #Stop profiler
208 | profiler.stop()
209 |
--------------------------------------------------------------------------------
/pyprof/examples/simple.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | """
18 | This simple file provides an example of how to
19 | - import the pyprof library and initialize it
20 | - use the emit_nvtx context manager
21 | - start and stop the profiler
22 |
23 | Only kernels within profiler.start and profiler.stop calls are profiled.
24 | To profile:
25 | $ nvprof -f -o simple.sql --profile-from-start off ./simple.py
26 | """
27 |
28 | import sys
29 | import torch
30 | import torch.cuda.profiler as profiler
31 |
32 | #Import and initialize pyprof
33 | import pyprof
34 | pyprof.init()
35 |
36 | a = torch.randn(5, 5).cuda()
37 | b = torch.randn(5, 5).cuda()
38 |
39 | #Context manager
40 | with torch.autograd.profiler.emit_nvtx():
41 |
42 | #Start profiler
43 | profiler.start()
44 |
45 | c = a + b
46 | c = torch.mul(a, b)
47 | c = torch.matmul(a, b)
48 | c = torch.argmax(a, dim=1)
49 | c = torch.nn.functional.pad(a, (1, 1))
50 |
51 | #Stop profiler
52 | profiler.stop()
53 |
--------------------------------------------------------------------------------
/pyprof/examples/user_annotation/README.md:
--------------------------------------------------------------------------------
1 |
16 |
17 | Nvidia NVTX range markers (https://docs.nvidia.com/gameworks/content/gameworkslibrary/nvtx/nvidia_tools_extension_library_nvtx.htm)
18 | are a useful tool for capturing and observing events and code ranges.
19 | Using the PyTorch APIs, e.g. `torch.cuda.nvtx.range_push("xxx")` and `torch.cuda.nvtx.range_pop()`, users can easily add their own NVTX range markers. These markers can then be observed in the Nvidia Visual Profiler (NVVP).
20 | 
21 | If the inserted NVTX marker strings follow the pattern `"layer:your_string_here"`, e.g. `"layer:conv1"` or `"layer:encoder_layer_3_self_attention"`, then `pyprof` will display the strings `conv1` and `encoder_layer_3_self_attention` next to the associated kernels in the output of `prof.py` when used with the `-c layer` option.
22 | 
23 | NVTX range markers can be nested, and if users follow the above string pattern, the output of `prof.py` will show all the markers associated with a kernel.
24 | 
25 | The file `resnet.py` (a simplified version of the torchvision model) shows how users can add (nested) NVTX markers with information that can greatly aid in understanding and analyzing networks.
26 | 
27 | Note that the pattern `"layer:your_string_here"` was chosen to aid information extraction by `pyprof`. The tool works seamlessly even if there are other markers, or no markers at all.
28 |
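For illustration, here is a minimal sketch of nested markers that follow this pattern (the layer names and module sizes are made up; `resnet.py` contains a complete, realistic example):

```python
import torch
import torch.nn as nn

conv1 = nn.Conv2d(3, 64, kernel_size=3).cuda()
x = torch.randn(1, 3, 32, 32).cuda()

# Outer marker for a block, inner marker for a layer within it.
# Kernels launched while both ranges are open are reported with both labels.
torch.cuda.nvtx.range_push("layer:block1")
torch.cuda.nvtx.range_push("layer:conv1")
y = conv1(x)
torch.cuda.nvtx.range_pop()
torch.cuda.nvtx.range_pop()
```
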
29 | ### To run
30 |
31 | ```sh
32 | nvprof -fo resnet.sql --profile-from-start off python resnet.py
33 | parse.py resnet.sql > resnet.dict
34 | prof.py --csv -c idx,layer,dir,mod,op,kernel,params,sil resnet.dict
35 | ```
36 |
37 | The file `resnet.sql` can also be opened with NVVP as usual.
38 |
--------------------------------------------------------------------------------
/pyprof/examples/user_annotation/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | set -e
17 |
18 | SCRIPT=`realpath $0`
19 | SCRIPTPATH=`dirname $SCRIPT`
20 | PYPROF="$SCRIPTPATH/../.."
21 |
22 | parse="python $PYPROF/parse/parse.py"
23 | prof="python $PYPROF/prof/prof.py"
24 |
25 | for f in *.py
26 | do
27 | base=`basename $f .py`
28 | sql=$base.sql
29 | dict=$base.dict
30 |
31 | #NVprof
32 | 	echo "nvprof -fo $sql --profile-from-start off python $f"
33 | nvprof -fo $sql --profile-from-start off python $f
34 |
35 | #Parse
36 | echo $parse $sql
37 | $parse $sql > $dict
38 |
39 | #Prof
40 | echo $prof $dict
41 | #$prof -w 130 $dict
42 | $prof --csv -c idx,layer,dir,mod,op,kernel,params,sil $dict
43 | \rm $sql $dict
44 | done
45 |
--------------------------------------------------------------------------------
/pyprof/nvtx/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | from .nvmarker import init
19 | from .nvmarker import add_wrapper as wrap
20 |
--------------------------------------------------------------------------------
/pyprof/parse/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
--------------------------------------------------------------------------------
/pyprof/parse/__main__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | from .parse import main
19 |
20 | if __name__ == '__main__':
21 | main()
22 |
--------------------------------------------------------------------------------
/pyprof/parse/db.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | import sys, sqlite3
19 |
20 |
21 | class DB(object):
22 | """
23 | This class provides functions for DB operations
24 | with exception handling.
25 | """
26 |
27 | def __init__(self, dbFile):
28 | try:
29 | conn = sqlite3.connect(dbFile)
30 | conn.row_factory = sqlite3.Row
31 | c = conn.cursor()
32 | except:
33 | print("Error opening {}".format(dbFile))
34 | sys.exit(1)
35 |
36 | self.conn = conn
37 | self.c = c
38 |
39 | def select(self, cmd):
40 | try:
41 | self.c.execute(cmd)
42 | #rows = self.c.fetchall()
43 | rows = [dict(row) for row in self.c.fetchall()]
44 | except sqlite3.Error as e:
45 | print(e)
46 | sys.exit(1)
47 | except:
48 | print("Uncaught error in SQLite access while executing {}".format(cmd))
49 | sys.exit(1)
50 |
51 | #print(rows)
52 | return rows
53 |
54 | def insert(self, cmd, data):
55 | try:
56 | self.c.execute(cmd, data)
57 | except sqlite3.Error as e:
58 | print(e)
59 | sys.exit(1)
60 | except:
61 | print("Uncaught error in SQLite access while executing {}".format(cmd))
62 | sys.exit(1)
63 |
64 | def execute(self, cmd):
65 | try:
66 | self.c.execute(cmd)
67 | except sqlite3.Error as e:
68 | print(e)
69 | sys.exit(1)
70 | except:
71 | print("Uncaught error in SQLite access while executing {}".format(cmd))
72 | sys.exit(1)
73 |
74 | def commit(self):
75 | self.conn.commit()
76 |
77 | def close(self):
78 | self.c.close()
79 | self.conn.close()
80 |
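# A minimal usage sketch of this wrapper (illustrative only; "net.sql" is a
# hypothetical database produced by nvprof):
#
#   db = DB("net.sql")
#   rows = db.select("SELECT name FROM sqlite_master WHERE type='table'")
#   for row in rows:
#       print(row["name"])
#   db.close()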
--------------------------------------------------------------------------------
/pyprof/parse/kernel.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | import cxxfilt, struct, binascii
19 |
20 | #Helper functions
21 |
22 |
23 | def demangle(name):
24 | """
25 | Demangle a C++ string
26 | """
27 | result = name
28 | try:
29 | result = cxxfilt.demangle(name)
30 | except:
31 | pass
32 | return result
33 |
34 |
35 | def getShortName(name):
36 | """
37 | Returns a shorter kernel name
38 | """
39 | sname = name.split("<")[0] \
40 | .replace("void ", "") \
41 | .replace("at::","") \
42 | .replace("cuda::", "") \
43 | .replace("native::","") \
44 | .replace("(anonymous namespace)::", "")
45 | sname = sname.split("(")[0]
46 | return sname
47 |
48 |
49 | class Kernel(object):
50 | """
51 | This class stores information about a kernel.
52 | """
53 |
54 | kernels = []
55 | profStart = 0
56 |
57 | def __init__(self):
58 | self.kNameId = None
59 | self.kShortName = None
60 | self.kLongName = None
61 | self.kStartTime = None #GPU start time
62 | self.kEndTime = None #GPU end time
63 | self.kDuration = None
64 | self.device = None
65 | self.stream = None
66 | self.grid = ()
67 | self.block = ()
68 | self.corrId = None
69 | self.rStartTime = None #CPU start time
70 | self.rEndTime = None #CPU end time
71 | self.rDuration = None
72 | self.tid = None
73 | self.pid = None
74 | self.objId = None
75 | self.timeOffset = None
76 |
77 | self.layerMarkers = []
78 | self.traceMarkers = []
79 | self.reprMarkers = []
80 | self.pyprofMarkers = []
81 | self.seqMarkers = []
82 | self.otherMarkers = []
83 | self.altMarkers = []
84 | self.seqId = []
85 | self.altSeqId = []
86 | self.layer = []
87 |
88 | self.subSeqId = None
89 | self.dir = None
90 | self.mod = []
91 | self.op = []
92 |
93 | def setKernelInfo(self, info):
94 | self.kNameId = info['kNameId']
95 | self.corrId = int(info['correlationId'])
96 | start = int(info['start'])
97 | end = int(info['end'])
98 | assert end > start, "This assertion can fail for very large profiles. It usually fails when start = end = 0."
99 | self.kStartTime = start
100 | self.kEndTime = end
101 | self.kDuration = end - start
102 | assert (start > Kernel.profStart)
103 | self.device = int(info['deviceId'])
104 | self.stream = int(info['streamId'])
105 | self.grid = (info['gridX'], info['gridY'], info['gridZ'])
106 | self.block = (info['blockX'], info['blockY'], info['blockZ'])
107 | self.timeOffset = Kernel.profStart
108 | self.setKernelName(info['name'])
109 | self.setRunTimeInfo(info)
110 |
111 | def setKernelName(self, name):
112 | cadena = demangle(name)
113 | self.kLongName = cadena
114 | self.kShortName = getShortName(cadena)
115 |
116 | def setRunTimeInfo(self, info):
117 | self.rStartTime = info['rStart']
118 | self.rEndTime = info['rEnd']
119 | self.rDuration = info['rEnd'] - info['rStart']
120 | self.pid = info['pid']
121 | self.tid = info['tid']
122 | self.objId = info['objId']
123 | assert (self.rStartTime < self.rEndTime)
124 | assert (self.rStartTime < self.kStartTime)
125 |
126 | def setMarkerInfo(self, info):
127 | self.layerMarkers, self.traceMarkers, self.reprMarkers, self.pyprofMarkers, self.seqMarkers, self.otherMarkers, self.altMarkers, self.seqId, self.altSeqId, self.layer = info
128 | self.subSeqId = 0
129 |
130 | def setDirection(self):
131 | """
132 | Set direction (fprop, bprop) based on PyTorch sequence markers.
133 | It is a heuristic and not a foolproof method.
134 | """
135 | if any("Backward, seq = " in x for x in self.seqMarkers) or \
136 | any("backward, seq = " in x for x in self.seqMarkers) or \
137 | any("Backward0, seq = " in x for x in self.seqMarkers):
138 | self.dir = "bprop"
139 | else:
140 | self.dir = "fprop"
141 |
142 | def setOp(self):
143 | """
144 | Detect and set the class/module (mod) and operation (op)
145 | of the kernel e.g. torch.nn.functional / linear, torch / sigmoid.
146 | The lookup sequence we use is
147 | NVTX markers inserted by pyprof
148 | NVTX markers inserted by PyTorch in bprop
149 | NVTX markers inserted by PyTorch in fprop
150 | It is a heuristic and not a foolproof method.
151 | """
152 |
153 | def sanitize(name):
154 | name = name.replace("torch","") \
155 | .replace("autograd","") \
156 | .replace("_backward","") \
157 | .replace("::","") \
158 | .replace("jit","") \
159 | .replace("(anonymous namespace)","")
160 | head, sep, tail = name.partition("Backward")
161 | return head
162 |
163 | #Check pyprof markers
164 | for m in self.pyprofMarkers:
165 | assert ("mod" in m) and ("op" in m) and ("args" in m)
166 | t = eval(m)
167 | self.op.append(t['op'])
168 | self.mod.append(t['mod'])
169 |
170 | if len(self.op):
171 | return
172 |
173 | #Check bprop kernel markers
174 | for m in self.seqMarkers:
175 | if ("backward, seq = " in m) or ("Backward, seq = " in m):
176 | op = m.split(",")[0]
177 | op = sanitize(op)
178 | self.op.append(op)
179 | self.mod.append('na')
180 |
181 | if len(self.op):
182 | return
183 |
184 | #Check markers with "seq = "
185 | for m in self.seqMarkers:
186 | if ", seq = " in m:
187 | op = m.split(",")[0]
188 | self.op.append(op)
189 | self.mod.append('na')
190 |
191 | if len(self.op):
192 | return
193 |
194 | #If nothing else
195 | if len(self.otherMarkers):
196 | self.op.append(self.otherMarkers[0])
197 | self.mod.append('na')
198 |
199 | def print(self):
200 | """
201 | Print kernel information. This is used by prof.py.
202 | """
203 |
204 | a = lambda: None
205 | a.kShortName = self.kShortName
206 | a.kDuration = self.kDuration
207 | #a.layerMarkers = self.layerMarkers
208 | a.layer = self.layer
209 | a.trace = self.traceMarkers
210 | a.reprMarkers = self.reprMarkers
211 | a.marker = self.pyprofMarkers
212 | a.seqMarker = self.seqMarkers
213 |
214 | a.seqId = self.seqId
215 | a.subSeqId = self.subSeqId
216 | a.altSeqId = self.altSeqId
217 |
218 | a.dir = self.dir
219 | a.mod = self.mod
220 | a.op = self.op
221 |
222 | a.tid = self.tid
223 | a.device = self.device
224 | a.stream = self.stream
225 | a.grid = self.grid
226 | a.block = self.block
227 | a.kLongName = self.kLongName
228 |
229 | print(a.__dict__)
230 |
--------------------------------------------------------------------------------
/pyprof/parse/parse.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | """
18 | Parse the SQLite3 database from NVprof or Nsight and print a dictionary for every kernel.
19 | """
20 |
21 | import sys
22 | import os
23 | import argparse
24 | from tqdm import tqdm
25 |
26 | from .db import DB
27 | from .kernel import Kernel
28 | from .nvvp import NVVP
29 | from .nsight import Nsight
30 |
31 |
32 | def parseArgs():
33 | parser = argparse.ArgumentParser(prog=sys.argv[0], description="Parse SQLite3 DB from NVprof or Nsight.")
34 | parser.add_argument("file", type=str, default=None, help="SQLite3 database.")
35 |
36 | args = parser.parse_args()
37 |
38 | if not os.path.isfile(args.file):
39 | raise parser.error("No such file '{}'.".format(args.file))
40 |
41 | return args
42 |
43 |
44 | def dbIsNvvp(db):
45 | cmd = "SELECT * FROM sqlite_master where type='table' AND name='StringTable'"
46 | result = db.select(cmd)
47 |     return len(result) == 1
48 |
49 |
50 | def main():
51 | args = parseArgs()
52 |
53 | db = DB(args.file)
54 | nvvp = None
55 | if dbIsNvvp(db):
56 | nvvp = NVVP(db)
57 | else:
58 | nvvp = Nsight(db)
59 |
60 | kInfo = nvvp.getKernelInfo()
61 | if len(kInfo) == 0:
62 | print("Found 0 kernels. Exiting.", file=sys.stderr)
63 | db.close()
64 | sys.exit(0)
65 | else:
66 | print("Found {} kernels. Getting info for each kernel.".format(len(kInfo)), file=sys.stderr)
67 |
68 | nvvp.createMarkerTable()
69 |
70 | prevSeqId = -1
71 | prevSubSeqId = -1
72 | prevOp = "na"
73 |
74 | Kernel.profStart = nvvp.getProfileStart()
75 |
76 | for i in tqdm(range(len(kInfo)), ascii=True):
77 | info = kInfo[i]
78 | k = Kernel()
79 |
80 | #Calculate/encode object ID
81 | nvvp.encode_object_id(info)
82 |
83 | #Set kernel info
84 | k.setKernelInfo(info)
85 |
86 | #Get and set marker and seqid info
87 | info = nvvp.getMarkerInfo(k.objId, k.rStartTime, k.rEndTime)
88 | k.setMarkerInfo(info)
89 |
90 | #If the seqId contains both 0 and non zero integers, remove 0.
91 | if any(seq != 0 for seq in k.seqId) and (0 in k.seqId):
92 | k.seqId.remove(0)
93 |
94 | #Set direction (it uses seq id)
95 | k.setDirection()
96 |
97 | #Set op
98 | k.setOp()
99 |
100 | #The following code is based on heuristics.
101 | #TODO: Refactor.
102 | #Assign subSeqId, adjust seqId and altSeqId
103 | #seqId can be 0.
104 | #A kernel can have multiple seqIds both in fprop and bprop.
105 | #In bprop, seqIds might not decrease monotonically. I have observed a few blips.
106 | if len(k.seqId):
107 | assert (k.dir in ["fprop", "bprop"])
108 | if (k.dir == "fprop"):
109 | #Check if there is a sequence id larger than the previous
110 | inc = (k.seqId[-1] > prevSeqId)
111 | if inc:
112 | currSeqId = [x for x in k.seqId if x > prevSeqId][0]
113 | else:
114 | currSeqId = prevSeqId
115 | else:
116 | currSeqId = k.seqId[0]
117 |
118 | #if ((currSeqId == prevSeqId) and (k.op == prevOp)):
119 | if ((currSeqId == prevSeqId) and (k.op == prevOp)) or ((k.op[0] == "forward") and (k.op == prevOp) and
120 | (k.mod[0] in ["LSTMCell", "GRUCell", "RNNCell"])):
121 | #The second condition is to trap cases when pytorch does not use cudnn for a LSTMCell.
122 | k.subSeqId = prevSubSeqId + 1
123 |
124 | prevSeqId = currSeqId
125 | prevSubSeqId = k.subSeqId
126 | prevOp = k.op
127 |
128 |             #Keep currSeqId in k.seqId, move everything else to k.altSeqId.
129 |             #Iterate over copies; removing from a list while iterating over it can skip elements.
130 |             for s in list(k.seqId):
131 |                 if s != currSeqId:
132 |                     k.seqId.remove(s)
133 |                     k.altSeqId.append(s)
134 | 
135 |             for s in list(k.altSeqId):
136 |                 if s == currSeqId:
137 |                     k.altSeqId.remove(s)
138 | k.altSeqId = list(set(k.altSeqId))
139 | if (len(k.altSeqId)):
140 | (k.altSeqId).sort()
141 |
142 | k.print()
143 |
144 | db.close()
145 |
146 |
147 | if __name__ == '__main__':
148 | main()
149 |
--------------------------------------------------------------------------------
/pyprof/prof/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
--------------------------------------------------------------------------------
/pyprof/prof/__main__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | from .prof import main
19 |
20 | if __name__ == '__main__':
21 | main()
22 |
--------------------------------------------------------------------------------
/pyprof/prof/activation.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | from .base import OperatorLayerBase
19 | from .tensor import Tensor
20 |
21 |
22 | class Activation(OperatorLayerBase):
23 | """
24 | This class handles the various activation functions.
25 | """
26 |
27 | ops = [
28 | "celu", "elu", "elu_", "hardshrink", "hardtanh", "hardtanh_", "leaky_relu", "leaky_relu_", "logsigmoid",
29 | "prelu", "relu", "relu_", "relu6", "rrelu", "rrelu_", "selu", "sigmoid", "softplus", "softshrink", "softsign",
30 | "tanh", "tanhshrink", "threshold", "threshold_"
31 | ]
32 |
33 | def __init__(self, d):
34 | marker = eval(d.argMarker[0])
35 | mod = marker['mod']
36 | op = marker['op']
37 | args = marker['args']
38 |
39 | self.mod_ = mod
40 | self.op_ = op
41 |
42 | assert (mod in ["torch.nn.functional", "torch", "Tensor"])
43 |
44 | #Filter out named parameters
45 | args = list(filter(lambda x: x['name'] == '', args))
46 |
47 | assert (len(args) >= 1)
48 | arg = args[0]
49 | assert (arg['type'] == "tensor")
50 |
51 | self.input = Tensor(arg['shape'], arg['dtype'])
52 | self.dir = d.dir
53 |
54 | def params(self):
55 | return str(self.input)
56 |
57 | def flops(self):
58 | # TODO: revise based on op
59 | return self.input.size
60 |
61 | def bytes(self):
62 | # TODO: revise based on op
63 | direction = self.dir
64 | b = self.input.bytes
65 | # fprop is 1 read, 1 write
66 | # bprop is 2 reads, 1 write
67 | b *= 2 if direction == "fprop" else 3
68 | return b
69 |
70 | def tc(self):
71 | return "-"
72 |
73 | def op(self):
74 | return self.op_
75 |
76 | def mod(self):
77 | return self.mod_
78 |
--------------------------------------------------------------------------------
/pyprof/prof/base.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | from abc import ABC, abstractmethod
19 |
20 |
21 | class OperatorLayerBase(ABC):
22 | """
23 | Base class for all layers and operators.
24 | Every derived class should have the following functions.
25 | """
26 |
27 | @abstractmethod
28 | def tc(self):
29 | """
30 | Tensor core usage by the kernel.
31 | Return "1" (yes), "0" (no, but possible), "-" (not applicable)
32 | """
33 | pass
34 |
35 | @abstractmethod
36 | def params(self):
37 | """
38 | Kernel parameters to be printed.
39 | """
40 | pass
41 |
42 | @abstractmethod
43 | def flops(self):
44 | """
45 | Note that 1 FMA = 2 flops.
46 | """
47 | pass
48 |
49 | @abstractmethod
50 | def bytes(self):
51 | pass
52 |
53 | @abstractmethod
54 | def mod(self):
55 | """
56 | Name of the module/class e.g. torch.nn.functional.
57 | """
58 | pass
59 |
60 | @abstractmethod
61 | def op(self):
62 | """
63 | Name of the operator e.g. sigmoid.
64 | """
65 | pass
66 |
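# Illustrative sketch (not part of the library): a hypothetical derived class
# implementing this interface for a simple elementwise copy operator. The
# byte and flop counts below are assumptions for demonstration only.
#
#   class Copy(OperatorLayerBase):
#
#       def __init__(self, nbytes):
#           self.nbytes = nbytes
#
#       def tc(self):
#           return "-"                  # tensor cores not applicable
#
#       def params(self):
#           return "bytes={}".format(self.nbytes)
#
#       def flops(self):
#           return 0                    # a copy does no arithmetic
#
#       def bytes(self):
#           return 2 * self.nbytes      # 1 read + 1 write
#
#       def mod(self):
#           return "Tensor"
#
#       def op(self):
#           return "copy_"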
--------------------------------------------------------------------------------
/pyprof/prof/convert.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | from .base import OperatorLayerBase
19 | from .tensor import Tensor
20 |
21 |
22 | class Convert(OperatorLayerBase):
23 | """
24 | Class to handle convert operations.
25 | """
26 | ops = ["byte", "char", "double", "float", "half", "int", "long", "short", "to"]
27 |
28 | def __init__(self, d):
29 | marker = eval(d.argMarker[0])
30 | mod = marker['mod']
31 | op = marker['op']
32 | args = marker['args']
33 |
34 | self.mod_ = mod
35 | self.op_ = op
36 |
37 | assert (mod == "Tensor")
38 | assert (op in Convert.ops)
39 | assert (len(args) == 1)
40 |
41 | t = args[0]
42 | if t['type'] == "tensor":
43 | self.input = Tensor(t['shape'], t['dtype'])
44 | else: # scalar
45 | self.input = Tensor([], t['type'])
46 |
47 | if op == "to":
48 | # the output dtype is unknown
49 | self.output = self.input
50 | else:
51 | self.output = Tensor(self.input.shape, op)
52 |
53 | def params(self):
54 | return str(self.input)
55 |
56 | def op(self):
57 | return self.op_
58 |
59 | def mod(self):
60 | return self.mod_
61 |
62 | def tc(self):
63 | return "-"
64 |
65 | def flops(self):
66 | return 0
67 |
68 | def bytes(self):
69 | return self.input.bytes + self.output.bytes
70 |
--------------------------------------------------------------------------------
/pyprof/prof/data.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | from .utility import Utility
19 |
20 |
21 | class Data(object):
22 | """
23 | Class to store all the data for every kernel e.g. name, bytes, flops, device, stream etc.
24 | """
25 |
26 | def __init__(self, kernel):
27 | #Available from NVprof
28 | self.tid = kernel['tid']
29 | self.device = kernel['device']
30 | self.stream = kernel['stream']
31 | self.grid = str(kernel['grid']).replace(" ", "").replace("(", "").replace(")", "")
32 | self.block = str(kernel['block']).replace(" ", "").replace("(", "").replace(")", "")
33 | self.name = kernel['kShortName'].replace(" ", "_")
34 | self.lName = kernel['kLongName']
35 | self.sil = kernel['kDuration'] #units ns
36 |
37 | self.index = None
38 |
39 | #Markers
40 | self.argMarker = kernel['marker']
41 | self.modMarker = kernel['reprMarkers']
42 | self.seqMarker = kernel['seqMarker']
43 |
44 | self.layer = kernel['layer']
45 | self.trace = kernel['trace']
46 |
47 | self.seqId = kernel['seqId']
48 | self.altSeqId = kernel['altSeqId']
49 |
50 | self.dir = kernel['dir']
51 | self.sub = kernel['subSeqId']
52 |
53 | self.mod = "na"
54 | self.op = "na"
55 | self.params = {"na": "na"}
56 | self.tc = "na"
57 | self.flops = 0
58 | self.bytes = 0
59 |
60 | def setParams(self, params):
61 | # TODO: Remove the else block after refactoring.
62 | if type(params) == str:
63 | self.params = params
64 | else:
65 | #Remove space from params
66 | qaz = ""
67 | for key, value in params.items():
68 | if "type" not in key:
69 | qaz += "{}={},".format(key, value)
70 | else:
71 | if type(value) is str:
72 | qaz += "{},".format(Utility.typeToString(value))
73 | else:
74 | qaz += "{}".format(value)
75 |
76 | self.params = qaz.replace(" ", "")
77 |
--------------------------------------------------------------------------------
/pyprof/prof/dropout.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | from .base import OperatorLayerBase
19 | from .tensor import Tensor
20 |
21 | class Dropout(OperatorLayerBase):
22 |
23 | def __init__(self, d):
24 | marker = eval(d.argMarker[0])
25 | mod = marker['mod']
26 | op = marker['op']
27 | args = marker['args']
28 |
29 | self.marker = marker
30 | self.mod_ = mod
31 | self.op_ = op
32 | self.args = args
33 |
34 | assert (mod == "torch.nn.functional")
35 | assert (op == "dropout")
36 |
37 | self.inp = Tensor(args[0]['shape'], args[0]['dtype'])
38 | self.dir = d.dir
39 |
40 | return
41 |
42 | def params(self):
43 | return str(self.inp)
44 |
45 | def op(self):
46 | return self.op_
47 |
48 | def mod(self):
49 | return self.mod_
50 |
51 | def tc(self):
52 | return "-"
53 |
54 | def bytes(self):
55 | #Ignoring the cost of writing and reading the mask
56 | return self.inp.bytes * 2
57 |
58 | def flops(self):
59 | # Note: This is approximate and depends on the RNG
60 | return 5 * self.inp.size
61 |
--------------------------------------------------------------------------------
/pyprof/prof/dtype.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2020, Aditya Agrawal.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | class Dtype(object):
18 |
19 | _types = {
20 | "uint8" : (1, "uint8"),
21 | "int8" : (1, "int8"),
22 | "byte" : (1, "byte"),
23 | "char" : (1, "char"),
24 | "bool" : (1, "bool"),
25 |
26 | "float16" : (2, "fp16"),
27 | "half" : (2, "fp16"),
28 | "int16" : (2, "int16"),
29 | "short" : (2, "int16"),
30 |
31 | "float32" : (4, "fp32"),
32 | "float" : (4, "fp32"),
33 | "int32" : (4, "int32"),
34 | "int" : (4, "int32"),
35 |
36 | "int64" : (8, "int64"),
37 | "long" : (8, "int64"),
38 | "float64" : (8, "fp64"),
39 | "double" : (8, "fp64"),
40 | }
41 |
42 | @staticmethod
43 | def types():
44 | t = Dtype._types.keys()
45 | return list(t)
46 |
47 | def __init__(self, dtype):
48 | assert dtype in Dtype.types()
49 | size, name = Dtype._types[dtype]
50 | self._itemsize = size
51 | self._name = name
52 |
53 | def __str__(self):
54 | return self._name
55 |
56 | @property
57 | def itemsize(self):
58 | return self._itemsize
59 |
60 | def main():
61 | print(Dtype.types())
62 | for i in Dtype.types():
63 | dt = Dtype(i)
64 | print(i, dt, dt.itemsize)
65 |
66 | if __name__ == '__main__':
67 | main()
68 |
--------------------------------------------------------------------------------
/pyprof/prof/embedding.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | from .base import OperatorLayerBase
19 | from .tensor import Tensor
20 |
21 |
22 | class Embedding(OperatorLayerBase):
23 |
24 | def __init__(self, d):
25 | marker = eval(d.argMarker[0])
26 | mod = marker['mod']
27 | op = marker['op']
28 | args = marker['args']
29 |
30 | self.mod_ = mod
31 | self.op_ = op
32 |
33 | assert (mod == "torch.nn.functional")
34 | assert (op == "embedding")
35 |
36 | input = args[0]
37 | embedding = args[1]
38 |
39 | self.input = Tensor(input['shape'], input['dtype'])
40 | self.embedding = Tensor(embedding['shape'], embedding['dtype'])
41 |
42 | assert (len(self.embedding.shape) == 2)
43 |
44 | self.dir = d.dir
45 | self.sub = d.sub
46 | return
47 |
48 | def params(self):
49 | return str(self.input) + ";" + str(self.embedding)
50 |
51 | def op(self):
52 | return self.op_
53 |
54 | def mod(self):
55 | return self.mod_
56 |
57 | def tc(self):
58 | return "-"
59 |
60 | def bytes(self):
61 | b = 0
62 | if self.dir == "fprop":
63 | # read indices
64 | b += self.input.bytes
65 | # read and write the embedding values
66 | b += 2 * self.input.size * self.embedding.shape[1] * self.embedding.itemsize
67 | else:
68 | # 3 times the size of the incoming gradient
69 | b = 3 * self.input.size * self.embedding.shape[1] * self.embedding.itemsize
70 |
71 | if self.sub > 0:
72 | b = 0
73 |
74 | return b
75 |
76 | def flops(self):
77 | # Note: not implemented yet
78 | return 0
79 |
--------------------------------------------------------------------------------
/pyprof/prof/linear.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | from collections import OrderedDict
19 | from .tc import TC_Whitelist
20 | from .utility import Utility
21 | from .base import OperatorLayerBase
22 |
23 |
24 | class Linear(OperatorLayerBase):
25 | '''
26 | Notes:
27 |     If the bias occurs before the GEMM, then it is 1 write (bias expansion).
28 |     If the bias occurs after, then it is 1 read and 1 write.
29 |     The bias in bprop is a reduction and hence is 1 read.
30 | '''
31 |
32 | gemmKernels = [
33 | "gemm", "gemv", "dot_kernel", "splitKreduce_kernel",
34 | "reduce_1Block_kernel", "cutlass"
35 | ]
36 |
37 | biasKernels = [
38 | "kernelReduceContigDim", "kernelReduceNoncontigDim_shared",
39 | "elementwise_kernel", "reduce_kernel", "kernelPointwiseApply2",
40 | "2d_grouped_direct_kernel"
41 | ]
42 |
43 | def setXWBMNK(self, args):
44 | x = None
45 | w = None
46 | b = None
47 | if (len(args) == 2):
48 | x, w = args
49 | elif (len(args) == 3):
50 | x, w, b = args
51 | assert (x['type'] == w['type'] == "tensor")
52 | if (b['type'] == "tensor"):
53 | assert (len(b['shape']) == 1)
54 | elif (b['type'] == "NoneType"):
55 | assert b['value'] is None
56 | b = None
57 | else:
58 | assert False
59 | else:
60 | assert False
61 |
62 | assert (len(w['shape']) == 2)
63 | k1 = x['shape'][-1]
64 | n, k2 = w['shape']
65 | assert (k1 == k2)
66 | if b is not None:
67 | assert (b['shape'][0] == n)
68 | t1 = x['dtype']
69 | t2 = w['dtype']
70 | assert (t1 == t2)
71 |
72 | # X, W, B
73 | self.x = x['shape']
74 | self.w = w['shape']
75 | self.b = b['shape'] if b is not None else None
76 | self.type = t1
77 |
78 | # M, N, K
79 | #n = Utility.numElems(x[0:-1])
80 | n = self.x[0:-1]
81 | k = self.x[-1]
82 | m, k1 = self.w
83 | assert (k == k1)
84 |
85 | self.m = m
86 | self.n = n
87 | self.k = k
88 |
89 | def tc(self):
90 | if self.op() == "linear":
91 | if self.name in TC_Whitelist():
92 | return 1
93 | return 0
94 | else:
95 | return "-"
96 |
97 | def __init__(self, d):
98 | self.name = d.name
99 | self.dir = d.dir
100 | self.sub = d.sub
101 |
102 | marker = eval(d.argMarker[0])
103 | mod = marker['mod']
104 | op = marker['op']
105 | args = marker['args']
106 |
107 | assert (mod == "torch.nn.functional")
108 | assert (op == "linear")
109 |
110 | self.setXWBMNK(args)
111 |
112 | if any(x in d.name for x in Linear.gemmKernels):
113 | self.op_ = "linear"
114 | else:
115 | assert any(x in d.name for x in Linear.biasKernels), f"Kernel name: {d.name}"
116 | self.op_ = "bias"
117 | '''
118 | elif (("kernelPointwiseApply2" in d.name) or ("kernelReduceContigDim" in d.name) or ("kernelReduceNoncontigDim_shared" in d.name)):
119 | #bias expansion was before the gemm
120 | self.op_ = "bias"
121 |
122 | elif ("elementwise_kernel" in d.name):
123 | #Bias addition happens later with a broadcast tensor
124 | self.op_ = "bias"
125 | assert (len(d.argMarker) == 2)
126 | marker = eval(d.argMarker[1])
127 | mod = marker['mod']
128 | op = marker['op']
129 | args = marker['args']
130 |
131 | assert (mod == "Tensor")
132 | assert (op == "__iadd__")
133 | assert (len(args) == 2)
134 | mn = args[0]['shape']
135 | b = args[1]['shape']
136 | assert (len(b) == 1)
137 |
138 | assert (mn == (self.n + (self.m,)))
139 | assert (b == self.b)
140 |
141 | else:
142 | assert False
143 | '''
144 |
145 | def params(self):
146 | #p = OrderedDict([('X', self.x), ('W', self.w), ('B', self.b), ('type', self.type)])
147 |
148 | m, n, k, x, w, t = self.m, self.n, self.k, self.x, self.w, self.type
149 | if len(n) == 1:
150 | n = n[0]
151 |
152 | if self.op_ == "linear":
153 | if self.dir == "fprop":
154 | p = OrderedDict([('M', m), ('N', n), ('K', k), ('type', t)])
155 | elif self.dir == "bprop":
156 | if self.sub == 0: #dgrad (most likely)
157 | p = OrderedDict([('M', k), ('N', n), ('K', m), ('type', t)])
158 | elif self.sub == 1: #wgrad (most likely)
159 | p = OrderedDict([('M', k), ('N', m), ('K', n), ('type', t)])
160 | else:
161 | #This happens when there are additional kernels for reduction
162 | p = OrderedDict([('X', x), ('W', w), ('type', t)])
163 | else:
164 | assert False
165 |
166 | elif self.op_ == "bias":
167 | p = OrderedDict([('M', m), ('N', n), ('type', t)])
168 | else:
169 | assert False
170 | return p
171 |
172 | def op(self):
173 | return self.op_
174 |
175 | def bytesFlops(self):
176 |
177 | m = self.m
178 | n = Utility.numElems(self.n)
179 | k = self.k
180 |
181 | if self.op_ == "linear":
182 | if self.dir == "fprop":
183 | f = m * n * k * 2
184 |                 b = (m * n + m * k + n * k) * Utility.typeToBytes(self.type)
185 | elif self.dir == "bprop":
186 | if self.sub == 0: #dgrad (most likely)
187 | f = m * n * k * 2
188 |                     b = (m * n + m * k + n * k) * Utility.typeToBytes(self.type)
189 | elif self.sub == 1: #wgrad (most likely)
190 | f = m * n * k * 2
191 |                     b = (m * n + m * k + n * k) * Utility.typeToBytes(self.type)
192 | else:
193 | #This happens when there are additional kernels for reduction
194 | f = 0
195 | b = 0
196 | else:
197 | assert False
198 |
199 | elif self.op_ == "bias":
200 | f = m * n
201 | b = 2 * m * n * Utility.typeToBytes(self.type)
202 | else:
203 | assert False
204 | return b, f
205 |
206 | # TODO: Fix bytes and flops with CUTLASS kernels.
207 | def bytes(self):
208 | b, f = self.bytesFlops()
209 | return b
210 |
211 | def flops(self):
212 | b, f = self.bytesFlops()
213 | return f
214 |
215 | def mod(self):
216 | return self.mod_
217 |
--------------------------------------------------------------------------------
/pyprof/prof/loss.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | from collections import OrderedDict
19 | from .utility import Utility
20 | from .base import OperatorLayerBase
21 |
22 | #TODO: Add support for additional loss functions.
23 |
24 |
25 | class MSELoss(OperatorLayerBase):
26 |
27 | def __init__(self, d):
28 | marker = eval(d.argMarker[0])
29 | mod = marker['mod']
30 | op = marker['op']
31 | args = marker['args']
32 |
33 | self.marker = marker
34 | self.mod_ = mod
35 | self.op_ = op
36 | self.args = args
37 |
38 | assert (mod == "torch.nn.functional")
39 | assert (op == "mse_loss")
40 | assert (len(args) == 3)
41 |
42 | #Get input, target and reduction
43 | if (args[0]['name'] == ""):
44 | x = args[0]
45 | else:
46 | x = list(filter(lambda x: x['name'] == "input", args))[0]
47 |
48 | if (args[1]['name'] == ""):
49 | y = args[1]
50 | else:
51 | y = list(filter(lambda x: x['name'] == "target", args))[0]
52 |
53 | if (args[2]['name'] == ""):
54 | r = args[2]
55 | else:
56 | r = list(filter(lambda x: x['name'] == "reduction", args))[0]
57 |
58 | assert (x['type'] == y['type'] == "tensor")
59 | assert (x['shape'] == y['shape'])
60 | assert (x['dtype'] == y['dtype'])
61 | assert (r['type'] == "str")
62 | assert (r['value'] in ["none", "mean", "sum"])
63 |
64 | self.shape = x['shape']
65 | self.type = x['dtype']
66 | self.red = r['value']
67 | self.dir = d.dir
68 |
69 | def params(self):
70 | p = OrderedDict([('T', self.shape), ('type', self.type), ('red', self.red)])
71 | return p
72 |
73 | def elems(self):
74 | red = self.red
75 | e = Utility.numElems(self.shape)
76 |
77 | if self.dir == "fprop":
78 | if red == "none":
79 | e *= 3
80 | else:
81 | e *= 2
82 | else:
83 | if red == "none":
84 | e *= 4
85 | else:
86 | e *= 3
87 | return e
88 |
89 | def bytes(self):
90 | return self.elems() * Utility.typeToBytes(self.type)
91 |
92 | def flops(self):
93 | return self.elems() * 2 + 1
94 |
95 | def tc(self):
96 | return "-"
97 |
98 | def op(self):
99 | return self.op_
100 |
101 | def mod(self):
102 | return self.mod_
103 |
--------------------------------------------------------------------------------
/pyprof/prof/memory.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2020, Aditya Agrawal.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | from .base import OperatorLayerBase
18 | from .tensor import Tensor
19 |
20 | def readMarker(d):
21 | marker = eval(d.argMarker[0])
22 | return marker['mod'], marker['op'], marker['args']
23 |
24 | class OneZero(OperatorLayerBase):
25 | """
26 | Support for torch.ones, torch.zeros etc.
27 | Fill a tensor with ones or zeros.
28 | """
29 |
30 | ops = ["ones", "ones_like", "zero_", "zeros", "zeros_like"]
31 |
32 | def __init__(self, d):
33 | mod, op, args = readMarker(d)
34 | assert mod in ["torch", "Tensor"]
35 | assert op in OneZero.ops
36 |
37 | self.mod_ = mod
38 | self.op_ = op
39 |
40 | # For ones_like, zero_, zeros_like, the input is a tensor.
41 | if op in ["ones_like", "zero_", "zeros_like"]:
42 | assert(len(args) == 1)
43 | arg = args[0]
44 | self.input = Tensor(arg['shape'], arg['dtype'])
45 |
46 | # For ones and zeros, the input can be a list, tuple, sequence of integers.
47 | # E.g. torch.ones((3,5,6)) or torch.ones([3,5,6]) or torch.ones(3,5,6)
48 | else:
49 | assert op in ["ones", "zeros"]
50 | # TODO: Assume the output dtype is float
51 | if args[0]['type'] in ['list', 'tuple']:
52 | assert(len(args) == 1)
53 | self.input = Tensor(args[0]['value'], "float")
54 | elif args[0]['type'] == "int":
55 | # Get all unnamed arguments of type int
56 | args = list(filter(lambda x: x['name'] == "" and x['type'] == "int", args))
57 | shape = [x['value'] for x in args]
58 | self.input = Tensor(shape, "float")
59 | else:
60 | assert False
61 |
62 | def params(self):
63 | return str(self.input)
64 |
65 | def tc(self):
66 | return "-"
67 |
68 | def op(self):
69 | return self.op_
70 |
71 | def mod(self):
72 | return self.mod_
73 |
74 | def bytes(self):
75 | return self.input.bytes
76 |
77 | def flops(self):
78 | return 0
79 |
80 | class Fill(OperatorLayerBase):
81 | """
82 | Support for Tensor.fill_.
83 | Fill a tensor with a specific value.
84 | """
85 |
86 | def __init__(self, d):
87 | mod, op, args = readMarker(d)
88 | assert mod == "Tensor"
89 | assert op == "fill_"
90 |
91 | self.mod_ = mod
92 | self.op_ = op
93 |
94 | assert(len(args) == 2)
95 | arg = args[0]
96 | self.input = Tensor(arg['shape'], arg['dtype'])
97 |
98 | def params(self):
99 | return str(self.input)
100 |
101 | def tc(self):
102 | return "-"
103 |
104 | def op(self):
105 | return self.op_
106 |
107 | def mod(self):
108 | return self.mod_
109 |
110 | def bytes(self):
111 | return self.input.bytes
112 |
113 | def flops(self):
114 | return 0
115 |
116 | class Full(OperatorLayerBase):
117 | """
118 | Support for torch.full.
119 | Create a tensor of specified size and filled with a specified value.
120 | """
121 |
122 | def __init__(self, d):
123 | mod, op, args = readMarker(d)
124 | assert mod == "torch"
125 | assert op == "full"
126 |
127 | self.mod_ = mod
128 | self.op_ = op
129 |
130 | assert(len(args) == 2)
131 | arg1, arg2 = args
132 | assert arg1['type'] in ['list', 'tuple']
133 | # TODO: Add more types for arg2
134 | assert arg2['type'] in ['float', 'int']
135 | self.output = Tensor(arg1['value'], arg2['type'])
136 |
137 | def params(self):
138 | return str(self.output)
139 |
140 | def tc(self):
141 | return "-"
142 |
143 | def op(self):
144 | return self.op_
145 |
146 | def mod(self):
147 | return self.mod_
148 |
149 | def bytes(self):
150 | return self.output.bytes
151 |
152 | def flops(self):
153 | return 0
154 |
--------------------------------------------------------------------------------
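A sketch of how a `torch.ones(3, 5, 6)` call, recorded as an argMarker, is sized by OneZero above. `FakeData` is a hypothetical stand-in for the real `Data` object; per the TODO in the constructor, the output dtype is assumed to be a 4-byte "float".

    from pyprof.prof.memory import OneZero

    class FakeData:
        def __init__(self, marker):
            self.argMarker = [marker]

    marker = str({
        'mod': 'torch',
        'op': 'ones',
        'args': [
            {'name': '', 'type': 'int', 'value': 3},
            {'name': '', 'type': 'int', 'value': 5},
            {'name': '', 'type': 'int', 'value': 6},
        ]})

    op = OneZero(FakeData(marker))
    print(op.params())  # "[3,5,6]" plus the dtype suffix
    print(op.bytes())   # 90 elements written, times the itemsize of "float" (assumed 4)
    print(op.flops())   # 0 -- a fill moves memory but does no arithmetic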
/pyprof/prof/misc.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | from collections import OrderedDict
19 | from .utility import Utility
20 | from .base import OperatorLayerBase
21 |
22 |
23 | class Foo(OperatorLayerBase):
24 | """
25 | An object of Foo is instantiated when we detect an unsupported operator.
26 | """
27 |
28 | def __init__(self, d):
29 | marker = eval(d.argMarker[0])
30 | mod = marker['mod']
31 | op = marker['op']
32 | args = marker['args']
33 |
34 | self.marker = marker
35 | self.mod_ = mod
36 | self.op_ = op
37 | self.args = args
38 |
39 | shapes = []
40 | types = []
41 |
42 | for arg in args:
43 | if arg['type'] == "tensor":
44 | shapes.append(arg['shape'])
45 | types.append(arg['dtype'])
46 |
47 | self.shape = shapes
48 | self.type = types
49 |
50 | def params(self):
51 | p = OrderedDict([('T', self.shape), ('type', self.type)])
52 | return p
53 |
54 | def tc(self):
55 | return "-"
56 |
57 | def op(self):
58 | return self.op_
59 |
60 | def mod(self):
61 | return self.mod_
62 |
63 | def flops(self):
64 | return 0
65 |
66 | def bytes(self):
67 | return 0
68 |
69 |
70 | class Copy(OperatorLayerBase):
71 |
72 | def __init__(self, d):
73 | marker = eval(d.argMarker[0])
74 | mod = marker['mod']
75 | op = marker['op']
76 | args = marker['args']
77 |
78 | self.marker = marker
79 | self.mod_ = mod
80 | self.op_ = op
81 | self.args = args
82 |
83 | assert (mod == "Tensor")
84 | assert (op == "copy_")
85 | assert (len(args) == 2)
86 |
87 | dst, src = args
88 | assert (src['type'] == dst['type'])
89 | assert (src['shape'] == dst['shape'])
90 |
91 | self.shape = src['shape']
92 | self.stype = src['dtype']
93 | self.dtype = dst['dtype']
94 |
95 | def params(self):
96 | #The data type might be different
97 | p = OrderedDict([('T', self.shape), ('stype', self.stype), ('dtype', self.dtype)])
98 | return p
99 |
100 | def tc(self):
101 | return "-"
102 |
103 | def op(self):
104 | return self.op_
105 |
106 | def mod(self):
107 | return self.mod_
108 |
109 | def flops(self):
110 | return 0
111 |
112 | def elems(self):
113 | return Utility.numElems(self.shape)
114 |
115 | def bytes(self):
116 | return self.elems() * (Utility.typeToBytes(self.stype) + Utility.typeToBytes(self.dtype))
117 |
118 |
119 | class Clone(OperatorLayerBase):
120 |
121 | def __init__(self, d):
122 | marker = eval(d.argMarker[0])
123 | mod = marker['mod']
124 | op = marker['op']
125 | args = marker['args']
126 |
127 | self.marker = marker
128 | self.mod_ = mod
129 | self.op_ = op
130 | self.args = args
131 |
132 | assert (mod == "Tensor")
133 | assert (op == "clone")
134 | assert (len(args) == 1)
135 | t = args[0]
136 | self.shape = t['shape']
137 | self.type = t['dtype']
138 |
139 | def params(self):
140 | p = OrderedDict([('T', self.shape), ('type', self.type)])
141 | return p
142 |
143 | def flops(self):
144 | return 0
145 |
146 | def tc(self):
147 | return "-"
148 |
149 | def op(self):
150 | return self.op_
151 |
152 | def mod(self):
153 | return self.mod_
154 |
155 | def elems(self):
156 | return Utility.numElems(self.shape)
157 |
158 | def bytes(self):
159 | return 2 * self.elems() * Utility.typeToBytes(self.type)
160 |
161 |
162 | class Contiguous(OperatorLayerBase):
163 |
164 | def __init__(self, d):
165 | marker = eval(d.argMarker[0])
166 | mod = marker['mod']
167 | op = marker['op']
168 | args = marker['args']
169 |
170 | self.marker = marker
171 | self.mod_ = mod
172 | self.op_ = op
173 | self.args = args
174 |
175 | assert (mod == "Tensor")
176 | assert (op == "contiguous")
177 | assert (len(args) == 1)
178 | t = args[0]
179 | self.shape = t['shape']
180 | self.type = t['dtype']
181 |
182 | def params(self):
183 | p = OrderedDict([('T', self.shape), ('type', self.type)])
184 | return p
185 |
186 | def flops(self):
187 | return 0
188 |
189 | def bytes(self):
190 | return 2 * Utility.numElems(self.shape) * Utility.typeToBytes(self.type)
191 |
192 | def tc(self):
193 | return "-"
194 |
195 | def op(self):
196 | return self.op_
197 |
198 | def mod(self):
199 | return self.mod_
200 |
201 |
202 | class Any(OperatorLayerBase):
203 |
204 | def __init__(self, d):
205 | marker = eval(d.argMarker[0])
206 | mod = marker['mod']
207 | op = marker['op']
208 | args = marker['args']
209 |
210 | self.marker = marker
211 | self.mod_ = mod
212 | self.op_ = op
213 | self.args = args
214 |
215 | assert (mod == "Tensor")
216 | assert (op == "any")
217 | assert (len(args) in [1,2])
218 | t = args[0]
219 | # The input can be a tensor or scalar
220 | assert (t['type'] in ["tensor", "bool"])
221 |
222 | if t['type'] == "tensor":
223 | self.shape = t['shape']
224 | self.type = t['dtype']
225 | else:
226 | self.shape = (1,)
227 | self.type = t['type']
228 |
229 | self.sub = d.sub
230 | return
231 |
232 | def params(self):
233 | p = OrderedDict([('T', self.shape), ('type', self.type)])
234 | return p
235 |
236 | def op(self):
237 | return self.op_
238 |
239 | def mod(self):
240 | return self.mod_
241 |
242 | def tc(self):
243 | return "-"
244 |
245 | def flops(self):
246 | return 0
247 |
248 | def bytes(self):
249 | return Utility.numElems(self.shape) * Utility.typeToBytes(self.type)
250 |
--------------------------------------------------------------------------------
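The Copy model above charges one read at the source dtype and one write at the destination dtype per element. Rough numbers, assuming 2 and 4 bytes for float16 and float32:

    elems = 1024 * 1024
    bytes_moved = elems * (2 + 4)  # typeToBytes("float16") + typeToBytes("float32")
    print(bytes_moved)             # 6291456 for a half -> float copy_ of a [1024, 1024] tensor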
/pyprof/prof/normalization.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | from .base import OperatorLayerBase
19 | from .tensor import Tensor
20 |
21 |
22 | class BatchNorm(OperatorLayerBase):
23 |
24 | def __init__(self, d):
25 | marker = eval(d.argMarker[0])
26 | mod = marker['mod']
27 | op = marker['op']
28 | args = marker['args']
29 |
30 | self.mod_ = mod
31 | self.op_ = op
32 |
33 | assert (op == "batch_norm")
34 | assert (len(args) >= 1)
35 | i = args[0]
36 | assert (i['type'] == "tensor")
37 |
38 | self.input = Tensor(i['shape'], i['dtype'])
39 | self.dir = d.dir
40 | self.sub = d.sub
41 |
42 | def params(self):
43 | return str(self.input)
44 |
45 | def tc(self):
46 | return "-"
47 |
48 | def op(self):
49 | return self.op_
50 |
51 | def mod(self):
52 | return self.mod_
53 |
54 | def flops(self):
55 | # The flop count depends on the variance algorithm, but 8 per element is a reasonable estimate.
56 | return self.input.size * 8
57 |
58 | def bytes(self):
59 | b = self.input.bytes
60 | # fprop is 2 reads, 2 writes
61 | # bprop is 4 reads, 1 write
62 | if self.dir == "fprop":
63 | b *= 4
64 | else:
65 | b *= 5
66 |
67 | if self.sub > 0:
68 | return 0
69 | else:
70 | return b
71 |
--------------------------------------------------------------------------------
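Rough numbers for the BatchNorm cost model above, for a float32 NCHW input of shape [32, 64, 56, 56] with sub == 0:

    elems = 32 * 64 * 56 * 56     # 6,422,528 elements
    tensor_bytes = elems * 4      # float32
    print(tensor_bytes * 4)       # fprop traffic: 2 reads + 2 writes of an input-sized tensor
    print(tensor_bytes * 5)       # bprop traffic: 4 reads + 1 write
    print(elems * 8)              # the flops() estimate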
/pyprof/prof/optim.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | from collections import OrderedDict
19 | from .utility import Utility
20 | from .base import OperatorLayerBase
21 |
22 | #TODO: Add support for other optimizers.
23 |
24 |
25 | class Adam(OperatorLayerBase):
26 |
27 | def __init__(self, d):
28 | marker = eval(d.argMarker[0])
29 | mod = marker['mod']
30 | op = marker['op']
31 | args = marker['args']
32 |
33 | self.marker = marker
34 | self.mod_ = mod
35 | self.op_ = op
36 | self.args = args
37 | self.sub = d.sub
38 |
39 | assert (op == "adam")
40 | assert (len(args) == 12) or (len(args) == 14)
41 | w, hw, m, v, g = args[0:5]
42 | assert (w['shape'] == m['shape'] == v['shape'] == g['shape'])
43 | assert (hw['shape'] == w['shape']) or (hw['shape'] == (0, )) #hw could be null
44 | assert (w['type'] == m['type'] == v['type'] == g['type'] == hw['type'] == "tensor")
45 | assert (w['dtype'] == m['dtype'] == v['dtype'] == "float32")
46 |
47 | self.w = w
48 | self.g = g
49 |
50 | def params(self):
51 | p = OrderedDict([('T', self.w['shape']), ('wtype', self.w['dtype']), ('gtype', self.g['dtype'])])
52 | return p
53 |
54 | def flops(self):
55 | return 0
56 |
57 | def bytes(self):
58 | wshape = self.w['shape']
59 | wtype = self.w['dtype']
60 | gtype = self.g['dtype']
61 | b = 0
62 |
63 | elems = Utility.numElems(wshape)
64 |
65 | #Bytes to stream read/write w, m, v
66 | b += 6 * elems * Utility.typeToBytes(wtype)
67 |
68 | #Bytes to read "g"
69 | b += elems * Utility.typeToBytes(gtype)
70 |
71 | if wtype != gtype: #mixed precision
72 | #Bytes to write "hw"
73 | b += elems * Utility.typeToBytes(gtype)
74 |
75 | return b if (self.sub == 0) else 0
76 |
77 | def tc(self):
78 | return "-"
79 |
80 | def op(self):
81 | return self.op_
82 |
83 | def mod(self):
84 | return self.mod_
85 |
--------------------------------------------------------------------------------
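A worked example of Adam.bytes() above for a model with 10M fp32 weights and fp16 gradients (mixed precision), sub == 0; byte sizes of 4 and 2 for float32 and float16 are assumed.

    elems = 10_000_000
    b = 6 * elems * 4   # stream read and write of w, m, v in fp32
    b += elems * 2      # read the fp16 gradient "g"
    b += elems * 2      # wtype != gtype, so also write the fp16 "hw" tensor
    print(b)            # 280,000,000 bytes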
/pyprof/prof/output.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | import errno, os, sys
19 |
20 |
21 | class Output():
22 | """
23 | This class handles printing of a columned output and a CSV.
24 | """
25 |
26 | # The table below is organized as
27 | # user_option: [output_header, attribute_in_Data_class, type, min_width_in_columned_output]
28 | table = {
29 | "idx": ["Idx", "index", int, 7],
30 | "seq": ["SeqId", "seqId", str, 7],
31 | "altseq": ["AltSeqId", "altSeqId", str, 7],
32 | "tid": ["TId", "tid", int, 12],
33 | "layer": ["Layer", "layer", str, 10],
34 | "trace": ["Trace", "trace", str, 25],
35 | "dir": ["Direction", "dir", str, 5],
36 | "sub": ["Sub", "sub", int, 3],
37 | "mod": ["Module", "mod", str, 15],
38 | "op": ["Op", "op", str, 15],
39 | "kernel": ["Kernel", "name", str, 0],
40 | "params": ["Params", "params", str, 0],
41 | "sil": ["Sil(ns)", "sil", int, 10],
42 | "tc": ["TC", "tc", str, 2],
43 | "device": ["Device", "device", int, 3],
44 | "stream": ["Stream", "stream", int, 3],
45 | "grid": ["Grid", "grid", str, 12],
46 | "block": ["Block", "block", str, 12],
47 | "flops": ["FLOPs", "flops", int, 12],
48 | "bytes": ["Bytes", "bytes", int, 12]
49 | }
50 |
51 | def __init__(self, args):
52 | self.cols = args.c
53 | self.csv = args.csv
54 | self.col = True if (args.w > 0) else False
55 | self.width = args.w
56 |
57 | w = 0
58 | for col in self.cols:
59 | assert col in Output.table.keys()
60 | w += Output.table[col][3]
61 |
62 | if ((self.col) and (w > self.width)):
63 | print("Minimum width required to print {} = {}. Exiting.".format(",".join(self.cols), w))
64 | sys.exit(1)
65 |
66 | remainder = self.width - w
67 |
68 | if ("kernel" in self.cols) and ("params" in self.cols):
69 | Output.table["kernel"][3] = int(remainder / 2)
70 | Output.table["params"][3] = int(remainder / 2)
71 | elif ("kernel" in self.cols):
72 | Output.table["kernel"][3] = remainder
73 | elif ("params" in self.cols):
74 | Output.table["params"][3] = remainder
75 |
76 | #header format
77 | cadena = ""
78 | for col in self.cols:
79 | _, _, t, w = Output.table[col]
80 | cadena += "%-{}.{}s ".format(w, w)
81 |
82 | self.hFormat = cadena
83 |
84 | #data format
85 | cadena = ""
86 | for col in self.cols:
87 | _, _, t, w = Output.table[col]
88 | if (t == str):
89 | cadena += "%-{}.{}s ".format(w, w)
90 | elif (t == int):
91 | cadena += "%{}d ".format(w)
92 |
93 | self.dFormat = cadena
94 |
95 | def foo(self, cadena, pformat):
96 | if self.csv:
97 | cadena = ",".join(map(lambda x: '"' + str(x) + '"', cadena))
98 | elif self.col:
99 | cadena = pformat % cadena
100 | else:
101 | cadena = " ".join(map(str, cadena))
102 |
103 | try:
104 | print(cadena)
105 | except IOError as e:
106 | #gracefully handle pipes
107 | if e.errno == errno.EPIPE:
108 | # Python flushes standard streams on exit; redirect remaining output
109 | # to devnull to avoid another BrokenPipeError at shutdown
110 |
111 | devnull = os.open(os.devnull, os.O_WRONLY)
112 | os.dup2(devnull, sys.stdout.fileno())
113 | sys.exit(0)
114 | else:
115 | sys.exit(-1)
116 |
117 | def header(self):
118 | cadena = ()
119 | for col in self.cols:
120 | h = Output.table[col][0]
121 | cadena = cadena + (h, )
122 |
123 | self.foo(cadena, self.hFormat)
124 |
125 | def data(self, a):
126 | if a.dir == "":
127 | direc = "na"
128 | else:
129 | direc = a.dir
130 |
131 | if a.op == "":
132 | op = "na"
133 | else:
134 | op = a.op
135 |
136 | if a.mod == "":
137 | mod = "na"
138 | else:
139 | mod = a.mod
140 |
141 | cadena = ()
142 | for col in self.cols:
143 | attr = Output.table[col][1]
144 | val = getattr(a, attr)
145 |
146 | if col == "layer":
147 | assert (type(val) == list)
148 | val = ":".join(val)
149 | val = "-" if val == "" else val
150 |
151 | if col == "trace":
152 | assert (type(val) == list)
153 | if self.col and len(val):
154 | val = val[-1]
155 | val = val.split("/")[-1]
156 | else:
157 | val = ",".join(val)
158 | val = "-" if val == "" else val
159 |
160 | if col in ["seq", "altseq"]:
161 | assert (type(val) == list)
162 | val = ",".join(map(str, val))
163 | val = "-" if val == "" else val
164 |
165 | cadena = cadena + (val, )
166 |
167 | self.foo(cadena, self.dFormat)
168 |
--------------------------------------------------------------------------------
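A sketch of the two format strings Output builds above: strings are left-justified and truncated to the column width, integers are right-justified.

    w = 10
    str_fmt = "%-{}.{}s ".format(w, w)   # used for headers and str columns
    int_fmt = "%{}d ".format(w)          # used for int columns
    print(str_fmt % "volta_sgemm_128x64_nn")  # truncated to 10 characters
    print(int_fmt % 12345)                    # right-justified in 10 characters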
/pyprof/prof/pointwise.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | import numpy as np
19 | from .base import OperatorLayerBase
20 | from .tensor import Tensor
21 | from functools import reduce
22 | import operator
23 |
24 | class Pointwise(OperatorLayerBase):
25 |
26 | # TODO: Add more operators.
27 | # TODO: Determining the output dtype is tricky.
28 | # TODO: Refine calculations based on direction.
29 | # TODO: Refine calculations for non-arithmetic ops.
30 |
31 | # Unary
32 | unary = ["abs", "abs_", "neg", "neg_", "reciprocal", "reciprocal_"]
33 | unary += ["__abs__", "__neg__"]
34 |
35 | # Unary bitwise
36 | unary += ["__invert__"]
37 |
38 | # Exponential and log (unary)
39 | exp_log = ["exp", "exp_", "exp1m", "exp1m_", "log", "log_",
40 | "log10", "log10_", "log1p", "log1p_", "log2", "log2_"]
41 |
42 | # Sqrt (unary)
43 | sqrt = ["rsqrt", "rsqrt_", "sqrt", "sqrt_"]
44 |
45 | # Representation (unary)
46 | representation = ["ceil", "ceil_", "clamp", "clamp_", "floor", "floor_",
47 | "frac", "frac_", "round", "round_", "sign", "sign_",
48 | "trunc", "trunc_"]
49 |
50 | # Trigonometric and transcendental (unary)
51 | trig_trans = ["acos", "acos_", "asin", "asin_", "atan", "atan_",
52 | "atan2", "atan2_", "cos", "cos_", "cosh", "cosh_",
53 | "sin", "sin_", "sinh", "sinh_", "tan", "tan_",
54 | "sigmoid", "sigmoid_", "tanh", "tanh_"]
55 |
56 | # Error (unary)
57 | error = ["erf", "erf_", "erfc", "erfc_", "erfinv", "erfinv_"]
58 |
59 | # Binary
60 | binary = ["add", "add_", "div", "div_", "mul", "mul_",
61 | "remainder", "remainder_", "sub", "sub_"]
62 | binary += ["__add__", "__sub__", "__mul__", "__floordiv__",
63 | "__truediv__", "__mod__"]
64 | binary += ["__radd__", "__rsub__", "__rmul__", "__rdiv__",
65 | "__rtruediv__", "__rfloordiv__"]
66 | binary += ["fmod", "fmod_"]
67 |
68 | # Binary inplace
69 | ibinary = ["__iadd__", "__isub__", "__imul__", "__itruediv__"]
70 |
71 | # Power (binary)
72 | power = ["pow", "pow_", "__pow__", "__rpow__"]
73 |
74 | # Comparison (binary)
75 | comp = ["lt", "lt_", "gt", "gt_", "ge", "ge_", "le", "le_",
76 | "eq", "eq_", "ne", "ne_"]
77 | comp += ["__lt__", "__gt__", "__ge__", "__le__", "__eq__", "__ne__"]
78 |
79 | # Logical (binary)
80 | logical = ["__and__", "__or__", "__xor__", "__lshift__", "__rshift__"]
81 |
82 | # Logical inplace (binary)
83 | ilogical = ["__iand__", "__ior__", "__ixor__", "__ilshift__", "__irshift__"]
84 |
85 | # Ternary
86 | ternary = ["addcdiv", "addcdiv_", "addcmul", "addcmul_"]
87 |
88 | # Misc
89 | misc = ["digamma", "lerp", "lerp_", "mvlgamma"]
90 |
91 | ops = unary + binary + ibinary + comp + logical + ilogical + \
92 | ternary + exp_log + power + sqrt + representation + trig_trans + \
93 | error + misc
94 |
95 | def __init__(self, d):
96 | marker = eval(d.argMarker[0])
97 | mod = marker['mod']
98 | op = marker['op']
99 | args = marker['args']
100 |
101 | self.marker = marker
102 | self.mod_ = mod
103 | self.op_ = op
104 | self.args = args
105 |
106 | self.dir = d.dir
107 | assert (d.dir in ["fprop", "bprop"])
108 | assert (op in Pointwise.ops)
109 |
110 | # Filter out all named parameters (kwargs).
111 | # This might require revisiting in future.
112 | args = list(filter(lambda x: x['name'] == "", args))
113 |
114 | # Filter out non tensors
115 | #args = list(filter(lambda x: x['type'] == "tensor", args))
116 |
117 | assert (len(args) <= 4)
118 | self.input = []
119 |
120 | for arg in args:
121 | t = arg['type']
122 | if (t == "tensor"):
123 | tensor = Tensor(arg['shape'], arg['dtype'])
124 | elif t in ['float', 'int']:
125 | tensor = Tensor([], t)
126 | else:
127 | assert False
128 |
129 | self.input.append(tensor)
130 |
131 | def params(self):
132 | return ";".join([str(t) for t in self.input])
133 |
134 | def tc(self):
135 | return "-"
136 |
137 | def op(self):
138 | return self.op_
139 |
140 | def mod(self):
141 | return self.mod_
142 |
143 | def bytes_flops(self):
144 | b = f = 0
145 |
146 | # Unary
147 | if self.op() in Pointwise.unary + Pointwise.representation:
148 | # Relaxing assert. clamp has > 1 input arguments.
149 | assert (len(self.input) >= 1)
150 | b = 2 * self.input[0].bytes
151 | f = self.input[0].size
152 |
153 | elif self.op() in Pointwise.exp_log + Pointwise.trig_trans + \
154 | Pointwise.sqrt + Pointwise.error:
155 | assert (len(self.input) == 1)
156 | b = 2 * self.input[0].bytes
157 | f = self.input[0].size * 20 # estimate
158 |
159 | # Binary
160 | elif self.op() in Pointwise.comp + \
161 | Pointwise.binary + Pointwise.ibinary + \
162 | Pointwise.logical + Pointwise.ilogical:
163 |
164 | assert (len(self.input) == 2)
165 | out = Tensor.broadcast(self.input)
166 |
167 | if self.dir == "fprop":
168 | b = reduce(operator.add, [t.bytes for t in self.input])
169 | # The output of comparison is bool
170 | if self.op() in Pointwise.comp:
171 | out = Tensor(out.shape, "bool")
172 | b += out.bytes
173 | f = out.size
174 | else:
175 | if (self.op() in ["add", "__add__", "sub", "__sub__", "__isub__"]):
176 | b = 2 * out.bytes
177 | f = 0
178 | elif (self.op() in ["__mul__", "__imul__", "__rmul__", "div", "__truediv__"]):
179 | b = 3 * out.bytes
180 | f = out.size
181 | else:
182 | e = f'{self.op()} bprop not supported yet. Please file a bug.'
183 | assert False, e
184 |
185 | elif self.op() in Pointwise.power:
186 | assert (len(self.input) == 2)
187 | out = Tensor.broadcast(self.input)
188 | b = reduce(operator.add, [t.bytes for t in self.input])
189 | b += out.bytes
190 | f = out.size * 20 # estimate
191 |
192 | # Ternary
193 | elif self.op() in Pointwise.ternary:
194 | # Remove scalars
195 | tensors = list(filter(lambda x: x.shape != [], self.input))
196 | assert len(tensors) == 3
197 | out = Tensor.broadcast(tensors)
198 | b = reduce(operator.add, [t.bytes for t in tensors])
199 | b += out.bytes
200 | f = 3 * out.size
201 |
202 | else:
203 | e = f'{self.op()} not supported yet. Please file a bug.'
204 | assert False, e
205 |
206 | return b, f
207 |
208 | def bytes(self):
209 | b, f = self.bytes_flops()
210 | return b
211 |
212 | def flops(self):
213 | b, f = self.bytes_flops()
214 | return f
215 |
--------------------------------------------------------------------------------
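Illustrative numbers for the binary fprop branch of bytes_flops() above: an fp32 add of shapes [32, 1, 128] and [32, 64, 1] broadcasts to an output of shape [32, 64, 128].

    in1 = 32 * 1 * 128    # 4,096 elements
    in2 = 32 * 64 * 1     # 2,048 elements
    out = 32 * 64 * 128   # 262,144 elements
    print((in1 + in2 + out) * 4)  # bytes: read both inputs, write the broadcast output
    print(out)                    # flops: one add per output element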
/pyprof/prof/pooling.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | from collections import OrderedDict
19 | from .utility import Utility
20 |
21 | # Work in progress.
22 |
23 |
24 | #poolFuncs = ["max_pool2d_with_indices_forward", "max_pool2d_with_indices"]
25 | class MaxPool2d(object):
26 |
27 | def parse(marker):
28 |
29 | def convert2Tuple(arg):
30 | assert (arg['type'] in ["int", "tuple"])
31 | if arg['type'] == "int":
32 | return (arg['value'], arg['value'])
33 | else:
34 | return arg['value']
35 |
36 | mod = marker['mod']
37 | op = marker['op']
38 | args = marker['args']
39 | assert (mod == "torch.nn.functional")
40 | assert (op == "max_pool2d")
41 | assert (len(args) >= 2)
42 |
43 | #input
44 | assert (args[0]['name'] == "")
45 | inp = args[0]
46 | assert (inp['type'] == "tensor")
47 | i = inp['shape']
48 | t = inp['dtype']
49 | assert (len(i) == 4) #nchw tensor
50 |
51 | #kernel
52 | if (args[1]['name'] == ""):
53 | k = args[1]
54 | else:
55 | k = list(filter(lambda x: x['name'] == "kernel_size", args))[0]
56 | k = convert2Tuple(k)
57 |
58 | #stride
59 | s = k #default value
60 | if ((len(args) >= 3) and args[2]['name'] == ""):
61 | s = args[2]
62 | s = convert2Tuple(s)
63 | elif any(x['name'] == "stride" for x in args):
64 | s = list(filter(lambda x: x['name'] == "stride", args))[0]
65 | s = convert2Tuple(s)
66 |
67 | #padding
68 | p = (0, 0)
69 | if ((len(args) >= 4) and args[3]['name'] == ""):
70 | p = args[3]
71 | p = convert2Tuple(p)
72 | elif any(x['name'] == "padding" for x in args):
73 | p = list(filter(lambda x: x['name'] == "padding", args))[0]
74 | p = convert2Tuple(p)
75 |
76 | params = OrderedDict([('T', i), ('K', k), ('s', s), ('p', p), ('type', t)])
77 | return params
78 |
--------------------------------------------------------------------------------
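A hypothetical marker for `F.max_pool2d(x, 3, stride=2, padding=1)` on an NCHW fp16 tensor, fed to MaxPool2d.parse above:

    from pyprof.prof.pooling import MaxPool2d

    marker = {
        'mod': 'torch.nn.functional',
        'op': 'max_pool2d',
        'args': [
            {'name': '', 'type': 'tensor', 'shape': (32, 64, 56, 56), 'dtype': 'float16'},
            {'name': '', 'type': 'int', 'value': 3},        # kernel_size
            {'name': 'stride', 'type': 'int', 'value': 2},
            {'name': 'padding', 'type': 'int', 'value': 1},
        ]}

    # parse is written as a plain function inside the class, so it is called
    # without an instance.
    print(MaxPool2d.parse(marker))  # OrderedDict: T, K=(3,3), s=(2,2), p=(1,1), type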
/pyprof/prof/prof.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | """
18 | This script reads the output (Python dictionary) created by parse.py.
19 | For every kernel (line) in the input it determines
20 | module / class name e.g. torch.nn.functional
21 | operator name e.g. linear
22 | kernel parameters e.g. GEMM M, N, K, datatype
23 | bytes
24 | flops
25 | tensor core usage
26 | direction (fprop, bprop)
27 | and other things. Please see the tool usage.
28 | """
29 |
30 | from .usage import parseArgs
31 | from .output import Output
32 | from .utility import Utility
33 | from .pointwise import Pointwise
34 | from .convert import Convert
35 | from .blas import *
36 | from .embedding import Embedding
37 | from .reduction import *
38 | from .dropout import Dropout
39 | from .softmax import *
40 | #from pooling import * # work in progress
41 | from .linear import Linear
42 | from .optim import Adam
43 | from .misc import *
44 | from .conv import Conv
45 | from .activation import Activation
46 | from .index_slice_join_mutate import Cat, Reshape, MaskedScatter, Gather, Nonzero, IndexSelect, MaskedSelect
47 | from .recurrentCell import RNNCell
48 | from .normalization import BatchNorm
49 | from .randomSample import RandPerm
50 | from .loss import MSELoss
51 | from .data import Data
52 | from .memory import OneZero, Fill, Full
53 |
54 |
55 | def findFpropKernel(seq):
56 | #Find the last fprop kernel with the same seqId
57 | #First look at seqId and then at altSeqId
58 | for idx in reversed(range(len(kernels))):
59 | k = kernels[idx]
60 | if (seq in k['seqId']) and (k['dir'] == "fprop"):
61 | return idx
62 |
63 | for idx in reversed(range(len(kernels))):
64 | k = kernels[idx]
65 | if (seq in k['altSeqId']) and (k['dir'] == "fprop"):
66 | return idx
67 |
68 | return -1
69 | #print("Error: seqId {} not found.".format(seq), file=sys.stderr)
70 | #assert False
71 |
72 |
73 | def foo(mod, op, d):
74 | if (op[0] == "linear"):
75 | xx = Linear(d)
76 |
77 | # rnncell, lstmcell, grucell
78 | elif (mod[0] in ["LSTMCell", "GRUCell"]) and (op[0] == "forward"):
79 | xx = RNNCell(d)
80 |
81 | elif op[0] in [
82 | "conv1d",
83 | "conv2d",
84 | ]:
85 | xx = Conv(d)
86 |
87 | elif (op[0] in Pointwise.ops):
88 | xx = Pointwise(d)
89 |
90 | elif (op[0] in Convert.ops):
91 | xx = Convert(d)
92 |
93 | elif op[0] in ["__matmul__", "matmul"]:
94 | xx = Matmul(d)
95 |
96 | elif op[0] == "embedding":
97 | xx = Embedding(d)
98 |
99 | #reduction
100 | elif op[0] == "sum":
101 | xx = Sum(d)
102 |
103 | elif op[0] == "mean":
104 | xx = Mean(d)
105 |
106 | elif op[0] == "norm":
107 | xx = Norm(d)
108 |
109 | elif op[0] == "dropout":
110 | xx = Dropout(d)
111 |
112 | #Index, Slice, Join, Mutate
113 | elif (op[0] == "cat"):
114 | xx = Cat(d)
115 |
116 | elif (op[0] == "reshape"):
117 | xx = Reshape(d)
118 |
119 | elif (op[0] == "masked_scatter_"):
120 | xx = MaskedScatter(d)
121 |
122 | elif (op[0] == "gather"):
123 | xx = Gather(d)
124 |
125 | elif (op[0] == "nonzero"):
126 | xx = Nonzero(d)
127 |
128 | elif (op[0] == "index_select"):
129 | xx = IndexSelect(d)
130 |
131 | elif (op[0] == "masked_select"):
132 | xx = MaskedSelect(d)
133 |
134 | #blas
135 | elif op[0] in ["addmm", "addmm_"]:
136 | xx = Addmm(d)
137 |
138 | elif op[0] == "mm":
139 | xx = Mm(d)
140 |
141 | elif op[0] == "bmm":
142 | xx = Bmm(d)
143 |
144 | #softmax
145 | elif op[0] == "softmax":
146 | xx = Softmax(d)
147 |
148 | elif op[0] == "log_softmax":
149 | xx = LogSoftmax(d)
150 |
151 | #loss
152 | elif op[0] == "mse_loss":
153 | xx = MSELoss(d)
154 |
155 | #optimizers
156 | elif op[0] == "adam":
157 | xx = Adam(d)
158 |
159 | #normalization
160 | elif op[0] == "batch_norm":
161 | xx = BatchNorm(d)
162 |
163 | #random
164 | elif op[0] == "randperm":
165 | xx = RandPerm(d)
166 |
167 | #memory
168 | elif op[0] in OneZero.ops:
169 | xx = OneZero(d)
170 |
171 | elif op[0] == "fill_":
172 | xx = Fill(d)
173 |
174 | elif op[0] == "full":
175 | xx = Full(d)
176 |
177 | #misc
178 | elif op[0] == "copy_":
179 | xx = Copy(d)
180 |
181 | elif op[0] == "clone":
182 | xx = Clone(d)
183 |
184 | elif op[0] == "contiguous":
185 | xx = Contiguous(d)
186 |
187 | elif op[0] == "any":
188 | xx = Any(d)
189 |
190 | elif (op[0] in Activation.ops):
191 | xx = Activation(d)
192 |
193 | elif op[0] == "to":
194 | xx = Convert(d)
195 |
196 | else:
197 | xx = Foo(d)
198 |
199 | return xx
200 |
201 |
202 | def main():
203 | #Read cmd line arguments
204 | cmdArgs = parseArgs()
205 |
206 | output = Output(cmdArgs)
207 | output.header()
208 |
209 | idx = -1
210 | #Read in all the kernel info
211 | for line in cmdArgs.file:
212 | idx += 1
213 | kernel = eval(line)
214 | assert (kernel)
215 | kernels.append(kernel)
216 |
217 | k = kernel
218 | d = Data(k)
219 |
220 | mod = k['mod']
221 | op = k['op']
222 |
223 | flops = 0
224 | params = {"na": "na"}
225 | tc = "na"
226 | bytes = 0
227 |
228 | if (d.dir == "bprop"):
229 | d.seqMarker = k['seqMarker']
230 | seq = k['seqId']
231 | if len(seq) > 1:
232 | pass
233 | seq = k['seqId'][:1]
234 | assert (len(seq) == 1), seq
235 | #assert (seq[0] != 0)
236 | assert (len(d.seqMarker) > 0)
237 | #If there is no useful marker associated, use the
238 | #sequence number to find the kernel from fprop
239 | if len(d.argMarker) == 0:
240 | index = findFpropKernel(seq[0])
241 | if index >= 0:
242 | d.argMarker = kernels[index]['marker']
243 | d.modMarker = kernels[index]['reprMarkers']
244 | mod = kernels[index]['mod']
245 | op = kernels[index]['op']
246 |
247 | d.layer = kernels[index]['layer']
248 | d.trace = kernels[index]['trace']
249 |
250 | # Check if marker has our annotations
251 | if len(d.argMarker) and Utility.hasNVTX(d.argMarker[0]):
252 |
253 | xx = foo(mod, op, d)
254 |
255 | bytes = xx.bytes()
256 | flops = xx.flops()
257 | op = xx.op()
258 | params = xx.params()
259 | tc = xx.tc()
260 |
261 | if type(op) is list:
262 | if len(op):
263 | op = op[0]
264 | else:
265 | op = ""
266 |
267 | if type(mod) is list:
268 | if len(mod):
269 | mod = mod[0]
270 | else:
271 | mod = ""
272 |
273 | d.index = idx + 1
274 |
275 | # The following 8 come from operator class functions.
276 | d.setParams(params)
277 | d.tc = tc
278 | d.flops = flops
279 | d.bytes = bytes
280 | d.mod = mod
281 | d.op = op
282 |
283 | output.data(d)
284 |
285 |
286 | kernels = []
287 | if __name__ == '__main__':
288 | main()
289 |
--------------------------------------------------------------------------------
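A sketch of how findFpropKernel above matches a bprop kernel back to its fprop op via seqId. The module-level `kernels` list is what it scans; the dict below carries only the keys that function reads, and assumes the pyprof package and its dependencies are importable.

    import pyprof.prof.prof as prof

    prof.kernels.append({'seqId': [7], 'altSeqId': [], 'dir': 'fprop'})
    print(prof.findFpropKernel(7))   # 0  -> index of the matching fprop kernel
    print(prof.findFpropKernel(99))  # -1 -> no fprop kernel with that seqId or altSeqId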
/pyprof/prof/randomSample.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | from collections import OrderedDict
19 | from .utility import Utility
20 | from .base import OperatorLayerBase
21 |
22 |
23 | class RandPerm(OperatorLayerBase):
24 |
25 | def __init__(self, d):
26 | marker = eval(d.argMarker[0])
27 | mod = marker['mod']
28 | op = marker['op']
29 | args = marker['args']
30 |
31 | self.marker = marker
32 | self.mod_ = mod
33 | self.op_ = op
34 | self.args = args
35 |
36 | assert (mod == "torch")
37 | assert (op == "randperm")
38 | assert (len(args) == 1)
39 | n = args[0]
40 | assert n['type'] == "int"
41 | self.n = n['value']
42 |
43 | def params(self):
44 | p = OrderedDict([('N', self.n)])
45 | return p
46 |
47 | def tc(self):
48 | return "-"
49 |
50 | def op(self):
51 | return self.op_
52 |
53 | def mod(self):
54 | return self.mod_
55 |
56 | def bytes(self):
57 | return self.n * Utility.typeToBytes("int64")
58 |
59 | def flops(self):
60 | # Depends on RNG but this is probably a reasonable assumption.
61 | return self.n * 3
62 |
--------------------------------------------------------------------------------
/pyprof/prof/recurrentCell.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | from collections import OrderedDict
19 | from .tc import TC_Whitelist
20 | from .utility import Utility
21 | from .base import OperatorLayerBase
22 |
23 |
24 | def hasTileSize(name):
25 | if ("sgemm" in name) or ("884gemm" in name) or ("hgemm" in name):
26 | return True
27 | else:
28 | return False
29 |
30 |
31 | def ctaTile(name):
32 | name = name.split("_")
33 | name = list(filter(lambda x: "x" in x, name))
34 | name = list(filter(lambda x: "slice" not in x, name))
35 | assert (len(name) == 1)
36 | name = name[0].split("x")
37 | assert (len(name) == 2)
38 | name = list(map(int, name))
39 | return name[0], name[1]
40 |
41 |
42 | class RNNCell(OperatorLayerBase):
43 | """
44 | This class supports RNNCell, LSTMCell and GRUCell.
45 | """
46 |
47 | def __init__(self, d):
48 | marker = eval(d.argMarker[0])
49 | mod = marker['mod']
50 | op = marker['op']
51 | args = marker['args']
52 |
53 | self.marker = marker
54 | self.mod_ = mod
55 | self.op_ = op
56 | self.args = args
57 |
58 | self.name = d.name
59 | self.dir = d.dir
60 | self.sub = d.sub
61 | self.grid = d.grid
62 |
63 | assert (op == "forward")
64 | assert (mod in ["LSTMCell", "GRUCell", "RNNCell"])
65 | assert (len(args) in [2, 3])
66 |
67 | x, h = args[0], args[1]
68 | b1, ii = x['shape']
69 | b2, hh = h['shape']
70 | assert b1 == b2
71 | assert x['dtype'] == h['dtype']
72 | t = x['dtype']
73 |
74 | self.cell = mod
75 | self.inp = ii
76 | self.hid = hh
77 | self.b = b1
78 | self.type = t
79 |
80 | self.multiple = 1
81 | if self.cell == "LSTMCell":
82 | self.multiple = 4
83 | elif self.cell == "GRUCell":
84 | self.multiple = 3
85 |
86 | self.gemm = None
87 | self.m = None
88 | self.n = None
89 | self.k = None
90 | self.elems = 0
91 |
92 | self.bar()
93 |
94 | def params(self):
95 | if self.gemm is None:
96 | p = OrderedDict([('cell', self.cell), ('X', self.inp), ('H', self.hid), ('B', self.b), ('type', self.type)])
97 | else:
98 | assert self.m is not None
99 | assert self.n is not None
100 | assert self.k is not None
101 | p = OrderedDict([('gemm', self.gemm), ('M', self.m), ('N', self.n), ('K', self.k), ('type', self.type)])
102 | return p
103 |
104 | def tc(self):
105 | if "gemm" in self.name:
106 | if self.name in TC_Whitelist():
107 | return 1
108 | return 0
109 | else:
110 | return "-"
111 |
112 | def op(self):
113 | return self.op_
114 |
115 | def mod(self):
116 | return self.mod_
117 |
118 | def bytes(self):
119 | if self.gemm is not None:
120 | m, n, k, t = self.m, self.n, self.k, self.type
121 | b = (m * k + k * n + m * n) * Utility.typeToBytes(t)
122 | elif self.elems != 0:
123 | b = self.elems * Utility.typeToBytes(self.type)
124 | else:
125 | b = 0
126 | return b
127 |
128 | def flops(self):
129 | if self.gemm is not None:
130 | m, n, k = self.m, self.n, self.k
131 | f = 2 * m * n * k
132 | elif self.elems != 0:
133 | f = 0 #TODO
134 | else:
135 | f = 0
136 | return f
137 |
138 | def bar(self):
139 | cell = self.cell
140 | X = self.inp
141 | H = self.hid
142 | B = self.b
143 | t = self.type
144 | subseqId = self.sub
145 | direc = self.dir
146 | name = self.name
147 | grid = self.grid
148 | multiple = self.multiple
149 |
150 | if direc == "fprop":
151 | subseqId = subseqId % 3
152 | if subseqId == 0: #layer gemm
153 | self.gemm = "layer"
154 | self.m = multiple * H
155 | self.n = B
156 | self.k = X
157 | elif subseqId == 1: #recurrent gemm
158 | self.gemm = "recur"
159 | self.m = multiple * H
160 | self.n = B
161 | self.k = H
162 | else:
163 | layerGemmElems = multiple * H * B
164 | recurGemmElems = multiple * H * B
165 | cElems = H * B
166 | hElems = H * B
167 | totElems = layerGemmElems + recurGemmElems + 2 * cElems + hElems
168 | self.elems = totElems
169 |
170 | else:
171 | if ("gemm" in name) and hasTileSize(name): #gemm
172 | #Get cta tile size
173 | tileX, tileY = ctaTile(name)
174 | #Get grid dimensions
175 | grid = grid.split(",")
176 | gridX, gridY, gridZ = map(lambda x: int(x), grid)
177 |
178 | gemmM = tileX * gridX
179 | gemmN = tileY * gridY
180 |
181 | if name[-3:] == "_nn": # dgrad
182 | if (gemmM == H): # recurrent dgrad
183 | #Ideally gemmN = B, but we have a limited set of tile sizes.
184 | gemmN = B
185 | gemmK = multiple * H
186 |
187 | self.gemm = "recur"
188 | self.m = gemmM
189 | self.n = gemmN
190 | self.k = gemmK
191 |
192 | elif (gemmM == X): # layer dgrad
193 | #assert(gemmN % B == 0)
194 | gemmK = multiple * H
195 |
196 | self.gemm = "layer"
197 | self.m = gemmM
198 | self.n = gemmN
199 | self.k = gemmK
200 |
201 | else:
202 | pass
203 |
204 | elif name[-3:] == "_nt": #wgrad
205 | if (gemmM == H): #recurrent wgrad
206 | assert (gemmN == multiple * H)
207 | gemmK = B
208 |
209 | self.gemm = "recur"
210 | self.m = gemmM
211 | self.n = gemmN
212 | self.k = gemmK
213 |
214 | elif (gemmM == X): #layer wgrad
215 | assert (gemmN == multiple * H)
216 | gemmK = B
217 |
218 | self.gemm = "layer"
219 | self.m = gemmM
220 | self.n = gemmN
221 | self.k = gemmK
222 |
223 | else:
224 | pass
225 | else:
226 | pass
227 | else:
228 | pass
229 |
230 | return
231 |
--------------------------------------------------------------------------------
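Worked numbers for the fprop "layer" GEMM of an LSTMCell as modeled in bar() above (multiple = 4): input size X = 1024, hidden size H = 512, batch B = 64, with 2-byte fp16 elements assumed.

    X, H, B, mult, itemsize = 1024, 512, 64, 4, 2
    M, N, K = mult * H, B, X                    # m = 4H, n = B, k = X
    print(2 * M * N * K)                        # flops(): one FMA counted as 2 flops
    print((M * K + K * N + M * N) * itemsize)   # bytes(): the A, B and C matrices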
/pyprof/prof/reduction.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | from collections import OrderedDict
19 | from .utility import Utility
20 | from .base import OperatorLayerBase
21 | from .tensor import Tensor
22 |
23 |
24 | class Mean(OperatorLayerBase):
25 |
26 | def __init__(self, d):
27 | marker = eval(d.argMarker[0])
28 | mod = marker['mod']
29 | op = marker['op']
30 | args = marker['args']
31 |
32 | self.mod_ = mod
33 | self.op_ = op
34 |
35 | assert (mod in ["torch", "Tensor"])
36 | assert (op == "mean")
37 |
38 | #Filter out named parameters
39 | args = list(filter(lambda x: x['name'] == '', args))
40 |
41 | assert (len(args) <= 2)
42 | i = args[0]
43 |
44 | # The input can be a scalar or a tensor
45 | if 'shape' in i: # tensor
46 | self.input = Tensor(i['shape'], i['dtype'])
47 | else: # scalar
48 | assert ('value' in i)
49 | self.input = Tensor([], i['type'])
50 |
51 | self.dir = d.dir
52 | self.sub = d.sub
53 |
54 | def params(self):
55 | return str(self.input)
56 |
57 | def tc(self):
58 | return "-"
59 |
60 | def op(self):
61 | return self.op_
62 |
63 | def mod(self):
64 | return self.mod_
65 |
66 | def bytes(self):
67 | if self.sub == 0:
68 | return self.input.bytes + self.input.itemsize
69 | else:
70 | return 0
71 |
72 | def flops(self):
73 | if self.sub == 0:
74 | return self.input.size + 1
75 | else:
76 | return 0
77 |
78 |
79 | class Sum(OperatorLayerBase):
80 |
81 | def __init__(self, d):
82 | marker = eval(d.argMarker[0])
83 | mod = marker['mod']
84 | op = marker['op']
85 | args = marker['args']
86 |
87 | self.marker = marker
88 | self.mod_ = mod
89 | self.op_ = op
90 | self.args = args
91 |
92 | assert (mod in ["torch", "Tensor"])
93 | assert (op == "sum")
94 | assert (len(args) >= 1)
95 |
96 | #Get input
97 | if (args[0]['name'] == ""):
98 | i = args[0]
99 | else:
100 | i = list(filter(lambda x: x['name'] == "input", args))[0]
101 |
102 | self.shape = i['shape']
103 | self.type = i['dtype']
104 | self.sub = d.sub
105 |
106 | def params(self):
107 | p = OrderedDict([('T', self.shape), ('type', self.type)])
108 | return p
109 |
110 | def tc(self):
111 | return "-"
112 |
113 | def op(self):
114 | return self.op_
115 |
116 | def mod(self):
117 | return self.mod_
118 |
119 | def elems(self):
120 | return Utility.numElems(self.shape)
121 |
122 | def flops(self):
123 | # Note: This is incorrect, need to calculate actual flops (say via nvprof)
124 | return self.elems()
125 |
126 | def bytes(self):
127 | b = self.elems() * Utility.typeToBytes(self.type)
128 | if self.sub == 0:
129 | return b
130 | else:
131 | return 0
132 |
133 |
134 | class Norm(OperatorLayerBase):
135 |
136 | def __init__(self, d):
137 | marker = eval(d.argMarker[0])
138 | mod = marker['mod']
139 | op = marker['op']
140 | args = marker['args']
141 |
142 | self.marker = marker
143 | self.mod_ = mod
144 | self.op_ = op
145 | self.args = args
146 |
147 | assert (mod in ["torch", "Tensor"])
148 | assert (op == "norm")
149 | #assert (len(args) == 1)
150 | i = args[0]
151 | self.shape = i['shape']
152 | self.type = i['dtype']
153 | self.sub = d.sub
154 |
155 | def params(self):
156 | p = OrderedDict([('T', self.shape), ('type', self.type)])
157 | return p
158 |
159 | def elems(self):
160 | return Utility.numElems(self.shape)
161 |
162 | def bytes(self):
163 | b = self.elems() * Utility.typeToBytes(self.type)
164 | if self.sub == 0:
165 | return b
166 | else:
167 | return 0
168 |
169 | def flops(self):
170 | # square and add plus sqrt
171 | f = 2 * self.elems() + 1
172 | if self.sub == 0:
173 | return f
174 | else:
175 | return 0
176 |
177 | def tc(self):
178 | return "-"
179 |
180 | def op(self):
181 | return self.op_
182 |
183 | def mod(self):
184 | return self.mod_
185 |
--------------------------------------------------------------------------------
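The `sub == 0` gating in bytes() and flops() above (also used by BatchNorm and Adam) appears to charge the cost of an op only to its first kernel; later sub-kernels of the same op report 0 so totals are not double counted. A sketch of the pattern:

    def charged(value, sub):
        # only the first kernel (sub == 0) of a multi-kernel op carries the cost
        return value if sub == 0 else 0

    print(charged(4096, 0))  # 4096
    print(charged(4096, 1))  # 0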
/pyprof/prof/softmax.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | from .base import OperatorLayerBase
19 | from .tensor import Tensor
20 |
21 |
22 | class Softmax(OperatorLayerBase):
23 |
24 | def __init__(self, d):
25 | marker = eval(d.argMarker[0])
26 | mod = marker['mod']
27 | op = marker['op']
28 | args = marker['args']
29 |
30 | self.mod_ = mod
31 | self.op_ = op
32 |
33 | assert (mod == "torch.nn.functional")
34 | assert (op == "softmax")
35 |
36 | #Filter out named parameters
37 | args = list(filter(lambda x: x['name'] == '', args))
38 |
39 | assert (len(args) <= 2)
40 | arg = args[0]
41 | self.input = Tensor(arg['shape'], arg['dtype'])
42 | self.dir = d.dir
43 | return
44 |
45 | def op(self):
46 | return self.op_
47 |
48 | def mod(self):
49 | return self.mod_
50 |
51 | def tc(self):
52 | return "-"
53 |
54 | def params(self):
55 | return str(self.input)
56 |
57 | def flops(self):
58 | # An approximation
59 | # http://ai.stanford.edu/~paskin/slam/javadoc/javaslam/util/Flops.html#exp()
60 | # TODO: consider direction
61 | e = self.input.size
62 | f = e * 20 # denominator, exp all elements and reduce
63 | f += e * 20 # numerator, exp all elements and divide
64 | return f
65 |
66 | def bytes(self):
67 | # TODO: verify
68 | b = self.input.bytes
69 | # fprop is 2 reads, 1 write
70 | # bprop is 4 reads, 1 write
71 | b *= 3 if self.dir == "fprop" else 5
72 | return b
73 |
74 |
75 | class LogSoftmax(OperatorLayerBase):
76 |
77 | def __init__(self, d):
78 | marker = eval(d.argMarker[0])
79 | mod = marker['mod']
80 | op = marker['op']
81 | args = marker['args']
82 |
83 | self.mod_ = mod
84 | self.op_ = op
85 |
86 | assert (mod in ["torch", "Tensor", "torch.nn.functional"])
87 | assert (op == "log_softmax")
88 |
89 | #Filter out named parameters
90 | args = list(filter(lambda x: x['name'] == '', args))
91 |
92 | assert (len(args) <= 2)
93 |
94 | #Get input
95 | if (args[0]['name'] == ""):
96 | i = args[0]
97 | else:
98 | i = list(filter(lambda x: x['name'] == "input", args))[0]
99 |
100 | self.input = Tensor(i['shape'], i['dtype'])
101 | self.dir = d.dir
102 | return
103 |
104 | def op(self):
105 | return self.op_
106 |
107 | def mod(self):
108 | return self.mod_
109 |
110 | def tc(self):
111 | return "-"
112 |
113 | def params(self):
114 | return str(self.input)
115 |
116 | def flops(self):
117 | # An approximation
118 | # http://ai.stanford.edu/~paskin/slam/javadoc/javaslam/util/Flops.html#exp()
119 | # TODO: consider direction
120 | e = self.input.size
121 | f = e * 20 # denominator, exp all elements and reduce
122 | f += e # numerator, just a subtraction
123 | return f
124 |
125 | def bytes(self):
126 | # TODO: verify
127 | b = self.input.bytes
128 | # fprop is 2 reads, 1 write
129 | # bprop is 4 reads, 1 write
130 | b *= 3 if self.dir == "fprop" else 5
131 | return b
132 |
--------------------------------------------------------------------------------
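The exp-heavy estimates above (20 flops per exp, per the linked reference) give, for an input of shape [64, 1000]:

    e = 64 * 1000
    print(40 * e)     # Softmax.flops(): 20*e for the denominator + 20*e for the numerator
    print(21 * e)     # LogSoftmax.flops(): 20*e for the denominator + e subtractions
    print(3 * e * 4)  # bytes() for a float32 fprop (2 reads, 1 write)
    print(5 * e * 4)  # bytes() for a float32 bprop (4 reads, 1 write)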
/pyprof/prof/tc.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 |
19 | class TC_Whitelist:
20 | whitelist = ['h884', 's884', 'h1688', 's1688', 'hmma', 'i8816', '16816',
21 | 'dgrad_1x1_stride_2x2', 'first_layer_wgrad_kernel', 'conv1x1',
22 | 'conv2d_c1_k1', 'direct_group', 'xmma_implicit_gemm',
23 | 'xmma_sparse_conv', 'xmma_warp_specialized_implicit_gemm',
24 | 'xmma_gemm', 'xmma_sparse_gemm', 'c1688']
25 | def __contains__(self, item):
26 | for pattern in self.whitelist:
27 | if pattern in item:
28 | return True
29 | return False
30 |
--------------------------------------------------------------------------------
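Minimal usage of TC_Whitelist above; membership is a substring match, as used by the tc() methods. The kernel names below are only illustrative.

    from pyprof.prof.tc import TC_Whitelist

    print("volta_h884gemm_128x128_nt" in TC_Whitelist())  # True  ("h884" matches)
    print("volta_sgemm_128x64_nn" in TC_Whitelist())      # False (no whitelisted substring)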
/pyprof/prof/tensor.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | # Copyright (c) 2020, Aditya Agrawal.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | from functools import reduce
18 | import numpy as np
19 | from .dtype import Dtype
20 |
21 | class Tensor(object):
22 | def __init__(self, shape, dtype):
23 | assert type(shape) in [tuple, list]
24 | assert dtype in Dtype.types()
25 | self._shape = list(shape)
26 | self._dtype = dtype
27 |
28 | def __str__(self):
29 | t = Dtype(self.dtype)
30 | return str(self.shape).replace(" ", "") + str(t)
31 |
32 | @property
33 | def ndim(self):
34 | # can be 0 for scalars
35 | return len(self._shape)
36 |
37 | @property
38 | def shape(self):
39 | # can be () for scalars
40 | return self._shape
41 |
42 | @property
43 | def size(self):
44 | # number of elements
45 | return reduce(lambda x, y: x * y, self.shape, 1)
46 |
47 | @property
48 | def dtype(self):
49 | return self._dtype
50 |
51 | @property
52 | def itemsize(self):
53 | return Dtype(self.dtype).itemsize
54 |
55 | @property
56 | def bytes(self):
57 | return self.size * self.itemsize
58 |
59 | @staticmethod
60 | def broadcast(tensors):
61 | r'''
62 | The input is a list of Tensors.
63 | The output is a Tensor.
64 | '''
65 |
66 | assert len(tensors) > 1
67 | shape = tensors[0].shape
68 | # TODO: Assume the output dtype is the same as the first arg
69 | dt = tensors[0].dtype
70 |
71 | # Check if shapes are different
72 | if any(t.shape != shape for t in tensors):
73 | x = [np.empty(t.shape, t.dtype) for t in tensors]
74 | try:
75 | out = np.broadcast(*x)
76 | except:
77 | assert False # not broadcastable
78 | return Tensor(out.shape, dt)
79 | else:
80 | return Tensor(shape, dt)
81 |
82 | def main():
83 | for shape in [(), (1,), (3,7), (3,7,11)]:
84 | for dt in Dtype.types():
85 | t = Tensor(shape, dt)
86 | print(t.ndim, str(t.shape).replace(" ", ""), \
87 | t.size, t.dtype, t.itemsize, t.bytes, t)
88 |
89 | # Broadcast test
90 | a = Tensor([1,3], "int")
91 | b = Tensor([3,1], "float")
92 | c = Tensor([1,3], "float64")
93 | d = np.ones([], "float64")
94 | out = Tensor.broadcast([a,b,c,d])
95 | print(out.shape)
96 |
97 | if __name__ == '__main__':
98 | main()
99 |
--------------------------------------------------------------------------------
/pyprof/prof/usage.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | import sys
19 | import argparse
20 |
21 |
22 | def parseArgs():
23 | """
24 | Print usage and parse arguments.
25 | """
26 |
27 | def check_cols(value):
28 | valid = [
29 | "idx", "seq", "altseq", "tid", "layer", "trace", "dir", "sub", "mod", "op", "kernel", "params", "sil", "tc",
30 | "device", "stream", "grid", "block", "flops", "bytes"
31 | ]
32 | cols = value.split(",")
33 | for col in cols:
34 | if col not in valid:
35 | raise argparse.ArgumentTypeError(
36 | "{} is not a valid column name. Valid column names are {}.".format(col, ",".join(valid))
37 | )
38 | return cols
39 |
40 | def openFile(f):
41 | try:
42 | d = open(f, "r")
43 | return d
44 | except IOError:
45 | print("Error opening file {}. Exiting.".format(f), file=sys.stderr)
46 | sys.exit(1)
47 |
48 | parser = argparse.ArgumentParser(
49 | prog=sys.argv[0], description="PyTorch Profiler", formatter_class=argparse.RawTextHelpFormatter
50 | )
51 | parser.add_argument("file", nargs='?', type=str, default=None, help="Output of parse.py (Python dictionary).")
52 |
53 | parser.add_argument(
54 | "-c", type=check_cols, default="idx,dir,sub,mod,op,kernel,params,sil",
55 |         help='''Comma-separated names of columns to print.
56 | idx: Index
57 | seq: PyTorch Sequence Id
58 | altseq: PyTorch Alternate Sequence Id
59 | tid: Thread Id
60 | layer: User annotated NVTX string (can be nested)
61 | trace: Function Call Trace
62 | dir: Direction
63 | sub: Sub Sequence Id
64 | mod: Module
65 |     op: Operation
66 | kernel: Kernel Name
67 | params: Parameters
68 | sil: Silicon Time (in ns)
69 | tc: Tensor Core Usage
70 | device: GPU Device Id
71 | stream: Stream Id
72 | grid: Grid Dimensions
73 | block: Block Dimensions
74 | flops: Floating point ops (FMA = 2 FLOPs)
75 | bytes: Number of bytes in and out of DRAM
76 | e.g. -c idx,kernel,sil'''
77 | )
78 |
79 | group = parser.add_mutually_exclusive_group()
80 | group.add_argument("--csv", action="store_true", default=False, help="Print a CSV output.")
81 |     group.add_argument("-w", type=int, default=0, help="Width of columnar output.")
82 |
83 | args = parser.parse_args()
84 | if args.file is None:
85 | args.file = sys.stdin
86 | else:
87 | args.file = openFile(args.file)
88 | return args
89 |
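
parseArgs() is the entry point pyprof.prof uses to read its options; the file argument is the dictionary written by pyprof.parse, or stdin when omitted. A minimal sketch (not part of usage.py) of the parsed result, simulating an invocation equivalent to "python -m pyprof.prof -c idx,kernel,sil --csv" with the dictionary piped on stdin:

    import sys
    from pyprof.prof.usage import parseArgs

    # Simulate: python -m pyprof.prof -c idx,kernel,sil --csv   (dict on stdin)
    sys.argv = ["prof", "-c", "idx,kernel,sil", "--csv"]
    args = parseArgs()
    print(args.c)     # ['idx', 'kernel', 'sil']
    print(args.csv)   # True
    print(args.w)     # 0, the default column width
    print(args.file)  # sys.stdin, because no file argument was given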
--------------------------------------------------------------------------------
/pyprof/prof/utility.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 |
18 | from functools import reduce
19 |
20 |
21 | class Utility(object):
22 |
23 | @staticmethod
24 | def numElems(shape):
25 | assert (type(shape) == tuple)
26 | return reduce(lambda x, y: x * y, shape, 1)
27 |
28 | @staticmethod
29 | def typeToBytes(t):
30 | if (t in ["uint8", "int8", "byte", "char", "bool"]):
31 | return 1
32 | elif (t in ["float16", "half", "int16", "short"]):
33 | return 2
34 | elif (t in ["float32", "float", "int32", "int"]):
35 | return 4
36 | elif (t in ["int64", "long", "float64", "double"]):
37 | return 8
38 | assert False
39 |
40 | @staticmethod
41 | def typeToString(t):
42 | if (t in ["uint8", "byte", "char"]):
43 | return "uint8"
44 | elif (t in [
45 | "int8",
46 | ]):
47 | return "int8"
48 | elif (t in [
49 | "int16",
50 | "short",
51 | ]):
52 | return "int16"
53 | elif (t in ["float16", "half"]):
54 | return "fp16"
55 | elif (t in ["float32", "float"]):
56 | return "fp32"
57 | elif (t in [
58 | "int32",
59 | "int",
60 | ]):
61 | return "int32"
62 | elif (t in ["int64", "long"]):
63 | return "int64"
64 | elif (t in [
65 | "float64",
66 | "double",
67 | ]):
68 | return "fp64"
69 | elif (t in [
70 | "bool",
71 | ]):
72 | return "bool"
73 | assert False
74 |
75 | @staticmethod
76 | def hasNVTX(marker):
77 | if type(marker) is str:
78 | try:
79 | marker = eval(marker)
80 | except:
81 | return False
82 |
83 | if type(marker) is dict:
84 | keys = marker.keys()
85 | return ("mod" in keys) and ("op" in keys) and ("args" in keys)
86 | else:
87 | return False
88 |
89 | @staticmethod
90 | def isscalar(t):
91 | return (t in ["float", "int"])
92 |
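
A small usage sketch (not part of utility.py) showing the mappings defined above; the expected values follow directly from numElems, typeToBytes, typeToString, hasNVTX and isscalar:

    from pyprof.prof.utility import Utility

    print(Utility.numElems((3, 224, 224)))   # 150528 elements
    print(Utility.typeToBytes("float16"))    # 2 bytes per element
    print(Utility.typeToString("double"))    # "fp64"

    # hasNVTX() accepts either a dict or its string form and checks for the
    # 'mod', 'op' and 'args' keys emitted by the NVTX markers.
    marker = "{'mod': 'Tensor', 'op': 'float', 'args': []}"
    print(Utility.hasNVTX(marker))           # True
    print(Utility.isscalar("int"))           # True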
--------------------------------------------------------------------------------
/qa/L0_docs/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | TEST_LOG="./docs.log"
17 |
18 | rm -f $TEST_LOG
19 | RET=0
20 |
21 | apt-get update && \
22 | apt-get install -y --no-install-recommends python3-pip zip doxygen && \
23 | rm -rf /root/.cache/pip && \
24 | pip uninstall -y Sphinx && \
25 | pip3 install --upgrade setuptools wheel && \
26 | pip3 install --upgrade sphinx==2.4.4 sphinx-rtd-theme==0.4.3 \
27 | nbsphinx==0.6.0 breathe==4.14.1
28 |
29 | set +e
30 |
31 | # Set visitor script to be included on every HTML page
32 | export VISITS_COUNTING_SCRIPT=//assets.adobedtm.com/b92787824f2e0e9b68dc2e993f9bd995339fe417/satelliteLib-7ba51e58dc61bcb0e9311aadd02a0108ab24cc6c.js
33 |
34 | (cd docs && rm -f pyprof_docs.zip && \
35 | make BUILDDIR=/opt/pytorch/pyprof/qa/L0_docs/build clean html) > $TEST_LOG 2>&1
36 | if [ $? -ne 0 ]; then
37 | RET=1
38 | fi
39 |
40 | (cd build && zip -r ../pyprof_docs.zip html)
41 | if [ $? -ne 0 ]; then
42 | RET=1
43 | fi
44 |
45 | set -e
46 |
47 | if [ $RET -eq 0 ]; then
48 | echo -e "\n***\n*** Test Passed\n***"
49 | else
50 | cat $TEST_LOG
51 | echo -e "\n***\n*** Test FAILED\n***"
52 | fi
53 |
54 | exit $RET
55 |
--------------------------------------------------------------------------------
/qa/L0_lenet/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | TEST_LOG="./data.log"
17 |
18 |
19 | rm -f $TEST_LOG
20 | RET=0
21 |
22 | set +e
23 |
24 | ./test_lenet.py > $TEST_LOG 2>&1
25 | if [ $? -ne 0 ]; then
26 | RET=1
27 | fi
28 |
29 | set -e
30 |
31 | if [ $RET -eq 0 ]; then
32 | echo -e "\n***\n*** Test Passed\n***"
33 | else
34 | cat $TEST_LOG
35 | echo -e "\n***\n*** Test FAILED\n***"
36 | fi
37 |
38 | exit $RET
39 |
--------------------------------------------------------------------------------
/qa/L0_lenet/test_lenet.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | '''
18 | This test runs lenet through the 3 steps of pyprof.
19 | It ensures:
20 | - A database is created from nsys
21 | - A dict is created from pyprof.parse
22 | - A csv with valid data is created from pyprof.prof
23 | '''
24 |
25 | import subprocess
26 | from pathlib import Path
27 | import unittest
28 | import csv
29 |
30 | unittest.TestLoader.sortTestMethodsUsing = None
31 |
32 |
33 | class TestPyprofWithLenet(unittest.TestCase):
34 |
35 | @classmethod
36 | def setUpClass(cls):
37 | cls.pyprof_path = Path("/opt/pytorch/pyprof/pyprof/examples")
38 |
39 | def test_run_nsys(self):
40 | # Print a blank line to make the test output more readable
41 | print()
42 | command = "nsys profile -f true -o lenet --export sqlite python " + self.pyprof_path.as_posix() + "/lenet.py"
43 | command_tokens = command.split()
44 |
45 | ret_val = subprocess.run(command_tokens)
46 |
47 | self.assertEqual(ret_val.returncode, 0)
48 | db_path = Path('./lenet.sqlite')
49 | self.assertTrue(db_path.exists())
50 |
51 | def test_run_parse(self):
52 | command = "python -m pyprof.parse lenet.sqlite"
53 | command_tokens = command.split()
54 |
55 | with open("lenet.dict", "w") as f:
56 | ret_val = subprocess.run(command_tokens, stdout=f)
57 |
58 | self.assertEqual(ret_val.returncode, 0)
59 | dict_path = Path('./lenet.dict')
60 | self.assertTrue(dict_path.exists())
61 |
62 | def test_run_profile(self):
63 | lenet_csv = "./lenet.csv"
64 | command = "python -m pyprof.prof --csv lenet.dict"
65 | command_tokens = command.split()
66 | with open(lenet_csv, "w") as f:
67 | ret_val = subprocess.run(command_tokens, stdout=f)
68 |
69 | self.assertEqual(ret_val.returncode, 0)
70 | csv_path = Path(lenet_csv)
71 | self.assertTrue(csv_path.exists())
72 |
73 | directions = ["bprop", "fprop"]
74 | ops = [
75 | "", # covers the "reduce_kernel" kernel, op will be an empty string in the report
76 | "add_",
77 | "backward",
78 | "bias",
79 | "conv2d",
80 | "linear",
81 | "max_pool2d",
82 | "mse_loss",
83 | "relu",
84 | "sum",
85 | ]
86 |
87 | with open("lenet.csv", "r") as csvfile:
88 | reader = csv.DictReader(csvfile)
89 | for row in reader:
90 | # verify direction
91 | self.assertTrue(row['Direction'] in directions, f"Row direction: {row['Direction']}")
92 | # verify op
93 | self.assertTrue(row['Op'] in ops, f"Row op: {row['Op']}")
94 |             # Verify that the final Idx falls in the expected range.
95 |             # Which kernels cuDNN picks is nondeterministic, so the exact
96 |             # kernel count varies; for this network the last Idx should land in [65, 75).
97 |             self.assertTrue(int(row['Idx']) in range(65, 75), f"Final Idx: {row['Idx']}")
98 |
99 |
100 | if __name__ == '__main__':
101 | unittest.main(verbosity=2)
102 |
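
The three tests above mirror pyprof's standard workflow: profile with nsys, parse the sqlite export, then generate the kernel report. A standalone sketch of the same pipeline, assuming nsys is on PATH and lenet.py sits in the current directory (paths are examples only):

    import subprocess

    def run(cmd, stdout=None):
        # Split on whitespace and fail loudly on a non-zero exit code.
        subprocess.run(cmd.split(), stdout=stdout, check=True)

    # 1. Profile the workload and export an sqlite database (lenet.sqlite).
    run("nsys profile -f true -o lenet --export sqlite python lenet.py")

    # 2. Parse the database into the intermediate dictionary file.
    with open("lenet.dict", "w") as f:
        run("python -m pyprof.parse lenet.sqlite", stdout=f)

    # 3. Produce the per-kernel CSV report.
    with open("lenet.csv", "w") as f:
        run("python -m pyprof.prof --csv lenet.dict", stdout=f)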
--------------------------------------------------------------------------------
/qa/L0_nvtx/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from test_pyprof_nvtx import TestPyProfNvtx
16 |
--------------------------------------------------------------------------------
/qa/L0_nvtx/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | TEST_LOG="./nvtx.log"
17 |
18 |
19 | apt-get update && \
20 | apt-get install -y --no-install-recommends python3
21 |
22 | rm -f $TEST_LOG
23 | RET=0
24 |
25 | ./test_pyprof_nvtx.py > $TEST_LOG 2>&1
26 | if [ $? -ne 0 ]; then
27 | RET=1
28 | fi
29 |
30 | set -e
31 |
32 | if [ $RET -eq 0 ]; then
33 | echo -e "\n***\n*** Test Passed\n***"
34 | else
35 | cat $TEST_LOG
36 | echo -e "\n***\n*** Test FAILED\n***"
37 | fi
38 |
39 | exit $RET
--------------------------------------------------------------------------------
/qa/L0_pyprof_data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/NVIDIA/PyProf/218dcc183bf7fdf97dbfc648878a3d09aea3b199/qa/L0_pyprof_data/__init__.py
--------------------------------------------------------------------------------
/qa/L0_pyprof_data/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 |
16 | TEST_LOG="./data.log"
17 |
18 |
19 | apt-get update && \
20 |     apt-get install -y --no-install-recommends python3
21 |
22 | rm -f $TEST_LOG
23 | RET=0
24 |
25 | ./test_pyprof_data.py > $TEST_LOG 2>&1
26 | if [ $? -ne 0 ]; then
27 | RET=1
28 | fi
29 |
30 | set -e
31 |
32 | if [ $RET -eq 0 ]; then
33 | echo -e "\n***\n*** Test Passed\n***"
34 | else
35 | cat $TEST_LOG
36 | echo -e "\n***\n*** Test FAILED\n***"
37 | fi
38 |
39 | exit $RET
--------------------------------------------------------------------------------
/qa/L0_pyprof_data/test_pyprof_data.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
5 | #
6 | # Licensed under the Apache License, Version 2.0 (the "License");
7 | # you may not use this file except in compliance with the License.
8 | # You may obtain a copy of the License at
9 | #
10 | # http://www.apache.org/licenses/LICENSE-2.0
11 | #
12 | # Unless required by applicable law or agreed to in writing, software
13 | # distributed under the License is distributed on an "AS IS" BASIS,
14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | # See the License for the specific language governing permissions and
16 | # limitations under the License.
17 | '''
18 | This test creates 2 kernels and exercises the pyprof code for generating their representation.
19 | '''
20 | import inspect
21 | import unittest
22 |
23 | from pyprof.prof.data import Data
24 | from pyprof.prof.prof import foo
25 |
26 |
27 | class TestPyProfData(unittest.TestCase):
28 |
29 | def __init__(self, testName):
30 | super().__init__(testName)
31 |
32 | def setUp(self):
33 | pass
34 |
35 | def tearDown(self):
36 | pass
37 |
38 | def test_data(self):
39 | kernels = [
40 | {
41 | 'kShortName':
42 | 'elementwise_kernel',
43 | 'kDuration':
44 | 2848,
45 | 'layer': [],
46 | 'trace': [],
47 | 'reprMarkers': [],
48 | 'marker':
49 | [
50 | "{'mod': 'Tensor', 'op': 'float', 'args': [{'name': '', 'type': 'tensor', 'shape': (18, 104, 160), 'dtype': 'bool'}]}"
51 | ],
52 | 'seqMarker': ['to, seq = 60471'],
53 | 'seqId': [60471],
54 | 'subSeqId':
55 | 0,
56 | 'altSeqId': [],
57 | 'dir':
58 | 'fprop',
59 | 'mod': ['Tensor'],
60 | 'op': ['float'],
61 | 'tid':
62 | 1431533376,
63 | 'device':
64 | 0,
65 | 'stream':
66 | 7,
67 | 'grid': (585, 1, 1),
68 | 'block': (512, 1, 1),
69 | 'kLongName':
70 | 'void at::native::elementwise_kernel<512, 1, void at::native::gpu_kernel_impl(at::TensorIterator&)::{lambda(bool)#1}>(at::TensorIterator&, void at::native::copy_kernel_impl(at::TensorIterator&)::{lambda(bool)#1} const&)::{lambda(int)#1}>(int, void at::native::gpu_kernel_impl(at::TensorIterator&)::{lambda(bool)#1}>(at::TensorIterator&, void at::native::copy_kernel_impl(at::TensorIterator&)::{lambda(bool)#1} const&)::{lambda(int)#1})'
71 | },
72 | {
73 | 'kShortName':
74 | 'elementwise_kernel',
75 | 'kDuration':
76 | 201182,
77 | 'layer': [],
78 | 'trace': [],
79 | 'reprMarkers': [],
80 | 'marker':
81 | [
82 | "{'mod': 'Tensor', 'op': 'clone', 'args': [{'name': '', 'type': 'tensor', 'shape': (18, 4, 416, 640), 'dtype': 'float32'}]}"
83 | ],
84 | 'seqMarker': ['clone, seq = 60161'],
85 | 'seqId': [60161],
86 | 'subSeqId':
87 | 0,
88 | 'altSeqId': [],
89 | 'dir':
90 | 'fprop',
91 | 'mod': ['Tensor'],
92 | 'op': ['clone'],
93 | 'tid':
94 | 1431533376,
95 | 'device':
96 | 0,
97 | 'stream':
98 | 7,
99 | 'grid': (37440, 1, 1),
100 | 'block': (128, 1, 1),
101 | 'kLongName':
102 | 'void at::native::elementwise_kernel<128, 4, void at::native::gpu_kernel_impl(at::TensorIterator&)::{lambda(float)#1}>(at::TensorIterator&, void at::native::copy_kernel_impl(at::TensorIterator&)::{lambda(float)#1} const&)::{lambda(int)#2}>(int, void at::native::gpu_kernel_impl(at::TensorIterator&)::{lambda(float)#1}>(at::TensorIterator&, void at::native::copy_kernel_impl(at::TensorIterator&)::{lambda(float)#1} const&)::{lambda(int)#2})'
103 | },
104 | ]
105 |
106 | for k in kernels:
107 | d = Data(k)
108 | mod = k['mod']
109 | op = k['op']
110 | xx = foo(mod, op, d)
111 | d.setParams(xx.params())
112 |
113 |
114 | def run_tests(test_name):
115 | dummy = TestPyProfData(test_name)
116 | test_cases = list(
117 | filter(lambda x: 'test_' in x, map(lambda x: x[0], inspect.getmembers(dummy, predicate=inspect.ismethod)))
118 | )
119 | print(f'Running tests for {test_name}')
120 | suite = unittest.TestSuite()
121 | for test_case in test_cases:
122 | suite.addTest(TestPyProfData(test_case))
123 | result = unittest.TextTestRunner(verbosity=2).run(suite)
124 | if result.wasSuccessful():
125 | exit(0)
126 | else:
127 | exit(1)
128 |
129 |
130 | if __name__ == '__main__':
131 | run_tests('test_data')
132 |
--------------------------------------------------------------------------------
/qa/common/check_copyright.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import argparse
18 | import os
19 | import re
20 |
21 | FLAGS = None
22 | SKIP_EXTS = ('jpeg', 'jpg', 'pgm', 'png',
23 | 'log', 'serverlog',
24 | 'preprocessed', 'jmx', 'gz',
25 | 'caffemodel', 'json')
26 | SKIP_PATHS = ('requirements.txt',
27 | 'requirements/requirements_nsys.txt',
28 | 'requirements/requirements.txt',
29 | 'qa/L0_docs/VERSION',
30 | 'LICENSE',
31 | 'VERSION',
32 | 'MANIFEST.in',
33 | 'build/',
34 | 'dist/',
35 | 'nvidia_pyprof.egg-info/')
36 |
37 | COPYRIGHT_YEAR_RE0 = 'Copyright \\(c\\) (20[0-9][0-9]),'
38 | COPYRIGHT_YEAR_RE1 = 'Copyright \\(c\\) (20[0-9][0-9])-(20[0-9][0-9]),'
39 |
40 | COPYRIGHT = '''
41 |
42 | Licensed under the Apache License, Version 2.0 (the "License");
43 | you may not use this file except in compliance with the License.
44 | You may obtain a copy of the License at
45 |
46 | http://www.apache.org/licenses/LICENSE-2.0
47 |
48 | Unless required by applicable law or agreed to in writing, software
49 | distributed under the License is distributed on an "AS IS" BASIS,
50 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
51 | See the License for the specific language governing permissions and
52 | limitations under the License.
53 | '''
54 |
55 | single_re = re.compile(COPYRIGHT_YEAR_RE0)
56 | range_re = re.compile(COPYRIGHT_YEAR_RE1)
57 |
58 | def visit(path):
59 | if FLAGS.verbose:
60 | print("visiting " + path)
61 |
62 | for skip in SKIP_EXTS:
63 | if path.endswith('.' + skip):
64 | if FLAGS.verbose:
65 | print("skipping due to extension: " + path)
66 | return True
67 |
68 | for skip in SKIP_PATHS:
69 | if path.startswith(skip):
70 | if FLAGS.verbose:
71 | print("skipping due to path prefix: " + path)
72 | return True
73 |
74 | with open(path, 'r') as f:
75 | first_line = True
76 | second_line = True
77 | line = None
78 | try:
79 | for fline in f:
80 | line = fline
81 |
82 | # Skip any '#!', '..', '