├── .flake8
├── .github
│   └── workflows
│       └── unit-tests.yaml
├── .gitignore
├── .readthedocs.yml
├── LICENSE
├── README.md
├── docs
│   ├── Makefile
│   ├── conf.py
│   ├── developer_guide.rst
│   ├── examples
│   │   ├── csv_reader.py
│   │   ├── hpctoolkit.py
│   │   ├── nsight.py
│   │   ├── otf2_read.py
│   │   └── projections.py
│   ├── getting_started.rst
│   ├── index.rst
│   ├── requirements.txt
│   ├── source
│   │   ├── pipit.readers.rst
│   │   └── pipit.rst
│   └── user_guide.rst
├── logo.png
├── pipit
│   ├── .gitignore
│   ├── __init__.py
│   ├── graph.py
│   ├── readers
│   │   ├── __init__.py
│   │   ├── core_reader.py
│   │   ├── hpctoolkit_reader.py
│   │   ├── nsight_reader.py
│   │   ├── nsight_sqlite_reader.py
│   │   ├── otf2_reader.py
│   │   └── projections_reader.py
│   ├── tests
│   │   ├── config.py
│   │   ├── conftest.py
│   │   ├── data
│   │   │   ├── foo-bar.csv
│   │   │   ├── ping-pong-hpctoolkit
│   │   │   │   ├── FORMATS.md
│   │   │   │   ├── cct.db
│   │   │   │   ├── meta.db
│   │   │   │   ├── metrics
│   │   │   │   │   ├── METRICS.yaml.ex
│   │   │   │   │   └── default.yaml
│   │   │   │   ├── profile.db
│   │   │   │   ├── src
│   │   │   │   │   └── ping-pong.c
│   │   │   │   └── trace.db
│   │   │   ├── ping-pong-otf2-papi
│   │   │   │   ├── MANIFEST.md
│   │   │   │   ├── scorep.cfg
│   │   │   │   ├── traces.def
│   │   │   │   ├── traces.otf2
│   │   │   │   └── traces
│   │   │   │       ├── 0.def
│   │   │   │       ├── 0.evt
│   │   │   │       ├── 1.def
│   │   │   │       └── 1.evt
│   │   │   ├── ping-pong-otf2
│   │   │   │   ├── MANIFEST.md
│   │   │   │   ├── scorep.cfg
│   │   │   │   ├── traces.def
│   │   │   │   ├── traces.otf2
│   │   │   │   └── traces
│   │   │   │       ├── 0.def
│   │   │   │       ├── 0.evt
│   │   │   │       ├── 1.def
│   │   │   │       └── 1.evt
│   │   │   └── ping-pong-projections
│   │   │       ├── pingpong.prj.0.log.gz
│   │   │       ├── pingpong.prj.1.log.gz
│   │   │       ├── pingpong.prj.projrc
│   │   │       └── pingpong.prj.sts
│   │   ├── hpctoolkit.py
│   │   ├── otf2-tests.py
│   │   ├── projections-tests.py
│   │   └── trace.py
│   ├── trace.py
│   ├── util
│   │   ├── __init__.py
│   │   ├── cct.py
│   │   └── config.py
│   └── writers
│       └── chrome_writer.py
├── pytest.ini
├── requirements.txt
└── setup.py
/.flake8:
--------------------------------------------------------------------------------
1 | # -*- conf -*-
2 | # flake8 settings for pipit
3 | #
4 | # These are the minimal flake8 settings recommended by Black
5 | # https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#code-style
6 |
7 | [flake8]
8 | max-line-length = 88
9 | extend-ignore = E203
10 |
--------------------------------------------------------------------------------
/.github/workflows/unit-tests.yaml:
--------------------------------------------------------------------------------
1 | name: unit tests
2 |
3 | on:
4 | push:
5 | branches: [develop]
6 | pull_request:
7 | branches: [develop]
8 |
9 | jobs:
10 | ubuntu:
11 | runs-on: ${{ matrix.os }}
12 | strategy:
13 | matrix:
14 | os: [ubuntu-latest]
15 | python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11"]
16 |
17 | steps:
18 | - uses: actions/checkout@v2
19 |       # use micromamba over setup-python since this allows us to use
20 |       # older Pythons
21 | - uses: mamba-org/setup-micromamba@v2
22 | with:
23 | # the create command looks like this:
24 | # `micromamba create -n test-env python=(version)`
25 | environment-name: test-env
26 | create-args: python=${{ matrix.python-version }}
27 |
28 | - name: Install Python packages
29 | run: |
30 | pip install --upgrade pip
31 | pip install --upgrade numpy pandas pytest otf2
32 |
33 | - name: Lint and format check with flake8 and black
34 | if: ${{ matrix.python-version == 3.9 }}
35 | run: |
36 | pip install --upgrade black flake8
37 | black --diff --check .
38 | flake8
39 |
40 | - name: Basic test with pytest
41 | run: |
42 | PYTHONPATH=. $(which pytest)
43 |
44 | macos:
45 | runs-on: macos-latest
46 | strategy:
47 | matrix:
48 | python-version: ["3.10", "3.11"]
49 |
50 | steps:
51 | - uses: actions/checkout@v2
52 | - uses: actions/setup-python@v2
53 | with:
54 | python-version: ${{ matrix.python-version }}
55 |
56 | - name: Install Python packages
57 | run: |
58 | pip install --upgrade pip
59 | pip install --upgrade numpy pandas pytest otf2
60 |
61 | - name: Basic test with pytest
62 | run: |
63 | PYTHONPATH=. $(which pytest)
64 |
65 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .cache
3 | .pytest_cache
4 | .ipynb_checkpoints
5 |
--------------------------------------------------------------------------------
/.readthedocs.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | # Build from the docs/ directory with Sphinx
4 | sphinx:
5 | configuration: docs/conf.py
6 |
7 | # Explicitly set the version of Python and its requirements
8 | python:
9 | version: 3.8
10 | install:
11 | - requirements: docs/requirements.txt
12 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2021, Parallel Software and Systems Group, University of
2 | Maryland.
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a
5 | copy of this software and associated documentation files (the "Software"),
6 | to deal in the Software without restriction, including without limitation
7 | the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 | and/or sell copies of the Software, and to permit persons to whom the
9 | Software is furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 | DEALINGS IN THE SOFTWARE.
21 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Pipit
2 |
3 | [unit tests](https://github.com/hpcgroup/pipit/actions)
4 | [documentation status](https://pipit.readthedocs.io/en/latest/?badge=latest)
5 | [code style: black](https://github.com/psf/black)
6 |
7 | A Python-based library for analyzing execution traces from parallel programs.
8 |
9 | ### Contributing
10 |
11 | Pipit is an open source project. We welcome contributions via pull requests,
12 | and questions, feature requests, or bug reports via issues.
13 |
14 | ### License
15 |
16 | Pipit is distributed under the terms of the MIT License.
17 |
18 | All contributions must be made under the MIT license. Copyrights in the
19 | Pipit project are retained by contributors. No copyright assignment is
20 | required to contribute to Pipit.
21 |
22 | See [LICENSE](https://github.com/pssg-int/trace-analysis/blob/develop/LICENSE)
23 | for details.
24 |
25 | SPDX-License-Identifier: MIT
26 |
--------------------------------------------------------------------------------
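
A minimal sketch of the workflow the README describes (the trace path is
hypothetical; the scripts under docs/examples/ below are the runnable versions
against the bundled test data):

    import pipit as pp

    # Read an execution trace into pipit's Trace data structure.
    # from_otf2 is one of several readers; the directory path is made up here.
    trace = pp.Trace.from_otf2("path/to/otf2-trace-dir")

    # The events DataFrame holds one row per Enter/Leave/Instant event.
    print(trace.events)
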
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = .
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of
2 | # Maryland. See the top-level LICENSE file for details.
3 | #
4 | # SPDX-License-Identifier: MIT
5 |
6 | # Configuration file for the Sphinx documentation builder.
7 | #
8 | # This file only contains a selection of the most common options. For a full
9 | # list see the documentation:
10 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
11 |
12 | # -- Path setup --------------------------------------------------------------
13 |
14 | # If extensions (or modules to document with autodoc) are in another directory,
15 | # add these directories to sys.path here. If the directory is relative to the
16 | # documentation root, use os.path.abspath to make it absolute, like shown here.
17 | #
18 | # import os
19 | import sys
20 |
21 | # sys.path.insert(0, os.path.abspath('.'))
22 |
23 | # The name of the Pygments (syntax highlighting) style to use.
24 | from pygments.styles.default import DefaultStyle
25 | from pygments.token import Generic
26 |
27 | import pkg_resources
28 |
29 |
30 | # -- Project information -----------------------------------------------------
31 |
32 | project = "pipit"
33 | copyright = "2022-2023, Parallel Software and Systems Group, University of Maryland"
34 | author = "Abhinav Bhatele"
35 |
36 | # The full version, including alpha/beta/rc tags
37 | release = "0.1.0"
38 |
39 |
40 | # -- General configuration ---------------------------------------------------
41 |
42 | # Add any Sphinx extension module names here, as strings. They can be
43 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
44 | # ones.
45 | extensions = [
46 | "sphinx.ext.autodoc",
47 | "sphinx.ext.todo",
48 | "sphinx.ext.imgmath",
49 | "sphinx.ext.viewcode",
50 | "sphinx.ext.githubpages",
51 | "sphinx.ext.napoleon",
52 | ]
53 |
54 | # Add any paths that contain templates here, relative to this directory.
55 | templates_path = ["_templates"]
56 |
57 | # List of patterns, relative to source directory, that match files and
58 | # directories to ignore when looking for source files.
59 | # This pattern also affects html_static_path and html_extra_path.
60 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
61 |
62 |
63 | # modifications to the default style
64 | class PipitStyle(DefaultStyle):
65 | styles = DefaultStyle.styles.copy()
66 | background_color = "#f4f4f8"
67 | styles[Generic.Output] = "#355"
68 | styles[Generic.Prompt] = "bold #346ec9"
69 |
70 |
71 | dist = pkg_resources.Distribution(__file__)
72 | sys.path.append(".") # make 'conf' module findable
73 | ep = pkg_resources.EntryPoint.parse("pipit = conf:PipitStyle", dist=dist)
74 | dist._ep_map = {"pygments.styles": {"plugin1": ep}}
75 | pkg_resources.working_set.add(dist)
76 |
77 | pygments_style = "pipit"
78 |
79 |
80 | # -- Options for HTML output -------------------------------------------------
81 |
82 | # The theme to use for HTML and HTML Help pages. See the documentation for
83 | # a list of builtin themes.
84 | #
85 | html_theme = "sphinx_rtd_theme"
86 |
87 | # Theme options are theme-specific and customize the look and feel of a theme
88 | # further. For a list of options available for each theme, see the
89 | # documentation.
90 | #
91 | html_theme_options = {
92 | "canonical_url": "",
93 | "analytics_id": "",
94 | "logo_only": True,
95 | "display_version": True,
96 | "prev_next_buttons_location": "bottom",
97 | "style_external_links": False,
98 | # Toc options
99 | "collapse_navigation": True,
100 | "sticky_navigation": True,
101 | "navigation_depth": 4,
102 | "includehidden": True,
103 | "titles_only": False,
104 | }
105 |
106 | # Add any paths that contain custom static files (such as style sheets) here,
107 | # relative to this directory. They are copied after the builtin static files,
108 | # so a file named "default.css" will overwrite the builtin "default.css".
109 | html_static_path = []
110 |
--------------------------------------------------------------------------------
/docs/developer_guide.rst:
--------------------------------------------------------------------------------
1 | .. Copyright 2023 Parallel Software and Systems Group, University of Maryland.
2 | See the top-level LICENSE file for details.
3 |
4 | SPDX-License-Identifier: MIT
5 |
6 | ***************
7 | Developer Guide
8 | ***************
9 |
10 |
--------------------------------------------------------------------------------
/docs/examples/csv_reader.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import pipit as pp
4 |
5 |
6 | if __name__ == "__main__":
7 | # Use pipit's ``from_csv`` API to read in traces in CSV format.
8 | # The result is stored into pipit's Trace data structure.
9 |
10 | trace = pp.Trace.from_csv("../../pipit/tests/data/foo-bar.csv")
11 |
12 | trace.calc_inc_metrics()
13 | print(trace.events)
14 |
--------------------------------------------------------------------------------
/docs/examples/hpctoolkit.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import pipit as pp
4 |
5 |
6 | if __name__ == "__main__":
7 | # Path to HPCToolkit traces
8 | dirname = "../../pipit/tests/data/ping-pong-hpctoolkit"
9 |
10 | # Use pipit's ``from_hpctoolkit`` API to read in the traces.
11 | # The result is stored into pipit's Trace data structure.
12 | trace = pp.Trace.from_hpctoolkit(dirname)
13 |
14 |     # Print out the DataFrame component of the Trace.
15 | print(trace.events)
16 |
--------------------------------------------------------------------------------
/docs/examples/nsight.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import pipit as pp
4 |
5 |
6 | if __name__ == "__main__":
7 | # Path to Nsight traces
8 | filename = "../../pipit/tests/data/nbody-nvtx/trace.csv"
9 |
10 | # Use pipit's ``from_nsight`` API to read in the traces.
11 | # The result is stored into pipit's Trace data structure.
12 | trace = pp.Trace.from_nsight(filename)
13 |
14 |     # Print out the DataFrame component of the Trace.
15 | print(trace.events)
16 |
--------------------------------------------------------------------------------
/docs/examples/otf2_read.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import pipit as pp
4 |
5 |
6 | if __name__ == "__main__":
7 | # Path to OTF2 traces
8 | dirname = "../../pipit/tests/data/ping-pong-otf2"
9 |
10 | # Use pipit's ``from_otf2`` API to read in the OTF2 traces.
11 | # The result is stored into pipit's Trace data structure.
12 | trace = pp.Trace.from_otf2(dirname)
13 |
14 |     # Print out the DataFrame component of the Trace.
15 | print(trace.events)
16 |
--------------------------------------------------------------------------------
/docs/examples/projections.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import pipit as pp
4 |
5 |
6 | if __name__ == "__main__":
7 |     # Path to Projections traces
8 | dirname = "../../pipit/tests/data/ping-pong-projections"
9 |
10 | # Use pipit's ``from_projections`` API to read in the Projections traces.
11 | # The result is stored into pipit's Trace data structure.
12 | trace = pp.Trace.from_projections(dirname)
13 |
14 |     # Print out the DataFrame component of the Trace.
15 | print(trace.events)
16 |
--------------------------------------------------------------------------------
/docs/getting_started.rst:
--------------------------------------------------------------------------------
1 | .. Copyright 2022-2023 Parallel Software and Systems Group, University of
2 | Maryland. See the top-level LICENSE file for details.
3 |
4 | SPDX-License-Identifier: MIT
5 |
6 | ***************
7 | Getting Started
8 | ***************
9 |
10 | Prerequisites
11 | =============
12 |
13 | Pipit has the following minimum requirements, which must be installed before
14 | pipit is run:
15 |
16 | #. Python 2 (2.7) or 3 (3.5 - 3.10)
17 | #. pandas
18 |
19 | Pipit is available on `GitHub <https://github.com/hpcgroup/pipit>`_
20 |
21 |
22 | Installation
23 | ============
24 |
25 |
26 | Supported data formats
27 | ======================
28 |
29 | Currently, pipit supports the following data formats as input:
30 |
31 | * `HPCToolkit <http://hpctoolkit.org>`_ trace
32 | * OTF2
33 | * Nsight
34 | * Projections
35 |
--------------------------------------------------------------------------------
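
Each format listed in getting_started.rst above maps to a Trace.from_* entry
point; a short sketch (paths are hypothetical and mirror the scripts in
docs/examples/):

    import pipit as pp

    # One reader per supported input format; each returns a pipit Trace.
    trace = pp.Trace.from_hpctoolkit("path/to/hpctoolkit-db-dir")   # HPCToolkit database
    trace = pp.Trace.from_otf2("path/to/otf2-trace-dir")            # OTF2 (e.g. Score-P)
    trace = pp.Trace.from_nsight("path/to/nsight-trace.csv")        # Nsight CSV export
    trace = pp.Trace.from_projections("path/to/projections-dir")    # Projections logs
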
/docs/index.rst:
--------------------------------------------------------------------------------
1 | .. Copyright 2022-2023 Parallel Software and Systems Group, University of
2 | Maryland. See the top-level LICENSE file for details.
3 |
4 | SPDX-License-Identifier: MIT
5 |
6 | .. pipit documentation master file, created by
7 | sphinx-quickstart on Sun Nov 13 14:19:38 2022.
8 | You can adapt this file completely to your liking, but it should at least
9 | contain the root `toctree` directive.
10 |
11 | #####
12 | Pipit
13 | #####
14 |
15 | Pipit is a Python library for analyzing parallel execution traces.
16 |
17 | You can get pipit from its `GitHub repository
18 | <https://github.com/hpcgroup/pipit>`_:
19 |
20 | .. code-block:: console
21 |
22 | $ git clone https://github.com/hpcgroup/pipit.git
23 |
24 |
25 | .. toctree::
26 | :maxdepth: 2
27 | :caption: User Docs
28 |
29 | getting_started
30 | user_guide
31 |
32 | .. toctree::
33 | :maxdepth: 2
34 | :caption: Developer Docs
35 |
36 | developer_guide
37 |
38 | .. toctree::
39 | :maxdepth: 2
40 | :caption: API Docs
41 |
42 |    Pipit API Docs <source/pipit>
43 |
44 |
45 | ##################
46 | Indices and tables
47 | ##################
48 |
49 | * :ref:`genindex`
50 | * :ref:`modindex`
51 | * :ref:`search`
52 |
--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | # These dependencies should be installed using pip in order
2 | # to build the documentation.
3 |
4 | sphinx
5 | sphinxcontrib-programoutput
6 | sphinx-rtd-theme
7 | # Restrict to pygments <2.13
8 | pygments <2.13
9 |
--------------------------------------------------------------------------------
/docs/source/pipit.readers.rst:
--------------------------------------------------------------------------------
1 | pipit.readers package
2 | =====================
3 |
4 | Submodules
5 | ----------
6 |
7 | pipit.readers.hpctoolkit\_reader module
8 | ---------------------------------------
9 |
10 | .. automodule:: pipit.readers.hpctoolkit_reader
11 | :members:
12 | :undoc-members:
13 | :show-inheritance:
14 |
15 | pipit.readers.nsight\_reader module
16 | -----------------------------------
17 |
18 | .. automodule:: pipit.readers.nsight_reader
19 | :members:
20 | :undoc-members:
21 | :show-inheritance:
22 |
23 | pipit.readers.otf2\_reader module
24 | ---------------------------------
25 |
26 | .. automodule:: pipit.readers.otf2_reader
27 | :members:
28 | :undoc-members:
29 | :show-inheritance:
30 |
31 | pipit.readers.projections\_reader module
32 | ----------------------------------------
33 |
34 | .. automodule:: pipit.readers.projections_reader
35 | :members:
36 | :undoc-members:
37 | :show-inheritance:
38 |
39 | Module contents
40 | ---------------
41 |
42 | .. automodule:: pipit.readers
43 | :members:
44 | :undoc-members:
45 | :show-inheritance:
46 |
--------------------------------------------------------------------------------
/docs/source/pipit.rst:
--------------------------------------------------------------------------------
1 | pipit package
2 | =============
3 |
4 | Subpackages
5 | -----------
6 |
7 | .. toctree::
8 | :maxdepth: 4
9 |
10 | pipit.readers
11 |
12 | Submodules
13 | ----------
14 |
15 | pipit.graph module
16 | ------------------
17 |
18 | .. automodule:: pipit.graph
19 | :members:
20 | :undoc-members:
21 | :show-inheritance:
22 |
23 | pipit.trace module
24 | ------------------
25 |
26 | .. automodule:: pipit.trace
27 | :members:
28 | :undoc-members:
29 | :show-inheritance:
30 |
31 | Module contents
32 | ---------------
33 |
34 | .. automodule:: pipit
35 | :members:
36 | :undoc-members:
37 | :show-inheritance:
38 |
--------------------------------------------------------------------------------
/docs/user_guide.rst:
--------------------------------------------------------------------------------
1 | .. Copyright 2022-2023 Parallel Software and Systems Group, University of
2 | Maryland. See the top-level LICENSE file for details.
3 |
4 | SPDX-License-Identifier: MIT
5 |
6 | **********
7 | User Guide
8 | **********
9 |
10 |
11 |
--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpcgroup/pipit/97beb979a126819de6fee1bd221647f4b9e2e6c7/logo.png
--------------------------------------------------------------------------------
/pipit/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .cache
3 | .pytest_cache
4 | .ipynb_checkpoints
5 |
--------------------------------------------------------------------------------
/pipit/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of
2 | # Maryland. See the top-level LICENSE file for details.
3 | #
4 | # SPDX-License-Identifier: MIT
5 |
6 | from .trace import Trace # noqa: F401
7 | from .util.config import get_option, set_option, reset_option # noqa: F401
8 |
--------------------------------------------------------------------------------
/pipit/graph.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of
2 | # Maryland. See the top-level LICENSE file for details.
3 | #
4 | # SPDX-License-Identifier: MIT
5 |
6 |
7 | class Node:
8 | """Each Node corresponds to a PF tag in the experiment.xml file, and can be
9 | referenced by any calling_context_id directly under it
10 | """
11 |
12 | def __init__(self, id, parent, level=None) -> None:
13 | self._pipit_nid = id
14 | self.children = []
15 | self.parent = parent
16 |
17 | if level is None:
18 | self.level = self._calculate_level()
19 | else:
20 | self.level = level
21 |
22 | def add_child(self, child_node):
23 | self.children.append(child_node)
24 |
25 | def get_level(self):
26 | """This function returns the depth of the current node
27 | (a root node would return 0)
28 | """
29 | return self.level
30 |
31 | def get_intersection(self, node: "Node"):
32 |         """Given two nodes, this function returns their intersection
33 |         starting from their root nodes (i.e. their least common ancestor).
34 |         If the two nodes do not share the same root node, their intersection
35 |         is None; otherwise, the deepest node they have in common (starting
36 |         from the root) is returned.
37 | """
38 | if node is None:
39 | return None
40 |
41 | if self.get_level() > node.get_level():
42 | node1 = self
43 | node2 = node
44 | else:
45 | node1 = node
46 | node2 = self
47 |
48 | while node1.get_level() > node2.get_level():
49 | node1 = node1.parent
50 |
51 | while node1 != node2:
52 | node1 = node1.parent
53 | node2 = node2.parent
54 |
55 | return node1
56 |
57 | def get_node_list(self, min_level):
58 |         """Creates a list of nodes from the current node up to the node at
59 |         level min_level. Backtracks from the current Node until the root or
60 |         min_level (whichever comes first) and returns the visited nodes as a list.
61 | """
62 | node = self
63 | return_list = []
64 |
65 | while node is not None and node.level > min_level:
66 | return_list.append(node)
67 | node = node.parent
68 |
69 | return return_list
70 |
71 | def __str__(self) -> str:
72 | return "ID: " + str(self._pipit_nid) + " -- Level: " + str(self.level)
73 |
74 | def _calculate_level(self):
75 | """private function to get depth of node"""
76 | if self.parent is None:
77 | return 0
78 | else:
79 | return 1 + self.parent._calculate_level()
80 |
81 | def __eq__(self, obj) -> bool:
82 | if isinstance(obj, Node):
83 | return self._pipit_nid == obj._pipit_nid
84 | else:
85 | return False
86 |
87 |
88 | class Graph:
89 | """Represents the calling context tree / call graph"""
90 |
91 | def __init__(self) -> None:
92 | self.roots = []
93 |
94 | def add_root(self, node):
95 | self.roots.append(node)
96 |
97 | def __str__(self) -> str:
98 | return "Roots: " + str([str(curr_root) for curr_root in self.roots])
99 |
--------------------------------------------------------------------------------
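
A small sketch of how the Node and Graph classes in graph.py fit together
(the node ids and tree shape below are made up for illustration):

    from pipit.graph import Node, Graph

    # Build a tiny calling context tree:  root -> a -> {b, c}
    root = Node(0, None)      # level 0
    a = Node(1, root)         # level 1
    root.add_child(a)
    b = Node(2, a)            # level 2
    a.add_child(b)
    c = Node(3, a)            # level 2
    a.add_child(c)

    graph = Graph()
    graph.add_root(root)

    # Least common ancestor of b and c is a.
    print(b.get_intersection(c))             # ID: 1 -- Level: 1

    # Nodes visited walking from c up toward level 0 (exclusive of level 0).
    print([str(n) for n in c.get_node_list(0)])
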
/pipit/readers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of
2 | # Maryland. See the top-level LICENSE file for details.
3 | #
4 | # SPDX-License-Identifier: MIT
5 |
--------------------------------------------------------------------------------
/pipit/readers/core_reader.py:
--------------------------------------------------------------------------------
1 | from typing import List, Dict
2 |
3 | import pandas
4 | from pipit.trace import Trace
5 |
6 |
7 | class CoreTraceReader:
8 | """
9 | Helper Object to read traces from different sources and convert them into a common
10 | format
11 | """
12 |
13 | def __init__(self, start: int = 0, stride: int = 1):
14 | """
15 | Should be called by each process to create an empty trace per process in the
16 | reader. Creates the following data structures to represent an empty trace:
17 | - events: Dict[int, Dict[int, List[Dict]]]
18 | - stacks: Dict[int, Dict[int, List[int]]]
19 | """
20 | # keep stride for how much unique id should be incremented
21 | self.stride = stride
22 |
23 | # keep track of a unique id for each event
24 | self.unique_id = start - self.stride
25 |
26 | # events are indexed by process number, then thread number
27 | # stores a list of events
28 | self.events: Dict[int, Dict[int, List[Dict]]] = {}
29 |
30 | # stacks are indexed by process number, then thread number
31 | # stores indices of events in the event list
32 | self.stacks: Dict[int, Dict[int, List[int]]] = {}
33 |
34 | def add_event(self, event: Dict) -> None:
35 | """
36 | Should be called to add each event to the trace. Will update the event lists and
37 | stacks accordingly.
38 | """
39 | # get process number -- if not present, set to 0
40 | if "Process" in event:
41 | process = event["Process"]
42 | else:
43 | process = 0
44 |
45 | # get thread number -- if not present, set to 0
46 | if "Thread" in event:
47 | thread = event["Thread"]
48 | else:
49 | thread = 0
50 | # event["Thread"] = 0
51 |
52 | # assign a unique id to the event
53 | event["unique_id"] = self.__get_unique_id()
54 |
55 | # get event list
56 | if process not in self.events:
57 | self.events[process] = {}
58 | if thread not in self.events[process]:
59 | self.events[process][thread] = []
60 | event_list = self.events[process][thread]
61 |
62 | # get stack
63 | if process not in self.stacks:
64 | self.stacks[process] = {}
65 | if thread not in self.stacks[process]:
66 | self.stacks[process][thread] = []
67 | stack: List[int] = self.stacks[process][thread]
68 |
69 | # if the event is an enter event, add the event to the stack and update the
70 | # parent-child relationships
71 | if event["Event Type"] == "Enter":
72 | self.__update_parent_child_relationships(event, stack, event_list, False)
73 | elif event["Event Type"] == "Instant":
74 | self.__update_parent_child_relationships(event, stack, event_list, True)
75 | # if the event is a leave event, update the matching event and pop from the
76 | # stack
77 | elif event["Event Type"] == "Leave":
78 | self.__update_match_event(event, stack, event_list)
79 |
80 | # Finally add the event to the event list
81 | event_list.append(event)
82 |
83 | def finalize(self):
84 | """
85 | Converts the events data structure into a pandas dataframe and returns it
86 | """
87 | all_events = []
88 | for process in self.events:
89 | for thread in self.events[process]:
90 | all_events.extend(self.events[process][thread])
91 |
92 | # create a dataframe
93 | trace_df = pandas.DataFrame(all_events)
94 |
95 | trace_df["_matching_event"].fillna(-1, inplace=True)
96 | trace_df["_parent"].fillna(-1, inplace=True)
97 | trace_df["_matching_timestamp"].fillna(-1, inplace=True)
98 |
99 | # categorical for memory savings
100 | trace_df = trace_df.astype(
101 | {
102 | "Name": "category",
103 | "Event Type": "category",
104 | "Process": "category",
105 | "_matching_event": "int32",
106 | "_parent": "int32",
107 | "_matching_timestamp": "int32",
108 | }
109 | )
110 | return trace_df
111 |
112 | def __update_parent_child_relationships(
113 | self, event: Dict, stack: List[int], event_list: List[Dict], is_instant: bool
114 | ) -> None:
115 | """
116 |         This method can be thought of as the update upon an "Enter" event. It adds
117 |         to the stack and the CCT.
118 | """
119 | if len(stack) == 0:
120 | # root event
121 | event["_parent"] = -1
122 | else:
123 | parent_event = event_list[stack[-1]]
124 | event["_parent"] = parent_event["unique_id"]
125 |
126 | # update stack
127 | if not is_instant:
128 | stack.append(len(event_list))
129 |
130 | def __update_match_event(
131 | self, leave_event: Dict, stack: List[int], event_list: List[Dict]
132 | ) -> None:
133 | """
134 |         This method can be thought of as the update upon a "Leave" event. It pops
135 |         from the stack and updates the event list. We should look into using this
136 |         function to add artificial "Leave" events for unmatched "Enter" events.
137 | """
138 |
139 | while len(stack) > 0:
140 |
141 | # popping matched events from the stack
142 | enter_event = event_list[stack.pop()]
143 |
144 | if enter_event["Name"] == leave_event["Name"]:
145 | # matching event found
146 |
147 | # update matching event ids
148 | leave_event["_matching_event"] = enter_event["unique_id"]
149 | enter_event["_matching_event"] = leave_event["unique_id"]
150 |
151 | # update matching timestamps
152 | leave_event["_matching_timestamp"] = enter_event["Timestamp (ns)"]
153 | enter_event["_matching_timestamp"] = leave_event["Timestamp (ns)"]
154 |
155 | break
156 |
157 | def __get_unique_id(self) -> int:
158 | self.unique_id += self.stride
159 | return self.unique_id
160 |
161 |
162 | def concat_trace_data(data_list):
163 | """
164 |     Concatenates the data from multiple trace readers into a single Trace
165 | """
166 | trace_data = pandas.concat(data_list, ignore_index=True)
167 | # set index to unique_id
168 | trace_data.set_index("unique_id", inplace=True)
169 | trace_data.sort_values(
170 | by="Timestamp (ns)", axis=0, ascending=True, inplace=True, ignore_index=True
171 | )
172 | return Trace(None, trace_data, None)
173 |
--------------------------------------------------------------------------------
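
A sketch of how a reader might drive CoreTraceReader directly (the events
below are fabricated; real readers populate them from a trace file):

    from pipit.readers.core_reader import CoreTraceReader, concat_trace_data

    reader = CoreTraceReader(start=0, stride=1)

    # Matched Enter/Leave pairs for a single process and thread.
    for name, event_type, ts in [
        ("main", "Enter", 0),
        ("MPI_Send", "Enter", 10),
        ("MPI_Send", "Leave", 25),
        ("main", "Leave", 100),
    ]:
        reader.add_event(
            {
                "Name": name,
                "Event Type": event_type,
                "Timestamp (ns)": ts,
                "Process": 0,
                "Thread": 0,
            }
        )

    # finalize() returns this reader's DataFrame; concat_trace_data merges the
    # per-reader DataFrames into a single Trace.
    trace = concat_trace_data([reader.finalize()])
    print(trace.events)
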
/pipit/readers/nsight_reader.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of
2 | # Maryland. See the top-level LICENSE file for details.
3 | #
4 | # SPDX-License-Identifier: MIT
5 |
6 | import pandas as pd
7 | import pipit.trace
8 |
9 |
10 | class NsightReader:
11 | """Reader for Nsight trace files"""
12 |
13 | def __init__(self, file_name, create_cct=False) -> None:
14 | self.file_name = file_name
15 | self.df = None
16 | self.create_cct = create_cct
17 |
18 | def read(self):
19 | """
20 | This read function directly takes in a csv of the trace report and
21 | utilizes pandas to convert it from a csv into a dataframe.
22 | """
23 |
24 | # Read in csv
25 | self.df = pd.read_csv(self.file_name)
26 |
27 |         # Grab the set of unique PIDs from the PID column to see if
28 |         # the trace is multi-process
29 | pid = set(self.df["PID"])
30 |
31 |         # check if PID and TID are NOT the same, i.e. whether the trace is multithreaded
32 | if self.df["PID"].equals(self.df["TID"]) is False:
33 |             # Group the pids together and give each process its own set of threads
34 | for i in pid:
35 | # Seeing where the rows of the PIDs match. Grabbing the rows in mask
36 | mask = self.df["PID"] == i
37 | # Creating a set from the matching PID rows dataframe of the TIDs
38 | tid = set(self.df[mask]["TID"])
39 | # Getting the TID set, creating a dictionary,
40 | # and increment the values (0,1,2,...)
41 | tid_dict = dict(zip(tid, range(0, len(tid))))
42 | # Grabbing the rows with mask and setting the thread column by
43 | # mapping the tids with the tid_dict
44 | self.df.loc[mask, "Thread"] = self.df["TID"].map(tid_dict)
45 | # Converting Thread from float to int
46 | self.df["Thread"] = self.df["Thread"].astype(int)
47 |
48 | # check if PID set is > 1, if so multiprocess or single process
49 | if len(pid) > 1:
50 | # Set Process column to PID
51 | self.df["Process"] = self.df["PID"]
52 | # Getting the PID set, creating a dictionary,
53 | # and increment the values (0,1,2,...)
54 | pid_dict = dict(zip(pid, range(0, len(pid))))
55 | # Using the dictionary to replace the Process values
56 | self.df["Process"].replace(pid_dict, inplace=True)
57 |
58 | # Copy self.df to create enter and leave rows
59 | df2 = self.df.copy()
60 |
61 | # Create new columns for self.df with start time to create enter rows
62 | self.df["Event Type"] = "Enter"
63 | self.df["Timestamp (ns)"] = self.df["Start (ns)"]
64 |
65 | # Create new columns for df2 with end time to create leave rows
66 | df2["Event Type"] = "Leave"
67 | df2["Timestamp (ns)"] = df2["End (ns)"]
68 |
69 | # Combine dataframes together
70 | self.df = pd.concat([self.df, df2])
71 |
72 | # Tidy Dataframe
73 | self.df.drop(["Start (ns)", "End (ns)"], axis=1, inplace=True)
74 |
75 | self.df.sort_values(by="Timestamp (ns)", ascending=True, inplace=True)
76 |
77 | self.df.reset_index(drop=True, inplace=True)
78 |
79 | self.df = self.df.astype(
80 | {
81 | "Event Type": "category",
82 | "Name": "category",
83 | "PID": "category",
84 | "TID": "category",
85 | }
86 | )
87 |
88 | # Grabbing the list of columns and rearranging them to put
89 | # Timestamp, Event Types, Name, Thread (potentially),
90 | # Process(potentially) in the front of the dataframe
91 | cols = list(self.df)
92 | cols.insert(0, cols.pop(cols.index("Timestamp (ns)")))
93 | cols.insert(1, cols.pop(cols.index("Event Type")))
94 | cols.insert(2, cols.pop(cols.index("Name")))
95 |
96 | if "Process" in self.df.columns:
97 | cols.insert(3, cols.pop(cols.index("Process")))
98 | if "Thread" in self.df.columns:
99 | cols.insert(3, cols.pop(cols.index("Thread")))
100 |
101 | elif "Thread" in self.df.columns:
102 | cols.insert(3, cols.pop(cols.index("Thread")))
103 |
104 | # Applying the column list to the dataframe to rearrange
105 | self.df = self.df.loc[:, cols]
106 |
107 | trace = pipit.trace.Trace(None, self.df)
108 | if self.create_cct:
109 | trace.create_cct()
110 |
111 | return trace
112 |
--------------------------------------------------------------------------------
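
For reference, a sketch of the kind of CSV the NsightReader above expects
(column names are taken from the code; a real Nsight Systems export contains
more columns, and the rows here are fabricated):

    import pipit as pp

    # trace.csv (hypothetical contents):
    #
    #   Start (ns),End (ns),Name,PID,TID
    #   0,100,main,1000,1000
    #   10,40,cudaMemcpy,1000,1000
    #
    trace = pp.Trace.from_nsight("trace.csv")

    # Each CSV row becomes one Enter and one Leave row, sorted by timestamp.
    print(trace.events)
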
/pipit/readers/nsight_sqlite_reader.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import pipit.trace
4 | import sqlite3
5 |
6 |
7 | class NSightSQLiteReader:
8 |     # Dictionary mapping trace type
9 |     # (e.g. NVTX, CUDA API) to SQL queries
10 | _trace_queries = {
11 | "nvtx": [
12 | """
13 | SELECT
14 | start as Enter,
15 | end as Leave,
16 | 'annotation' as type,
17 | IFNULL(text, StringIds.value) as "Name",
18 | (ne.globalTid >> 24) & 0x00FFFFFF AS "Process",
19 | ne.globalTid & 0x00FFFFFF AS "Thread",
20 | jsonText as meta
21 | FROM
22 | NVTX_EVENTS as ne
23 | LEFT JOIN StringIds
24 | ON StringIds.id = ne.textId
25 | WHERE
26 | -- Filter to only include range start/end and push/pop events
27 | ne.eventType in (59, 60)
28 | """
29 | ],
30 | "cuda_api": [
31 | """
32 | SELECT
33 | start as Enter,
34 | end as Leave,
35 | rname.value AS Name,
36 | (cuda_api.globalTid >> 24) & 0x00FFFFFF AS "Process",
37 | cuda_api.globalTid & 0x00FFFFFF AS "Thread",
38 | correlationId As id,
39 | null as meta
40 | FROM
41 | CUPTI_ACTIVITY_KIND_RUNTIME as cuda_api
42 | JOIN ThreadNames AS tname
43 | ON cuda_api.globalTid == tname.globalTid
44 | JOIN
45 | StringIds AS rname
46 | ON cuda_api.nameId = rname.id
47 | JOIN
48 | StringIds AS rname2
49 | ON tname.nameId = rname2.id
50 | """
51 | ],
52 | "gpu_trace": [
53 | """
54 | SELECT
55 | cuda_gpu.start as Enter,
56 | cuda_gpu.end as Leave,
57 | cuda_gpu.deviceId as gpuId,
58 | value as Name,
59 | cuda_gpu.streamId,
60 | 'kernel' as type,
61 | null as bytes,
62 | cuda_gpu.correlationId as id,
63 | (cuda_api.globalTid >> 24) & 0x00FFFFFF AS "Process",
64 | null as meta
65 | FROM CUPTI_ACTIVITY_KIND_KERNEL as cuda_gpu
66 | JOIN StringIds
67 | ON cuda_gpu.shortName = StringIds.id
68 | JOIN CUPTI_ACTIVITY_KIND_RUNTIME as cuda_api
69 | ON cuda_gpu.correlationId = cuda_api.correlationId
70 | """,
71 | """
72 | SELECT
73 | cuda_memcpy.start as Enter,
74 | cuda_memcpy.end as Leave,
75 | cuda_memcpy.deviceId as gpuId,
76 | memcpy_labels.name as Name,
77 | cuda_memcpy.streamId,
78 | 'cuda_memcpy' as type,
79 | bytes,
80 | cuda_memcpy.correlationId as id,
81 | (cuda_api.globalTid >> 24) & 0x00FFFFFF AS "Process",
82 | null as meta
83 | FROM CUPTI_ACTIVITY_KIND_MEMCPY as cuda_memcpy
84 | JOIN ENUM_CUDA_MEMCPY_OPER as memcpy_labels
85 | ON cuda_memcpy.copyKind = memcpy_labels.id
86 | JOIN CUPTI_ACTIVITY_KIND_RUNTIME as cuda_api
87 | ON cuda_memcpy.correlationId = cuda_api.correlationId
88 | """,
89 | """
90 | SELECT
91 | cuda_memset.start as Enter,
92 | cuda_memset.end as Leave,
93 | cuda_memset.deviceId as gpuId,
94 | memset_labels.name as Name,
95 | streamId,
96 | 'cuda_memset' as type,
97 | bytes,
98 | cuda_memset.correlationId as id,
99 | (cuda_api.globalTid >> 24) & 0x00FFFFFF AS "Process",
100 | null as meta
101 | FROM CUPTI_ACTIVITY_KIND_MEMSET as cuda_memset
102 | JOIN ENUM_CUDA_MEM_KIND as memset_labels
103 | ON cuda_memset.memKind = memset_labels.id
104 | JOIN CUPTI_ACTIVITY_KIND_RUNTIME as cuda_api
105 | ON cuda_memset.correlationId = cuda_api.correlationId
106 | """,
107 | """
108 | SELECT
109 | cuda_sync.start as Enter,
110 | cuda_sync.end as Leave,
111 | cuda_sync.deviceId as gpuId,
112 | sync_labels.name as Name,
113 | cuda_sync.streamId,
114 | 'cuda_sync' as type,
115 | null as bytes,
116 | cuda_sync.correlationId as id,
117 | (cuda_api.globalTid >> 24) & 0x00FFFFFF AS "Process",
118 | null as meta
119 | FROM CUPTI_ACTIVITY_KIND_SYNCHRONIZATION as cuda_sync
120 | JOIN ENUM_CUPTI_SYNC_TYPE as sync_labels
121 | ON cuda_sync.syncType = sync_labels.id
122 | JOIN CUPTI_ACTIVITY_KIND_RUNTIME as cuda_api
123 | ON cuda_sync.correlationId = cuda_api.correlationId
124 | """,
125 | """
126 | SELECT
127 | cuda_graph.start as Enter,
128 | cuda_graph.end as Leave,
129 | cuda_graph.deviceId as gpuId,
130 | -- CUDA Graphs are not name-able, so we use their id
131 | -- instead
132 | 'CUDA Graph ' || cuda_graph.graphId as Name,
133 | cuda_graph.streamId,
134 | 'cuda_graph' as type,
135 | null as bytes,
136 | cuda_graph.correlationId as id,
137 | (cuda_api.globalTid >> 24) & 0x00FFFFFF AS "Process",
138 | null as meta
139 | FROM CUPTI_ACTIVITY_KIND_GRAPH_TRACE as cuda_graph
140 | JOIN CUPTI_ACTIVITY_KIND_RUNTIME as cuda_api
141 | ON cuda_graph.correlationId = cuda_api.correlationId
142 | """,
143 | ],
144 | # TODO: reading in all the gpu metrics takes up a lot of memory
145 | # We should figure out which ones we want exactly
146 | # "gpu_metrics": """
147 | # SELECT GENERIC_EVENTS.rawTimestamp, typeId, data
148 | # FROM GPU_METRICS
149 | # LEFT JOIN GENERIC_EVENTS
150 | # ON GENERIC_EVENTS.typeId = GPU_METRICS.typeId
151 | # """
152 | }
153 |
154 | def __init__(self, filepath, create_cct=False, trace_types="all") -> None:
155 | self.conn = sqlite3.connect(filepath)
156 | self.create_cct = create_cct
157 | # Get all the table names that exist
158 |         # Sometimes, tables such as the GPU metrics might not
159 |         # exist
160 | get_tables_query = """
161 | SELECT name FROM sqlite_master WHERE type='table'
162 | """
163 | self.table_names = set(pd.read_sql_query(get_tables_query, self.conn).squeeze())
164 | self.trace_queries = NSightSQLiteReader._trace_queries.copy()
165 | if trace_types == "all":
166 | # Even nsight has separate analyses for CUDA API summary, etc.
167 | # We do need a way to compare multiple traces side by side, though
168 |
169 | # Some traces (their tables, e.g. NVTX_EVENTS) may not always be present
170 | # in the sqlite db
171 | # Make sure that all tables that we read in queries are accounted for here
172 | self.trace_types = []
173 | if "NVTX_EVENTS" in self.table_names:
174 | self.trace_types.append("nvtx")
175 | if "CUPTI_ACTIVITY_KIND_RUNTIME" in self.table_names:
176 | self.trace_types.append("cuda_api")
177 | self.trace_types.append("gpu_trace")
178 |
179 | # GPU metrics are disabled, see comment above
180 | # if "GPU_METRICS" in self.table_names:
181 | # self.trace_types.append("gpu_metrics")
182 | else:
183 | self.trace_types = trace_types
184 |
185 | if "gpu_trace" in self.trace_types:
186 |             # Check for existence of CUPTI_ACTIVITY_KIND_MEMCPY /
187 |             # CUPTI_ACTIVITY_KIND_MEMSET since those tables can sometimes be absent
188 |
189 | gpu_trace_qs = []
190 | gpu_trace_needed_tbls = [
191 | "CUPTI_ACTIVITY_KIND_RUNTIME",
192 | "CUPTI_ACTIVITY_KIND_MEMCPY",
193 | "CUPTI_ACTIVITY_KIND_MEMSET",
194 | "CUPTI_ACTIVITY_KIND_SYNCHRONIZATION",
195 | "CUPTI_ACTIVITY_KIND_GRAPH_TRACE",
196 | ]
197 |
198 | for req_tbl, q in zip(
199 | gpu_trace_needed_tbls,
200 | NSightSQLiteReader._trace_queries["gpu_trace"],
201 | strict=True,
202 | ):
203 | if req_tbl in self.table_names:
204 | gpu_trace_qs.append(q)
205 | self.trace_queries["gpu_trace"] = gpu_trace_qs
206 |
207 | def read(self) -> pipit.trace.Trace:
208 | traces = []
209 |
210 | for typ in self.trace_types:
211 | dfs = []
212 | for q in self.trace_queries[typ]:
213 | dfs.append(pd.read_sql_query(q, con=self.conn))
214 | df = pd.concat(dfs, axis=0)
215 | df["Trace Type"] = typ
216 | traces.append(df)
217 |
218 | # concat traces together row wise
219 | trace_df = pd.concat(traces, axis=0)
220 |
221 | # Melt start/end columns into single event type column
222 | trace_df = pd.melt(
223 | trace_df,
224 | # These are the columns we don't want to melt
225 | # Columns not in here will be melted into a single column
226 | id_vars=[col for col in df.columns if col not in {"Enter", "Leave"}],
227 | value_vars=["Enter", "Leave"],
228 | var_name="Event Type",
229 | value_name="Timestamp (ns)",
230 | )
231 |
232 | # Convert to the pandas nullable dtypes
233 | # This will help preserve e.g. streamId as an
234 | # integer column with nulls instead of casting to
235 | # float64
236 | trace_df = trace_df.convert_dtypes()
237 |
238 | # Cache mapping
239 | trace_df["_matching_event"] = np.concatenate(
240 | [
241 | np.arange(len(trace_df) // 2, len(trace_df)),
242 | np.arange(0, len(trace_df) // 2),
243 | ]
244 | )
245 | # Convert to numpy before assignment otherwise pandas
246 | # will try to align indices, which will mess up order
247 | trace_df["_matching_timestamp"] = trace_df["Timestamp (ns)"][
248 | trace_df["_matching_event"]
249 | ].to_numpy()
250 |
251 | # Cannot use ignore_index = True since that breaks the
252 | # _matching_event col
253 | trace_df = trace_df.sort_values(by="Timestamp (ns)")
254 |
255 | if self.trace_types == ["gpu_trace"]:
256 | parallelism_levels = ["gpuId", "streamId"]
257 | elif self.trace_types == ["cuda_api"]:
258 | parallelism_levels = ["Process"]
259 | else:
260 | parallelism_levels = ["Process", "gpuId", "streamId"]
261 |
262 | trace = pipit.trace.Trace(None, trace_df, parallelism_levels=parallelism_levels)
263 | if self.create_cct:
264 | trace.create_cct()
265 |
266 | # Call match caller callee to recreate hierarchical
267 | # relationship between annotations
268 | trace._match_caller_callee()
269 |
270 | # Associate CUDA API calls with memory operations or
271 | # kernel launches
272 | # Note: looking at _match_caller_callee
273 | # _parent should point to the "Enter" event of the parent
274 | # _children also points to the "Enter" events of the children of 1 node
275 |
276 | enter_mask = trace_df["Event Type"] == "Enter"
277 | cuda_api_mask = trace_df["Trace Type"] == "cuda_api"
278 | calls_that_launch = (
279 | trace_df.loc[cuda_api_mask & enter_mask]
280 | .reset_index()
281 | .merge(
282 | trace_df.loc[~cuda_api_mask & enter_mask].reset_index(),
283 | on="id",
284 | how="inner",
285 | )
286 | )
287 | # TODO: can get rid of the apply if we use an Arrow ListDtype for children
288 | # globally
289 | children = calls_that_launch["index_y"].apply(lambda x: [x])
290 | # Convert to numpy otherwise the index messes stuff up
291 | trace_df.loc[calls_that_launch["index_x"].to_numpy(), "_children"] = (
292 | children.to_numpy()
293 | )
294 | trace_df.loc[calls_that_launch["index_y"].to_numpy(), "_parent"] = (
295 | calls_that_launch["index_x"].to_numpy()
296 | )
297 |
298 | return trace
299 |
--------------------------------------------------------------------------------
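
A usage sketch for NSightSQLiteReader above (the report path is hypothetical;
the .sqlite file is the SQLite database exported from an Nsight Systems
report):

    from pipit.readers.nsight_sqlite_reader import NSightSQLiteReader

    # Restrict to CUDA runtime API rows; passing "all" (the default) also pulls
    # in NVTX ranges and the GPU trace when those tables are present.
    reader = NSightSQLiteReader("report.sqlite", trace_types=["cuda_api"])
    trace = reader.read()

    print(trace.events[["Timestamp (ns)", "Event Type", "Name", "Process", "Thread"]])
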
/pipit/readers/otf2_reader.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of
2 | # Maryland. See the top-level LICENSE file for details.
3 | #
4 | # SPDX-License-Identifier: MIT
5 |
6 | import otf2
7 | import numpy as np
8 | import pandas as pd
9 | import multiprocessing as mp
10 | import pipit.trace
11 |
12 |
13 | class OTF2Reader:
14 | """Reader for OTF2 trace files"""
15 |
16 | def __init__(self, dir_name, num_processes=None, create_cct=False):
17 | self.dir_name = dir_name # directory of otf2 file being read
18 | self.file_name = self.dir_name + "/traces.otf2"
19 | self.create_cct = create_cct
20 |
21 | num_cpus = mp.cpu_count()
22 | if num_processes is None or num_processes < 1 or num_processes > num_cpus:
23 | # uses all processes to parallelize reading by default
24 | self.num_processes = num_cpus
25 | else:
26 | self.num_processes = num_processes
27 |
28 | def field_to_val(self, field):
29 | """
30 | Handles otf2 and _otf2 objects
31 |
32 | Arguments:
33 | field: an otf2 object, _otf2 object, or any other field
34 | that can have different data types such as strings, ints, etc
35 |
36 | Returns:
37 | if otf2 definition, a string representation of the definition and
38 | its ID such as "Region 19" that the user can use to refer back
39 | to the definitions dataframe
40 | else if other otf2 or _otf2 objects, a simple string representation of
41 | the object
42 | else don't make any changes
43 |
44 | This function also ensures that there is no pickling of otf2 or _otf2
45 | objects, which could cause errors
46 | """
47 |
48 | """
49 | Note: any occurrence of [25:-2] or something similar
50 | is some simple string manipulation to only extract the relevant
51 | part of the string and not information like the type such as
52 | otf2.definitions, etc
53 | """
54 |
55 | field_type = str(type(field))
56 | if "otf2.definitions" in field_type:
57 | """
58 | Example: An event can have an attribute called region which corresponds
59 | to a definition. We strip the string and extract only the relevant
60 | information, which is the type of definition such as Region and also
61 | append its id (like Region 6) so that this definition can be accessed
62 | in the Definitions DataFrame
63 | """
64 | return field_type[25:-2] + " " + str(getattr(field, "_ref"))
65 | elif "_otf2" in field_type or "otf2" in field_type:
66 | """
67 | Example: A measurement event has an attribute called measurement mode
68 | which is either MeasurementMode.OFF or MeasurementMode.ON. These are not
69 | definitions, but they are an object in the lower level _otf2 library,
70 | and to ensure no pickling errors, I convert these objects to their
71 | string representation
72 | """
73 | return str(field)
74 | else:
75 | "not an otf2 type, then just return normally"
76 | return field
77 |
78 | def handle_data(self, data):
79 | """
80 | Handles different data structures
81 |
82 | Arguments:
83 | data: could be a list, tuple, set, dict, or any other python data type
84 |
85 | Returns:
86 | the same data structure as the passed argument but field_to_val is applied
87 | to all of the values it contains
88 |
89 | Note: all of the below cases handle the case where the data structure
90 |         could be nested, which is always a possibility depending on the trace's
91 | specific attributes
92 | """
93 |
94 | if isinstance(data, list):
95 | return [self.handle_data(data_element) for data_element in data]
96 | elif isinstance(data, dict):
97 | """
98 | Example: ProgramBegin events have an attribute that is a definition
99 | and quite ironically, also known as attribute. These are stored in
100 | a dictionary where the key is a definition like "Attribute 2" and
101 | the integer like 15968
102 | """
103 | return {
104 | self.field_to_val(data_key): self.handle_data(data_value)
105 | for data_key, data_value in data.items()
106 | }
107 | elif isinstance(data, tuple):
108 | """
109 | Example: There is a definition called CartTopology which has a
110 | field called dimensions that is a tuple of two other definitions
111 | called CartDimensions, showing why this nested structure is needed
112 | """
113 | return tuple([self.handle_data(data_element) for data_element in data])
114 | elif isinstance(data, set):
115 | """
116 | Haven't encountered this type, but added just in case any situations like
117 | the above ones do arise for this data type
118 | """
119 | return set([self.handle_data(data_element) for data_element in data])
120 | else:
121 | "this represents the case for most fields/attributes"
122 | return self.field_to_val(data)
123 |
124 | def fields_to_dict(self, def_object):
125 | """
126 | converts the fields in the attribute column of a definition
127 | object to a dictionary
128 | """
129 |
130 | fields_dict = {}
131 | # iterates through the fields of the definition
132 | # (ex: region has fields like name, paradigm source file, etc)
133 | for field in def_object._fields:
134 | field_name = str(field.name)
135 | # use the handle_data function to process the field's data appropriately
136 | fields_dict[field_name] = self.handle_data(getattr(def_object, field_name))
137 |
138 | if len(fields_dict) == 1:
139 | # collapse single dictionaries to a value
140 | return list(fields_dict.values())[0]
141 | else:
142 | return fields_dict
143 |
144 | def events_reader(self, rank_size):
145 | """
146 | Serial events reader that reads a subset of the trace
147 |
148 | Arguments:
149 | rank_size: a tuple containing the rank of the process
150 | and the size/total number of processors that are being used
151 |
152 | Returns:
153 | a dictionary with a subset of the trace events that can be converted
154 | to a dataframe
155 | """
156 |
157 | with otf2.reader.open(self.file_name) as trace:
158 | # extracts the rank and size
159 | # and gets all the locations
160 | # of the trace
161 | rank, size = rank_size[0], rank_size[1]
162 | locations = list(trace.definitions._locations)
163 | num_locations = len(locations)
164 |
165 | # base number of locations read by each process
166 | per_process = int(num_locations // size)
167 |
168 | # remainder number of locations to be split evenly
169 | remainder = int(num_locations % size)
170 |
171 | if rank < remainder:
172 | """
173 | Example:
174 | For the reading of 30 locations split over 14 processes,
175 | first 2 processes will read 3 locations each since the remainder
176 | is 2.
177 | """
178 | begin_int = rank * (per_process + 1)
179 | end_int = (rank + 1) * (per_process + 1)
180 | else:
181 | """
182 | Example:
183 | For the reading of 30 locations split over 14 processes,
184 | last 12 processes will read 2 locations each. The starting index
185 | accounts for the fact that the first two will read 3 locations each.
186 | """
187 | begin_int = (rank * per_process) + remainder
188 | end_int = ((rank + 1) * per_process) + remainder
189 |
190 | # select the locations to read based on above calculations
191 | loc_events = list(trace.events(locations[begin_int:end_int]).__iter__())
192 |
193 | # columns of the DataFrame
194 | timestamps, event_types, event_attributes, names = [], [], [], []
195 |
196 | # note: the below lists are for storing logical ids
197 | process_ids, thread_ids = [], []
198 |
199 | """
200 | Relevant Documentation for Metrics:
201 | https://scorepci.pages.jsc.fz-juelich.de/otf2-pipelines/doc.r4707/python/basics.html#metrics
202 | """
203 |
204 | # get members of metric class
205 | metric_members = (
206 | self.definitions.loc[
207 | self.definitions["Definition Type"] == "MetricClass"
208 | ]["Attributes"]
209 | .map(lambda attr: attr["members"])
210 | .values
211 | )
212 | metric_members = [] if len(metric_members) == 0 else metric_members[0]
213 |
214 | # ids of metric members
215 | metric_ids = list(
216 | map(lambda metric_member: int(metric_member[-1]), metric_members)
217 | )
218 |
219 | # names of metrics
220 | metric_names = (
221 | self.definitions.loc[
222 | (self.definitions["Definition Type"] == "MetricMember")
223 | & (self.definitions["ID"].isin(metric_ids))
224 | ]["Attributes"]
225 | .map(lambda attr: attr["name"])
226 | .values
227 | )
228 |
229 | # maps each metric to a list of its values
230 | metrics_dict = {metric_name: [] for metric_name in metric_names}
231 |
232 |             # used to keep track of the time at which the
233 |             # most recent metrics were read
234 | prev_metric_time = -1
235 |
236 | # iterates through the events and processes them
237 | for loc_event in loc_events:
238 | # extracts the location and event
239 | # location could be thread, process, etc
240 | loc, event = loc_event[0], loc_event[1]
241 |
242 | # To Do:
243 | # Support for GPU events has to be
244 | # added and unified across readers.
245 | if str(loc.type)[13:] == "CPU_THREAD":
246 | # don't add metric events as a separate row,
247 | # and add their values into columns instead
248 | if isinstance(event, otf2.events.Metric):
249 | # Since the location is a cpu thread, we know
250 | # that the metric event is of type MetricClass,
251 | # which has a list of MetricMembers.
252 | metrics = list(
253 | map(lambda metric: metric.name, event.metric.members)
254 | )
255 | metric_values = event.values
256 |
257 | # append the values for the metrics
258 | # to their appropriate lists
259 | for i in range(len(metrics)):
260 | metrics_dict[metrics[i]].append(metric_values[i])
261 |
262 | # store the metrics and their timestamp
263 | prev_metric_time = event.time
264 | else:
265 | # MetricClass metric events are synchronous
266 | # and coupled with an enter or leave event that
267 | # has the same timestamp
268 | if event.time != prev_metric_time:
269 | # if the event is not paired with any metric, then
270 | # add placeholders for all the metric lists
271 | for metric in metric_names:
272 | metrics_dict[metric].append(float("nan"))
273 |
274 | # reset this as a metric event was not read
275 | prev_metric_time = -1
276 |
277 | """
278 | Below is code to read the primary information about the
279 | non-metric event, such as location, attributes, etc.
280 | """
281 |
282 | process_id = loc.group._ref
283 | process_ids.append(process_id)
284 |
285 | # subtract the minimum location number of a process
286 | # from the location number to get threads numbered
287 | # 0 to (num_threads per process - 1) for each process.
288 | thread_ids.append(
289 | loc._ref - self.process_threads_map[process_id]
290 | )
291 |
292 | # type of event - enter, leave, or other types
293 | event_type = str(type(event))[20:-2]
294 | if event_type == "Enter" or event_type == "Leave":
295 | event_types.append(event_type)
296 | else:
297 | event_types.append("Instant")
298 |
299 | if event_type in ["Enter", "Leave"]:
300 | names.append(event.region.name)
301 | else:
302 | names.append(event_type)
303 |
304 | timestamps.append(event.time)
305 |
306 | # only add attributes for non-leave rows so that
307 | # there aren't duplicate attributes for a single event
308 | if event_type != "Leave":
309 | attributes_dict = {}
310 |
311 | # iterates through the event's attributes
312 | # (ex: region, bytes sent, etc)
313 | for key, value in vars(event).items():
314 | # only adds non-empty attributes
315 | # and ignores time so there isn't a duplicate time
316 | if value is not None and key != "time":
317 | # uses field_to_val to convert all data types
318 | # and ensure that there are no pickling errors
319 | attributes_dict[self.field_to_val(key)] = (
320 | self.handle_data(value)
321 | )
322 | event_attributes.append(attributes_dict)
323 | else:
324 | # nan attributes for leave rows
325 | # attributes column is of object dtype
326 | event_attributes.append(None)
327 |
328 | trace.close() # close event files
329 |
330 | # returns dataframe with all events and their fields
331 | trace_df = pd.DataFrame(
332 | {
333 | "Timestamp (ns)": timestamps,
334 | "Event Type": event_types,
335 | "Name": names,
336 | "Thread": thread_ids,
337 | "Process": process_ids,
338 | "Attributes": event_attributes,
339 | }
340 | )
341 |
342 | for metric, metric_values in metrics_dict.items():
343 | # only add columns of metrics which are populated with
344 | # some values (sometimes a metric could be defined but not
345 | # appear in the trace itself)
346 | if not np.isnan(metric_values).all():
347 | trace_df[metric] = metric_values
348 |
349 | return trace_df
350 |
351 | def read_definitions(self, trace):
352 | """
353 | Reads the definitions from the trace and converts them to a Pandas
354 | DataFrame
355 | """
356 |
357 | # OTF2 stores locations numbered from 0 to the (total number of threads - 1)
358 |         # across all processes. This dict will help us convert those to be ordered
359 | # from 0 to (number of threads for each process - 1) per process instead.
360 | self.process_threads_map = dict()
361 |
362 | # ids are the _ref attribute of an object
363 | # all objects stored in a reference registry
364 | # (such as regions) have such an id
365 | def_name, def_id, attributes = [], [], []
366 |
367 | # iterating through definition registry attributes
368 | # such as regions, strings, locations, etc
369 | for key in vars(trace.definitions).keys():
370 | # current attribute such as region, string, etc
371 | def_attribute = getattr(trace.definitions, str(key))
372 |
373 | # only definition type that is not a registry
374 | if key == "clock_properties":
375 | # clock properties doesn't have an ID
376 | def_id.append(float("NaN"))
377 | def_name.append(str(type(def_attribute))[25:-2])
378 | attributes.append(self.fields_to_dict(def_attribute))
379 |
380 | # ignores otf2 wrapper properties (don't provide useful info)
381 | elif "otf2" not in key:
382 | """
383 | iterate through registry elements
384 | (ex: iterating through all regions
385 | if region is the current definition)
386 |                 def_object is a single object of that definition
387 |                 type. For example, if def_attribute is regions,
388 |                 then def_object is a single region being looked at.
389 | """
390 |                 for def_object in def_attribute:
391 | # add to process threads map dict if you encounter a new location
392 | if (
393 | key == "_locations"
394 | and str(def_object.type) == "LocationType.CPU_THREAD"
395 | ):
396 | location_num, process_num = (
397 | def_object._ref,
398 | def_object.group._ref,
399 | )
400 |
401 | # each process (location group) will be mapped to its
402 | # minimum location number, which we will use to number threads
403 | # appropriately by subtracting that min from its location nums
404 | if process_num not in self.process_threads_map:
405 | self.process_threads_map[process_num] = location_num
406 | elif location_num < self.process_threads_map[process_num]:
407 | self.process_threads_map[process_num] = location_num
408 |
409 | if hasattr(def_object, "_ref"):
410 | # only add ids for those definitions that have it
411 | def_id.append(def_object._ref)
412 | else:
413 | # ID column is of float64 dtype
414 | def_id.append(float("NaN"))
415 |
416 | # name of the definition
417 | def_name.append(str(type(def_object))[25:-2])
418 |
419 | # converts a definition object to a dictionary of its attributes
420 | # this contains information that a user would have to access the
421 | # definitions DataFrame for
422 | attributes.append(self.fields_to_dict(def_object))
423 |
424 | # return the definitions as a DataFrame
425 | definitions_dataframe = pd.DataFrame(
426 | {"Definition Type": def_name, "ID": def_id, "Attributes": attributes}
427 | )
428 |
429 | # Definition column is of categorical dtype
430 | definitions_dataframe = definitions_dataframe.astype(
431 | {"Definition Type": "category"}
432 | )
433 |
434 | return definitions_dataframe
435 |
436 | def read_events(self):
437 | """
438 | Writes the events to a Pandas DataFrame
439 | using the multiprocessing library and the events_reader
440 | function
441 | """
442 |
443 | # parallelizes the reading of events
444 | # using the multiprocessing library
445 | pool_size, pool = self.num_processes, mp.Pool(self.num_processes)
446 |
447 | # list of dataframes returned by the processes pool
448 | events_dataframes = pool.map(
449 | self.events_reader, [(rank, pool_size) for rank in range(pool_size)]
450 | )
451 |
452 | pool.close()
453 |
454 | # merges the dataframe into one events dataframe
455 | events_dataframe = pd.concat(events_dataframes)
456 | del events_dataframes
457 |
458 | # accessing the clock properties of the trace using the definitions
459 | clock_properties = self.definitions.loc[
460 | self.definitions["Definition Type"] == "ClockProperties"
461 | ]["Attributes"].values[0]
462 | offset, resolution = (
463 | clock_properties["global_offset"],
464 | clock_properties["timer_resolution"],
465 | )
466 |
467 |         # subtract the global offset and scale ticks by
468 |         # (10^9 / timer resolution) to convert the timestamps
469 |         # to nanoseconds, as per the OTF2 documentation
470 | events_dataframe["Timestamp (ns)"] -= offset
471 | events_dataframe["Timestamp (ns)"] *= (10**9) / resolution
472 |
473 | # ensures the DataFrame is in order of increasing timestamp
474 | events_dataframe.sort_values(
475 | by="Timestamp (ns)", axis=0, ascending=True, inplace=True, ignore_index=True
476 | )
477 |
478 | # convert these to ints
479 | # (sometimes they get converted to floats
480 | # while concatenating dataframes)
481 | events_dataframe = events_dataframe.astype(
482 | {"Thread": "int32", "Process": "int32"}
483 | )
484 |
485 | # using categorical dtypes for memory optimization
486 | # (only efficient when used for categorical data)
487 | events_dataframe = events_dataframe.astype(
488 | {
489 | "Event Type": "category",
490 | "Name": "category",
491 | "Thread": "category",
492 | "Process": "category",
493 | }
494 | )
495 |
496 | return events_dataframe
497 |
498 | def read(self):
499 | """
500 | Returns a Trace object for the otf2 file
501 | that has one definitions DataFrame and another
502 | events DataFrame as its primary attributes
503 | """
504 |
505 | with otf2.reader.open(self.file_name) as trace: # noqa: F821
506 | self.definitions = self.read_definitions(trace) # definitions
507 |
508 | # if a trace has n locations, we should only parallelize
509 | # the reading of events over a number of processes
510 | # equal to n at a maximum
511 | num_locations = len(trace.definitions._locations)
512 | if self.num_processes > num_locations:
513 | self.num_processes = num_locations
514 |
515 | # close the trace and open it later per process
516 | trace.close()
517 |
518 | self.events = self.read_events() # events
519 |
520 | trace = pipit.trace.Trace(self.definitions, self.events)
521 | if self.create_cct:
522 | trace.create_cct()
523 |
524 | return trace
525 |
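
A minimal standalone sketch of the timestamp normalization performed in read_events above; the clock values below are made up for illustration, while the real ones come from the trace's ClockProperties definition:

import pandas as pd

# made-up clock properties in the shape produced by read_definitions
clock = {"global_offset": 1_000_000, "timer_resolution": 10_000_000}  # 10 MHz timer

df = pd.DataFrame({"Timestamp (ns)": [1_000_000, 1_000_005, 1_000_050]})

# same two steps as read_events: shift by the global offset, then scale
# ticks to nanoseconds (ticks / resolution gives seconds, times 10^9 gives ns)
df["Timestamp (ns)"] -= clock["global_offset"]
df["Timestamp (ns)"] *= (10**9) / clock["timer_resolution"]

print(df["Timestamp (ns)"].tolist())  # [0.0, 500.0, 5000.0]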
--------------------------------------------------------------------------------
/pipit/readers/projections_reader.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of
2 | # Maryland. See the top-level LICENSE file for details.
3 | #
4 | # SPDX-License-Identifier: MIT
5 |
6 | import os
7 | import gzip
8 | import pipit.trace
9 | import pandas as pd
10 | import multiprocessing as mp
11 |
12 |
13 | class ProjectionsConstants:
14 | """
15 |     Projections constants copied over from Projections -- used to
16 |     determine the type of each line in the log files
17 | """
18 |
19 |     # Message Creation
20 | CREATION = 1
21 |
22 | BEGIN_PROCESSING = 2
23 | END_PROCESSING = 3
24 | ENQUEUE = 4
25 | DEQUEUE = 5
26 | BEGIN_COMPUTATION = 6
27 | END_COMPUTATION = 7
28 |
29 | BEGIN_INTERRUPT = 8
30 | END_INTERRUPT = 9
31 | MESSAGE_RECV = 10
32 | BEGIN_TRACE = 11
33 | END_TRACE = 12
34 | USER_EVENT = 13
35 | BEGIN_IDLE = 14
36 | END_IDLE = 15
37 | BEGIN_PACK = 16
38 | END_PACK = 17
39 | BEGIN_UNPACK = 18
40 | END_UNPACK = 19
41 | CREATION_BCAST = 20
42 |
43 | CREATION_MULTICAST = 21
44 |
45 | # A record for a user supplied integer value, likely a timestep
46 | USER_SUPPLIED = 26
47 |
48 | # A record for the memory usage
49 | MEMORY_USAGE = 27
50 |
51 | # A record for a user supplied string
52 | USER_SUPPLIED_NOTE = 28
53 | USER_SUPPLIED_BRACKETED_NOTE = 29
54 |
55 | BEGIN_USER_EVENT_PAIR = 98
56 | END_USER_EVENT_PAIR = 99
57 | USER_EVENT_PAIR = 100
58 | USER_STAT = 32
59 | # *** USER category ***
60 | NEW_CHARE_MSG = 0
61 | # NEW_CHARE_NO_BALANCE_MSG = 1;
62 | FOR_CHARE_MSG = 2
63 | BOC_INIT_MSG = 3
64 | # BOC_MSG = 4;
65 | # TERMINATE_TO_ZERO = 5; # never used ??
66 | # TERMINATE_SYS = 6; # never used ??
67 | # INIT_COUNT_MSG = 7;
68 | # READ_VAR_MSG = 8;
69 | # READ_MSG_MSG = 9;
70 | # BROADCAST_BOC_MSG = 10;
71 | # DYNAMIC_BOC_INIT_MSG = 11;
72 |
73 | # *** IMMEDIATE category ***
74 | LDB_MSG = 12
75 | # VID_SEND_OVER_MSG = 13;
76 | QD_BOC_MSG = 14
77 | QD_BROADCAST_BOC_MSG = 15
78 | # IMM_BOC_MSG = 16;
79 | # IMM_BROADCAST_BOC_MSG = 17;
80 | # INIT_BARRIER_PHASE_1 = 18;
81 | # INIT_BARRIER_PHASE_2 = 19;
82 |
83 |
84 | class STSReader:
85 | def __init__(self, file_location):
86 | self.sts_file = open(file_location, "r") # self.chares = {}
87 |
88 | # In 'self.entries', each entry stores (entry_name: str, chare_id: int)
89 | self.entries = {}
90 |
91 | # Stores user event names: {user_event_id: user event name}
92 | self.user_events = {}
93 |
94 | # Stores user stat names: {user_event_id: user stat name}
95 | self.user_stats = {}
96 |
97 | self.read_sts_file()
98 |
99 |     # Gets the name of an entry from its entry_id, prefixed with its chare name
100 | def get_entry_name(self, entry_id):
101 | # self.entries[entry_id][1] is the chare_id (index for self.chares)
102 | if entry_id not in self.entries:
103 | return ""
104 | entry_name, chare_id = self.entries[entry_id]
105 | ret_val = entry_name
106 | if chare_id in self.chares:
107 | return self.chares[chare_id][0] + "::" + ret_val
108 | else:
109 | return ret_val
110 |
111 | # To get the dimension of an entry
112 | def get_dimension(self, entry_id):
113 | return self.chares[self.entries[entry_id][1]][1]
114 |
115 | # Gets the user event name from the user_event_id
116 | def get_user_event(self, user_event_id):
117 | return self.user_events[user_event_id]
118 |
119 | # Gets the name of the user stat from the user_event_id
120 | def get_user_stat(self, user_event_id):
121 | return self.user_stats[user_event_id]
122 |
123 | # unsure what this is used for, but necessary to read PROCESSING
124 | def get_num_perf_counts(self):
125 | if hasattr(self, "papi_event_names"):
126 | return len(self.papi_event_names)
127 | else:
128 | return 0
129 | # self.entries[entry_id][1] is the chare_id (index for self.chares)
130 |
131 | # Gets event name from event_id
132 | def get_event_name(self, event_id):
133 | return self.user_events[event_id]
134 |
135 | def read_sts_file(self):
136 | for line in self.sts_file:
137 | line_arr = line.split()
138 |
139 | # Note: I'm disregarding TOTAL_STATS and TOTAL_EVENTS, because
140 | # projections reader disregards them
141 |
142 | # Note: currently not reading/storing VERSION, MACHINE, SMPMODE,
143 | # COMMANDLINE, CHARMVERSION, USERNAME, HOSTNAME
144 |
145 | # create chares array
146 | # In 'self.chares', each entry stores (chare_name: str, dimension: int)
147 | if line_arr[0] == "TOTAL_CHARES":
148 | total_chares = int(line_arr[1])
149 | self.chares = [None] * total_chares
150 |
151 | elif line_arr[0] == "TOTAL_EPS":
152 | self.num_eps = int(line_arr[1])
153 |
154 | # get num processors
155 | elif line_arr[0] == "PROCESSORS":
156 | self.num_pes = int(line_arr[1])
157 |
158 | # create message array
159 | elif line_arr[0] == "TOTAL_MSGS":
160 | total_messages = int(line_arr[1])
161 | self.message_table = [None] * total_messages
162 | elif line_arr[0] == "TIMESTAMP":
163 | self.timestamp_string = line_arr[1]
164 |
165 | # Add to self.chares
166 | elif line_arr[0] == "CHARE":
167 | id = int(line_arr[1])
168 | name = " ".join(line_arr[2:-1])[1:-1]
169 | dimensions = int(line_arr[-1])
170 | self.chares[id] = (name, dimensions)
171 |
172 | # add to self.entries
173 | elif line_arr[0] == "ENTRY":
174 | # Need to concat entry_name
175 | while not line_arr[3].endswith('"'):
176 | line_arr[3] = line_arr[3] + " " + line_arr[4]
177 | del line_arr[4]
178 |
179 | id = int(line_arr[2])
180 | entry_name = line_arr[3][1 : len(line_arr[3]) - 1]
181 | chare_id = int(line_arr[4])
182 | self.entries[id] = (entry_name, chare_id)
183 |
184 | # Add to message_table
185 | # Need clarification on this, as message_table is never referenced in
186 | # projections
187 | elif line_arr[0] == "MESSAGE":
188 | id = int(line_arr[1])
189 | message_size = int(line_arr[2])
190 | self.message_table[id] = message_size
191 |
192 | # Read/store event
193 | elif line_arr[0] == "EVENT":
194 | id = int(line_arr[1])
195 | event_name = ""
196 | # rest of line is the event name
197 | for i in range(2, len(line_arr)):
198 | event_name = event_name + line_arr[i] + " "
199 | self.user_events[id] = event_name
200 |
201 | # Read/store user stat
202 | elif line_arr[0] == "STAT":
203 | id = int(line_arr[1])
204 | event_name = ""
205 | # rest of line is the stat
206 | for i in range(2, len(line_arr)):
207 | event_name = event_name + line_arr[i] + " "
208 | self.user_stats[id] = event_name
209 |
210 | # create papi array
211 | elif line_arr[0] == "TOTAL_PAPI_EVENTS":
212 | num_papi_events = int(line_arr[1])
213 | self.papi_event_names = [None] * num_papi_events
214 |
215 | # Unsure of what these are for
216 | elif line_arr[0] == "PAPI_EVENT":
217 | id = int(line_arr[1])
218 | papi_event = line_arr[2]
219 | self.papi_event_names[id] = papi_event
220 |
221 | self.sts_file.close()
222 |
223 |
224 | class ProjectionsReader:
225 | def __init__(
226 | self, projections_directory: str, num_processes=None, create_cct=False
227 | ) -> None:
228 | if not os.path.isdir(projections_directory):
229 | raise ValueError("Not a valid directory.")
230 |
231 | # iterate through files in the directory to find sts file
232 | directory_contents = os.listdir(projections_directory)
233 | for file in directory_contents:
234 | if file.endswith(".sts"):
235 | if hasattr(self, "executable_location"):
236 | raise ValueError(
237 | "Invalid directory for projections - multiple sts files found."
238 | )
239 | else:
240 | executable_name = file[0:-4]
241 | self.executable_location = os.path.join(
242 | projections_directory, executable_name
243 | )
244 |
245 | if not hasattr(self, "executable_location"):
246 | raise ValueError("Invalid directory for projections - no sts files found.")
247 |
248 | self.num_pes = STSReader(self.executable_location + ".sts").num_pes
249 |
250 | # make sure all the log files exist
251 | for i in range(self.num_pes):
252 | log_file = executable_name + "." + str(i) + ".log.gz"
253 | if log_file not in directory_contents:
254 | raise ValueError(
255 | (
256 | "Invalid directory for projections - the sts file states that"
257 |                         " there are "
258 | )
259 |                     + str(self.num_pes)
260 | + " PEs, but log file "
261 | + log_file
262 | + " is missing."
263 | )
264 |
265 | num_cpus = mp.cpu_count()
266 | if num_processes is None or num_processes < 1 or num_processes > num_cpus:
267 | # uses all processes to parallelize reading by default
268 | self.num_processes = num_cpus
269 | else:
270 | self.num_processes = num_processes
271 |
272 | self.create_cct = create_cct
273 |
274 | # Returns an empty dict, used for reading log file into dataframe
275 | @staticmethod
276 | def _create_empty_dict() -> dict:
277 | return {
278 | "Name": [],
279 | "Event Type": [],
280 | "Timestamp (ns)": [],
281 | "Process": [],
282 | "Attributes": [],
283 | }
284 |
285 | def read(self):
286 | if self.num_pes < 1:
287 | return None
288 |
289 | if self.num_processes > self.num_pes:
290 | self.num_processes = self.num_pes
291 |
292 | pool_size, pool = self.num_processes, mp.Pool(self.num_processes)
293 |
294 | # Read each log file and store as list of dataframes
295 | dataframes_list = pool.map(
296 | self._read_log_file, [(rank, pool_size) for rank in range(pool_size)]
297 | )
298 |
299 | pool.close()
300 |
301 | # Concatenate the dataframes list into dataframe containing entire trace
302 | trace_df = pd.concat(dataframes_list, ignore_index=True)
303 | trace_df.sort_values(
304 | by="Timestamp (ns)", axis=0, ascending=True, inplace=True, ignore_index=True
305 | )
306 |
307 | # categorical for memory savings
308 | trace_df = trace_df.astype(
309 | {
310 | "Name": "category",
311 | "Event Type": "category",
312 | "Process": "category",
313 | }
314 | )
315 |
316 | # re-order columns
317 | trace_df = trace_df[
318 | ["Timestamp (ns)", "Event Type", "Name", "Process", "Attributes"]
319 | ]
320 |
321 | trace = pipit.trace.Trace(None, trace_df)
322 | if self.create_cct:
323 | trace.create_cct()
324 |
325 | return trace
326 |
327 | def _read_log_file(self, rank_size) -> pd.DataFrame:
328 | # has information needed in sts file
329 | sts_reader = STSReader(self.executable_location + ".sts")
330 |
331 | rank, size = rank_size[0], rank_size[1]
332 | per_process = int(self.num_pes // size)
333 | remainder = int(self.num_pes % size)
334 |
335 | if rank < remainder:
336 | begin_int = rank * (per_process + 1)
337 | end_int = (rank + 1) * (per_process + 1)
338 | else:
339 | begin_int = (rank * per_process) + remainder
340 | end_int = ((rank + 1) * per_process) + remainder
341 |
342 | dfs = []
343 | for pe_num in range(begin_int, end_int, 1):
344 | # create an empty dict to append to
345 | data = self._create_empty_dict()
346 |
347 | # opening the log file we need to read
348 | log_file = gzip.open(
349 | self.executable_location + "." + str(pe_num) + ".log.gz", "rt"
350 | )
351 |
352 |             # Based on Projections' log reader and log entry viewer
353 |             # Iterates through every line in the file and adds to the dict
354 | for line in log_file:
355 | line_arr = line.split()
356 |
357 | if not line_arr[0].isnumeric():
358 | pass
359 |
360 | elif int(line_arr[0]) == ProjectionsConstants.BEGIN_IDLE:
361 | time = int(line_arr[1]) * 1000
362 | pe = int(line_arr[2])
363 |
364 | details = {"From PE": pe}
365 |
366 | _add_to_trace_dict(data, "Idle", "Enter", time, pe_num, details)
367 |
368 | elif int(line_arr[0]) == ProjectionsConstants.END_IDLE:
369 | time = int(line_arr[1]) * 1000
370 | pe = int(line_arr[2])
371 |
372 | details = {"From PE": pe}
373 |
374 | _add_to_trace_dict(data, "Idle", "Leave", time, pe_num, details)
375 |
376 | # Pack message to be sent
377 | elif int(line_arr[0]) == ProjectionsConstants.BEGIN_PACK:
378 | time = int(line_arr[1]) * 1000
379 | pe = int(line_arr[2])
380 |
381 | details = {"From PE": pe}
382 |
383 | _add_to_trace_dict(data, "Pack", "Enter", time, pe_num, details)
384 |
385 | elif int(line_arr[0]) == ProjectionsConstants.END_PACK:
386 | time = int(line_arr[1]) * 1000
387 | pe = int(line_arr[2])
388 |
389 | details = {"From PE": pe}
390 |
391 | _add_to_trace_dict(data, "Pack", "Leave", time, pe_num, details)
392 |
393 | # Unpacking a received message
394 | elif int(line_arr[0]) == ProjectionsConstants.BEGIN_UNPACK:
395 | time = int(line_arr[1]) * 1000
396 | pe = int(line_arr[2])
397 |
398 | details = {"From PE": pe}
399 |
400 | _add_to_trace_dict(data, "Unpack", "Enter", time, pe_num, details)
401 |
402 | elif int(line_arr[0]) == ProjectionsConstants.END_UNPACK:
403 | time = int(line_arr[1]) * 1000
404 | pe = int(line_arr[2])
405 |
406 | details = {"From PE": pe}
407 |
408 | _add_to_trace_dict(data, "Unpack", "Leave", time, pe_num, details)
409 |
410 | elif int(line_arr[0]) == ProjectionsConstants.USER_SUPPLIED:
411 | user_supplied = line_arr[1]
412 | details = {"User Supplied": user_supplied}
413 |
414 | _add_to_trace_dict(
415 | data, "User Supplied", "Instant", -1, pe_num, details
416 | )
417 |
418 | elif int(line_arr[0]) == ProjectionsConstants.USER_SUPPLIED_NOTE:
419 |                     time = int(line_arr[1]) * 1000
420 | note = ""
421 | for i in range(2, len(line_arr)):
422 | note = note + line_arr[i] + " "
423 |
424 | details = {"Note": note}
425 |
426 | _add_to_trace_dict(
427 | data, "User Supplied Note", "Instant", time, pe_num, details
428 | )
429 |
430 | # Not sure if this should be instant or enter/leave
431 | elif (
432 | int(line_arr[0])
433 | == ProjectionsConstants.USER_SUPPLIED_BRACKETED_NOTE
434 | ):
435 |                     time = int(line_arr[1]) * 1000
436 |                     end_time = int(line_arr[2]) * 1000
437 |                     user_event_id = int(line_arr[3])
438 | note = ""
439 | for i in range(4, len(line_arr)):
440 | note = note + line_arr[i] + " "
441 | note = note + '"'
442 |
443 | details = {
444 | "Event ID": user_event_id,
445 | "Event Name": sts_reader.get_event_name(user_event_id),
446 | "Note": note,
447 | }
448 |
449 | _add_to_trace_dict(
450 | data,
451 | "User Supplied Bracketed Note",
452 | "Enter",
453 | time,
454 | pe_num,
455 | details,
456 | )
457 |
458 | _add_to_trace_dict(
459 | data,
460 | "User Supplied Bracketed Note",
461 | "Leave",
462 | end_time,
463 | pe_num,
464 | details,
465 | )
466 |
467 | # Memory Usage at timestamp
468 | elif int(line_arr[0]) == ProjectionsConstants.MEMORY_USAGE:
469 | memory_usage = int(line_arr[1])
470 | time = int(line_arr[2]) * 1000
471 |
472 | details = {"Memory Usage": memory_usage}
473 |
474 | _add_to_trace_dict(
475 | data, "Memory Usage", "Instant", time, pe_num, details
476 | )
477 |
478 | # New chare create message being sent
479 | elif int(line_arr[0]) == ProjectionsConstants.CREATION:
480 | mtype = int(line_arr[1])
481 | entry = int(line_arr[2])
482 | time = int(line_arr[3]) * 1000
483 | event = int(line_arr[4])
484 | pe = int(line_arr[5])
485 | msglen = int(line_arr[6])
486 | send_time = int(line_arr[7]) * 1000
487 |
488 | details = {
489 | "From PE": pe,
490 | "MType": mtype,
491 | "Entry Type": "Create",
492 | "Message Length": msglen,
493 | "Event ID": event,
494 | "Send Time": send_time,
495 | }
496 |
497 | _add_to_trace_dict(
498 | data,
499 | sts_reader.get_entry_name(entry),
500 | "Instant",
501 | time,
502 | pe_num,
503 | details,
504 | )
505 |
506 | elif int(line_arr[0]) == ProjectionsConstants.CREATION_MULTICAST:
507 | mtype = int(line_arr[1])
508 | entry = int(line_arr[2])
509 | time = int(line_arr[3]) * 1000
510 | event = int(line_arr[4])
511 | pe = int(line_arr[5])
512 | msglen = int(line_arr[6])
513 | send_time = int(line_arr[7]) * 1000
514 | num_procs = int(line_arr[8])
515 | dest_procs = []
516 |                         for i in range(num_procs):
517 | dest_procs.append(int(line_arr[9 + i]))
518 |
519 | details = {
520 | "From PE": pe,
521 | "Message Type": mtype,
522 | "Entry Type": "Multicast",
523 | "Message Length": msglen,
524 | "Event ID": event,
525 | "Send Time": send_time,
526 |                             "Destination PEs": dest_procs,
527 | }
528 |
529 | _add_to_trace_dict(
530 | data,
531 | sts_reader.get_entry_name(entry),
532 | "Instant",
533 | time,
534 | pe_num,
535 |                             details,
536 | )
537 |
538 | # Processing of chare (i.e. execution) ?
539 | elif int(line_arr[0]) == ProjectionsConstants.BEGIN_PROCESSING:
540 | mtype = int(line_arr[1])
541 | entry = int(line_arr[2])
542 | time = int(line_arr[3]) * 1000
543 | event = int(line_arr[4])
544 | pe = int(line_arr[5])
545 | msglen = int(line_arr[6])
546 | recv_time = int(line_arr[7])
547 | dimensions = sts_reader.get_dimension(entry)
548 | id = []
549 | for i in range(8, 8 + dimensions):
550 | id.append(int(line_arr[i]))
551 | cpu_start_time = int(line_arr[8 + dimensions])
552 |
553 | num_perf_counts = sts_reader.get_num_perf_counts()
554 | perf_counts = []
555 | for i in range(9 + dimensions, 9 + dimensions + num_perf_counts):
556 | perf_counts.append(int(line_arr[i]))
557 |
558 | details = {
559 | "From PE": pe,
560 | "Message Type": mtype,
561 | "Entry Type": "Processing",
562 | "Event ID": event,
563 | "Message Length": msglen,
564 | "Receive Time": recv_time,
565 | "ID List": id,
566 | "CPU Start Time": cpu_start_time,
567 | "perf counts list": perf_counts,
568 | }
569 |
570 | _add_to_trace_dict(
571 | data,
572 | sts_reader.get_entry_name(entry),
573 | "Enter",
574 | time,
575 | pe_num,
576 | details,
577 | )
578 |
579 | elif int(line_arr[0]) == ProjectionsConstants.END_PROCESSING:
580 | mtype = int(line_arr[1])
581 | entry = int(line_arr[2])
582 | time = int(line_arr[3]) * 1000
583 | event = int(line_arr[4])
584 | pe = int(line_arr[5])
585 | msglen = int(line_arr[6])
586 | cpu_end_time = int(line_arr[7])
587 | num_perf_counts = sts_reader.get_num_perf_counts()
588 | perf_counts = []
589 | for i in range(num_perf_counts):
590 | perf_counts.append(int(line_arr[8 + i]))
591 |
592 | details = {
593 | "From PE": pe,
594 | "Message Type": mtype,
595 |                         "Entry Type": "Processing",
596 | "Event ID": event,
597 | "Message Length": msglen,
598 | "CPU End Time": cpu_end_time,
599 | "perf counts list": perf_counts,
600 | }
601 |
602 | _add_to_trace_dict(
603 | data,
604 | sts_reader.get_entry_name(entry),
605 | "Leave",
606 | time,
607 | pe_num,
608 |                         details,
609 | )
610 |
611 | # For selective tracing - when trace is called inside code
612 | elif int(line_arr[0]) == ProjectionsConstants.BEGIN_TRACE:
613 | time = int(line_arr[1]) * 1000
614 |
615 | _add_to_trace_dict(data, "Trace", "Enter", time, pe_num, None)
616 |
617 | elif int(line_arr[0]) == ProjectionsConstants.END_TRACE:
618 | time = int(line_arr[1]) * 1000
619 |
620 | _add_to_trace_dict(data, "Trace", "Leave", time, pe_num, None)
621 |
622 | # Message Receive ?
623 | elif int(line_arr[0]) == ProjectionsConstants.MESSAGE_RECV:
624 | mtype = int(line_arr[1])
625 | time = int(line_arr[2]) * 1000
626 | event = int(line_arr[3])
627 | pe = int(line_arr[4])
628 | message_length = int(line_arr[5])
629 |
630 | details = {
631 | "From PE": pe,
632 | "Message Type": mtype,
633 | "Event ID": event,
634 | "Message Length": message_length,
635 | }
636 |
637 | _add_to_trace_dict(
638 | data, "Message Receive", "Instant", time, pe_num, details
639 | )
640 |
641 | # queueing creation ?
642 | elif int(line_arr[0]) == ProjectionsConstants.ENQUEUE:
643 | mtype = int(line_arr[1])
644 | time = int(line_arr[2]) * 1000
645 | event = int(line_arr[3])
646 | pe = int(line_arr[4])
647 |
648 | details = {"From PE": pe, "Message Type": mtype, "Event ID": event}
649 |
650 | _add_to_trace_dict(data, "Enque", "Instant", time, pe_num, details)
651 |
652 | elif int(line_arr[0]) == ProjectionsConstants.DEQUEUE:
653 | mtype = int(line_arr[1])
654 | time = int(line_arr[2]) * 1000
655 | event = int(line_arr[3])
656 | pe = int(line_arr[4])
657 |
658 | details = {"From PE": pe, "Message Type": mtype, "Event ID": event}
659 |
660 | _add_to_trace_dict(data, "Deque", "Instant", time, pe_num, details)
661 |
662 | # Interrupt from different chare ?
663 | elif int(line_arr[0]) == ProjectionsConstants.BEGIN_INTERRUPT:
664 | time = int(line_arr[1]) * 1000
665 | event = int(line_arr[2])
666 | pe = int(line_arr[3])
667 |
668 | details = {"From PE": pe, "Event ID": event}
669 |
670 | _add_to_trace_dict(
671 | data, "Interrupt", "Enter", time, pe_num, details
672 | )
673 |
674 | elif int(line_arr[0]) == ProjectionsConstants.END_INTERRUPT:
675 | time = int(line_arr[1]) * 1000
676 | event = int(line_arr[2])
677 | pe = int(line_arr[3])
678 |
679 | details = {"From PE": pe, "Event ID": event}
680 |
681 | _add_to_trace_dict(
682 | data, "Interrupt", "Leave", time, pe_num, details
683 | )
684 |
685 | # Very start of the program - encapsulates every other event
686 | elif int(line_arr[0]) == ProjectionsConstants.BEGIN_COMPUTATION:
687 | time = int(line_arr[1]) * 1000
688 |
689 | _add_to_trace_dict(data, "Computation", "Enter", time, pe_num, None)
690 |
691 | elif int(line_arr[0]) == ProjectionsConstants.END_COMPUTATION:
692 | time = int(line_arr[1]) * 1000
693 |
694 | _add_to_trace_dict(data, "Computation", "Leave", time, pe_num, None)
695 |
696 | # User event (in code)
697 | elif int(line_arr[0]) == ProjectionsConstants.USER_EVENT:
698 | user_event_id = int(line_arr[1])
699 | time = int(line_arr[2]) * 1000
700 | event = int(line_arr[3])
701 | pe = int(line_arr[4])
702 |
703 | user_event_name = sts_reader.get_user_event(user_event_id)
704 |
705 | details = {
706 | "From PE": pe,
707 | "Event ID": event,
708 | "Event Type": "User Event",
709 | }
710 |
711 | _add_to_trace_dict(
712 | data, user_event_name, "Instant", time, pe_num, details
713 | )
714 |
715 | elif int(line_arr[0]) == ProjectionsConstants.USER_EVENT_PAIR:
716 | user_event_id = int(line_arr[1])
717 | time = int(line_arr[2]) * 1000
718 | event = int(line_arr[3])
719 | pe = int(line_arr[4])
720 | nested_id = int(line_arr[5])
721 |
722 | user_event_name = sts_reader.get_user_event(user_event_id)
723 |
724 | details = {
725 | "From PE": pe,
726 | "Event ID": event,
727 | "Nested ID": nested_id,
728 | "Event Type": "User Event Pair",
729 | }
730 |
731 | _add_to_trace_dict(
732 | data, user_event_name, "Instant", time, pe_num, details
733 | )
734 |
735 | elif int(line_arr[0]) == ProjectionsConstants.BEGIN_USER_EVENT_PAIR:
736 | user_event_id = int(line_arr[1])
737 | time = int(line_arr[2]) * 1000
738 | event = int(line_arr[3])
739 | pe = int(line_arr[4])
740 | nested_id = int(line_arr[5])
741 |
742 | details = {
743 | "From PE": pe,
744 | "Event ID": event,
745 | "Nested ID": nested_id,
746 | "User Event Name": sts_reader.get_user_event(user_event_id),
747 | }
748 |
749 | _add_to_trace_dict(
750 | data, "User Event Pair", "Enter", time, pe_num, details
751 | )
752 |
753 | elif int(line_arr[0]) == ProjectionsConstants.END_USER_EVENT_PAIR:
754 | user_event_id = int(line_arr[1])
755 | time = int(line_arr[2]) * 1000
756 | event = int(line_arr[3])
757 | pe = int(line_arr[4])
758 | nested_id = int(line_arr[5])
759 |
760 | details = {
761 | "From PE": pe,
762 | "Event ID": event,
763 | "Nested ID": nested_id,
764 | "User Event Name": sts_reader.get_user_event(user_event_id),
765 | }
766 |
767 | _add_to_trace_dict(
768 |                         data, "User Event Pair", "Leave", time, pe_num, details
769 | )
770 |
771 | # User stat (in code)
772 | elif int(line_arr[0]) == ProjectionsConstants.USER_STAT:
773 | time = int(line_arr[1]) * 1000
774 | user_time = int(line_arr[2]) * 1000
775 | stat = float(line_arr[3])
776 | pe = int(line_arr[4])
777 | user_event_id = int(line_arr[5])
778 |
779 | user_stat_name = sts_reader.get_user_stat(user_event_id)
780 |
781 | details = {
782 | "From PE": pe,
783 | "User Time": user_time,
784 | "Stat": stat,
785 | "Event Type": "User Stat",
786 | }
787 |
788 | _add_to_trace_dict(
789 | data, user_stat_name, "Instant", time, pe_num, details
790 | )
791 |
792 | # Making sure that the log file ends with END_COMPUTATION
793 | if len(data["Name"]) > 0 and data["Name"][-1] != "Computation":
794 |                 time = data["Timestamp (ns)"][-1]
795 | _add_to_trace_dict(data, "Computation", "Leave", time, pe_num, None)
796 |
797 | log_file.close()
798 | dfs.append(pd.DataFrame(data))
799 |
800 | return pd.concat(dfs)
801 |
802 |
803 | def _add_to_trace_dict(data, name, evt_type, time, process, attributes):
804 | data["Name"].append(name)
805 | data["Event Type"].append(evt_type)
806 | data["Timestamp (ns)"].append(time)
807 | data["Process"].append(process)
808 | data["Attributes"].append(attributes)
809 |
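
_read_log_file above assigns each worker process a contiguous block of PEs, with the first (num_pes % size) workers taking one extra PE. A minimal standalone sketch of that arithmetic, using made-up inputs:

def pe_range(rank, size, num_pes):
    # same block distribution as _read_log_file: contiguous ranges, and the
    # first (num_pes % size) workers each take one extra PE
    per_process, remainder = num_pes // size, num_pes % size
    if rank < remainder:
        begin = rank * (per_process + 1)
        end = (rank + 1) * (per_process + 1)
    else:
        begin = rank * per_process + remainder
        end = (rank + 1) * per_process + remainder
    return list(range(begin, end))

# 10 PEs split across 4 workers -> [[0, 1, 2], [3, 4, 5], [6, 7], [8, 9]]
print([pe_range(r, 4, 10) for r in range(4)])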
--------------------------------------------------------------------------------
/pipit/tests/config.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Parallel Software and Systems Group, University of Maryland.
2 | # See the top-level LICENSE file for details.
3 | #
4 | # SPDX-License-Identifier: MIT
5 |
6 |
7 | import pipit as pp
8 |
9 |
10 | def test_get_option():
11 | # assert that default values are returned
12 | assert pp.get_option("log_level") == "INFO"
13 | assert pp.get_option("notebook_url") == "http://localhost:8888"
14 |
15 | # assert that invalid key raises ValueError
16 | try:
17 | pp.get_option("invalid_key")
18 | except ValueError:
19 | pass
20 | else:
21 | assert False
22 |
23 |
24 | def test_set_option():
25 | # assert that valid values are set
26 | pp.set_option("log_level", "DEBUG")
27 | assert pp.get_option("log_level") == "DEBUG"
28 |
29 | pp.set_option("notebook_url", "http://127.0.0.1:8080")
30 | assert pp.get_option("notebook_url") == "http://127.0.0.1:8080"
31 |
32 | # assert that invalid key raises ValueError
33 | try:
34 | pp.set_option("invalid_key", "invalid_value")
35 | except ValueError:
36 | pass
37 | else:
38 | assert False
39 |
40 | # assert that invalid value raises ValueError
41 | try:
42 | pp.set_option("log_level", "invalid_value")
43 | except ValueError:
44 | pass
45 | else:
46 | assert False
47 |
48 | try:
49 | pp.set_option("notebook_url", "invalid_value")
50 | except ValueError:
51 | pass
52 | else:
53 | assert False
54 |
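
The invalid-key and invalid-value cases above can also be written with pytest.raises; a minimal sketch using the same pipit options exercised by these tests:

import pytest

import pipit as pp


def test_invalid_option_key():
    # pytest.raises fails the test unless a ValueError is raised in the block
    with pytest.raises(ValueError):
        pp.get_option("invalid_key")
    with pytest.raises(ValueError):
        pp.set_option("invalid_key", "invalid_value")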
--------------------------------------------------------------------------------
/pipit/tests/conftest.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of
2 | # Maryland. See the top-level LICENSE file for details.
3 | #
4 | # SPDX-License-Identifier: MIT
5 |
6 | import os
7 | import shutil
8 | from glob import glob
9 |
10 | import pytest
11 |
12 |
13 | @pytest.fixture
14 | def data_dir():
15 | """Return path to the top-level data directory for tests."""
16 | parent = os.path.dirname(__file__)
17 | return os.path.join(parent, "data")
18 |
19 |
20 | @pytest.fixture
21 | def ping_pong_hpct_trace(data_dir, tmpdir):
22 | """Builds a temporary directory containing the ping-pong traces."""
23 | hpct_db_dir = os.path.join(data_dir, "ping-pong-hpctoolkit")
24 |
25 | for f in glob(os.path.join(str(hpct_db_dir), "*.db")):
26 | shutil.copy(f, str(tmpdir))
27 |
28 | return tmpdir
29 |
30 |
31 | @pytest.fixture
32 | def ping_pong_projections_trace(data_dir, tmpdir):
33 | """Builds a temporary directory containing the ping-pong traces."""
34 | projections_dir = os.path.join(data_dir, "ping-pong-projections")
35 |
36 | shutil.copy(os.path.join(projections_dir, "pingpong.prj.sts"), str(tmpdir))
37 | shutil.copy(os.path.join(projections_dir, "pingpong.prj.0.log.gz"), str(tmpdir))
38 | shutil.copy(os.path.join(projections_dir, "pingpong.prj.1.log.gz"), str(tmpdir))
39 |
40 | return tmpdir
41 |
42 |
43 | @pytest.fixture
44 | def ping_pong_otf2_trace(data_dir, tmpdir):
45 | """Builds a temporary directory containing the ping-pong traces."""
46 | otf2_dir = os.path.join(data_dir, "ping-pong-otf2")
47 |
48 | shutil.copytree(os.path.join(str(otf2_dir), "traces"), str(tmpdir) + "/traces")
49 | shutil.copy(os.path.join(str(otf2_dir), "scorep.cfg"), str(tmpdir))
50 | shutil.copy(os.path.join(str(otf2_dir), "traces.def"), str(tmpdir))
51 | shutil.copy(os.path.join(str(otf2_dir), "traces.otf2"), str(tmpdir))
52 |
53 | return tmpdir
54 |
55 |
56 | @pytest.fixture
57 | def ping_pong_otf2_papi_trace(data_dir, tmpdir):
58 | """Builds a temporary directory containing the ping-pong traces."""
59 | otf2_dir = os.path.join(data_dir, "ping-pong-otf2-papi")
60 |
61 | shutil.copytree(os.path.join(str(otf2_dir), "traces"), str(tmpdir) + "/traces")
62 | shutil.copy(os.path.join(str(otf2_dir), "scorep.cfg"), str(tmpdir))
63 | shutil.copy(os.path.join(str(otf2_dir), "traces.def"), str(tmpdir))
64 | shutil.copy(os.path.join(str(otf2_dir), "traces.otf2"), str(tmpdir))
65 |
66 | return tmpdir
67 |
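
A minimal sketch of a test that consumes the ping_pong_otf2_papi_trace fixture; the expectation that at least one PAPI metric column is populated is an assumption about that trace rather than something established elsewhere in this section:

from pipit import Trace


def test_papi_metric_columns(ping_pong_otf2_papi_trace):
    events_df = Trace.from_otf2(str(ping_pong_otf2_papi_trace)).events

    # the OTF2 reader adds one column per populated metric on top of the
    # standard trace columns
    base = {"Timestamp (ns)", "Event Type", "Name", "Thread", "Process", "Attributes"}
    assert base.issubset(events_df.columns)
    assert len(events_df.columns) > len(base)  # assumed: PAPI metrics present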
--------------------------------------------------------------------------------
/pipit/tests/data/foo-bar.csv:
--------------------------------------------------------------------------------
1 | Timestamp (s), Event Type, Name, Process
2 | 0, Enter, main(), 0
3 | 1, Enter, foo(), 0
4 | 3, Enter, MPI_Send, 0
5 | 5, Leave, MPI_Send, 0
6 | 8, Enter, baz(), 0
7 | 18, Leave, baz(), 0
8 | 25, Leave, foo(), 0
9 | 100, Leave, main(), 0
10 | 0, Enter, main(), 1
11 | 1, Enter, bar(), 1
12 | 2, Enter, Idle, 1
13 | 10, Leave, Idle, 1
14 | 10, Enter, MPI_Recv, 1
15 | 14, Leave, MPI_Recv, 1
16 | 39, Leave, bar(), 1
17 | 39, Enter, Idle, 1
18 | 57, Leave, Idle, 1
19 | 57, Enter, grault(), 1
20 | 77, Leave, grault(), 1
21 | 100, Leave, main(), 1
22 |
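
A minimal sketch of inspecting this file directly with pandas; the relative path is an assumption about the working directory:

import pandas as pd

# skipinitialspace strips the blank that follows each comma in this file
df = pd.read_csv("pipit/tests/data/foo-bar.csv", skipinitialspace=True)

print(df.groupby("Process").size())  # 8 events on process 0, 12 on process 1
print(sorted(df.loc[df["Event Type"] == "Enter", "Name"].unique()))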
--------------------------------------------------------------------------------
/pipit/tests/data/ping-pong-hpctoolkit/cct.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpcgroup/pipit/97beb979a126819de6fee1bd221647f4b9e2e6c7/pipit/tests/data/ping-pong-hpctoolkit/cct.db
--------------------------------------------------------------------------------
/pipit/tests/data/ping-pong-hpctoolkit/meta.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpcgroup/pipit/97beb979a126819de6fee1bd221647f4b9e2e6c7/pipit/tests/data/ping-pong-hpctoolkit/meta.db
--------------------------------------------------------------------------------
/pipit/tests/data/ping-pong-hpctoolkit/metrics/METRICS.yaml.ex:
--------------------------------------------------------------------------------
1 | %YAML 1.2
2 | ---
3 | # Specification and example document for metric taxonomies.
4 |
5 | # Each HPCToolkit database provides post-processed performance data for every
6 | # calling context, application thread and performance metric. Performance
7 | # metrics are generally very specific and the impact on the application
8 | # performance is not always clear (eg. is 98% of the GPU L2 misses on a single
9 | # line a problem?).
10 |
11 | # Files of this format provide a full "taxonomy" of metrics, structured to aid
12 | # manual performance analysis. Very general metrics (eg. time) are presented
13 | # first to give a sense for *where* significant performance issues are, which
14 | # can be expanded to present increasingly specific metrics to determine the
15 | # *why* and *how*. In other words, the majority of an HPCToolkit database
16 | # (see FORMATS.md) provides raw performance metrics, while METRICS.yaml files
17 | # provide the interpretation.
18 |
19 | # This format is primarily intended to be read by the GUI application of
20 | # HPCToolkit, HPCViewer. A number of keys in this file only make sense in this
21 | # context, for instance options on how to present the final metric values.
22 |
23 | # NOTE: !!-type specifiers when used below indicate the type(s) allowed for
24 | # the various keys. They are not required and match up with the default type
25 | # as interpreted by most general YAML parsers.
26 |
27 | # Version of the METRICS.yaml format required by this file. Can be used by
28 | # readers to error gracefully without reading the entire file. If omitted
29 | # version checks are disabled.
30 | version: !!int 0
31 |
32 | # Set of all performance metrics used by this taxonomy. These correspond to
33 | # performance metrics listed in the meta.db file.
34 | # Anchors are used to refer to these metrics later in the file.
35 | inputs: !!seq
36 | - &in-cycles-E
37 | # Canonical name for the performance metric.
38 | # See Performance Metric Specification in FORMATS.md for details.
39 | metric: !!str perf::cycles
40 | # Name of the propagation scope for the value referenced.
41 | # See Performance Metric Specification in FORMATS.md for details.
42 | scope: !!str function
43 | # Unary function used to generate summary statistic values, see Performance
44 | # Metric Specification in FORMATS.md for details.
45 | # This is a formula in the same format as the variants:formula:* keys in
46 | # in the metric description below, with the following differences:
47 | # - The formula must consist of a single !!str, not a !!seq or other
48 | # formula structure ("$$" is used as the variable), and
49 |   #   - The formula is canonicalized: whitespace and extraneous parentheticals
50 | # should be removed to achieve a match.
51 | # Defaults to '$$'.
52 | formula: !!str $$
53 | # Combination function use to generate summary statistic values, see
54 | # Performance Metric Specification in FORMATS.md for details.
55 | # One of 'sum', 'min' or 'max'. Defaults to 'sum'.
56 | combine: !!str sum
57 | # Merge keys can be used to lower the repetition of common fields:
58 | - &in-cycles-I
59 | <<: *in-cycles-E
60 | scope: execution
61 | - &in-cycles-E-cnt
62 | <<: *in-cycles-E
63 | formula: 1
64 | - &in-cycles-I-cnt
65 | <<: *in-cycles-I
66 | formula: 1
67 |
68 | - &in-l1-miss-E
69 | metric: perf::l1-cache-miss
70 | scope: function
71 | - &in-l1-miss-I
72 | <<: *in-l1-miss-E
73 | scope: execution
74 | - &in-l1-miss-E-cnt
75 | <<: *in-l1-miss-E
76 | formula: 1
77 | - &in-l1-miss-I-cnt
78 | <<: *in-l1-miss-I
79 | formula: 1
80 |
81 | - &in-l2-miss-E
82 | metric: perf::l2-cache-miss
83 | scope: function
84 | - &in-l2-miss-I
85 | <<: *in-l2-miss-E
86 | scope: execution
87 | - &in-l2-miss-E-cnt
88 | <<: *in-l2-miss-E
89 | formula: 1
90 | - &in-l2-miss-I-cnt
91 | <<: *in-l2-miss-I
92 | formula: 1
93 |
94 | - &in-l3-miss-E
95 | metric: perf::l3-cache-miss
96 | scope: function
97 | - &in-l3-miss-I
98 | <<: *in-l3-miss-E
99 | scope: execution
100 | - &in-l3-miss-E-cnt
101 | <<: *in-l3-miss-E
102 | formula: 1
103 | - &in-l3-miss-I-cnt
104 | <<: *in-l3-miss-I
105 | formula: 1
106 |
107 | # Sequence of root metrics provided in this taxonomy. Every metric listed in the
108 | # taxonomy is a descendant of one of these.
109 | roots:
110 | - # Name for the metric.
111 | name: !!str CPU Cycles
112 | # Longer description of the metric, written in Markdown.
113 | # Defaults to the `short description:` if given.
114 | description: >
115 | Cycles spent:
116 | - In the CPU doing actual work (FLOPs), or
117 | - Waiting for outside operations to complete (memory stalls).
118 | # Short description of the metric, used for cases where a long description
119 | # would not be suitable.
120 | # Defaults to `description:` up to the first period or newline.
121 | short description: !!str Cycles spent in the CPU.
122 |
123 | # Whether this metric should be visible in the Viewer by default, default
124 | # true. If false, the Viewer may require that the metric be enabled in the
125 | # metric list before it will be presented.
126 | visible by default: true
127 |
128 | # How the values in the metrics rooted here will be presented in the Viewer
129 | # by default. One of:
130 | # - 'column': Columns of data that can be expanded to show inner metrics.
131 | # Defaults to 'column'. Only allowed on root metrics.
132 | presentation: !!str column
133 |
134 | # Sequence of child metrics, format is the same as a root metric.
135 | # If omitted there are no child metrics.
136 | children: !!seq
137 | - name: L2 Bound
138 | description: Rough cycles spent accessing the L2 cache
139 |
140 | # List of formula variations for this taxonomic metric. Metric values are
141 | # always attributed to an application thread, however for large executions
142 | # this gives too much data to present clearly. Instead, the Viewer
143 | # presents on "summary" values by applying statistics across threads.
144 | # The `inputs:` key above lists the "partial" results required for
145 | # calculating statistics, this key lists the final formulas to generate
146 | # presentable values.
147 | #
148 | # Keys in this map are the human-readable names of the variants.
149 | variants: !!map
150 | !!str Sum:
151 | # How the final value(s) for this metric variant should be rendered.
152 | # Orderless set of elements to be rendered in the metric cell, the
153 | # following options are available:
154 | # - 'number': Numerical rendering (see `format:`).
155 | # - 'percentage': Percentage of the global inclusive value. Only
156 | # allowed if `formula:inclusive:` is given.
157 | # - 'hidden': Mark as hiding (some) inner values (`*`).
158 | # - 'colorbar': Color bar visually indicating the relative sizes of
159 | # values in child metrics. An additional "grey" color is added to
160 | # the bar to indicate the difference between sum-of-children and
161 | # this metric variant's value. (Note that this difference will be
162 | # exactly 0 if `formula:` is 'sum'.)
163 | # The Viewer will order the elements reasonably, and may elide
164 | # elements if screen real estate is tight.
165 | render: !!seq [number, percent] # eg: 1.23e+04 56.7%
166 | # Can also be given as a !!str for a single element:
167 | render: !!str 'number' # eg: 1.23e+04
168 |
169 | # Printf-like format to use when rendering the metric value(s) as a
170 | # number (`render: number`). The input to "printf" is a single double
171 | # value. Defaults to '%.2e'.
172 | #
173 | # In more detail, this string must be of the form:
174 | # [prefix]%(#0- +')*[field width][.precision](eEfFgGaA)[suffix]
175 | # Where "prefix" and "suffix" use %% to generate a literal %.
176 | format: !!str '%.2e'
177 |
178 | # Which variant child metric values are gotten from. Also used as the
179 | # default variant when first expanding this metric variant. Explicitly
180 | # lists the variant to use for each child metric in order.
181 | child variant: !!seq
182 | - Sum # Use Sum value(s) from first child
183 | - Mean # Use Mean value(s) from second child
184 | # Or can also be given as a !!str if the variant is the same.
185 | child variant: !!str Sum # Use Sum value(s) from all children
186 | # Defaults to the name of this variant.
187 |
188 | # Formula(s) for calculating the final value(s) for this metric
189 | # variant. Ignored unless `render:` contains a numerical element
190 | # (ie. everything except 'hidden'). Can be one of:
191 | # - 'first': Value(s) for this variant are copied from the value(s)
192 | # of the first child. Invalid if `render:` contains 'colorbar'.
193 | # - 'sum': Value(s) are generated by summing child value(s).
194 | # In all cases value(s) are generated vector-wise (ie. inclusive
195 | # values come from inclusive child values, exclusive from exclusive,
196 | # etc.), and null child values generate null values in the parent
197 | # (ie. they aren't replaced with 0).
198 | formula: !!str first
199 | # Can also be written as a !!map listing the vector of formulas.
200 | formula: !!map
201 | # The following keys define the formulas used to generate metrics.
202 | # Formulas are roughly written as a C-like math expression, except:
203 | # - "Variables" are references to other nodes, which can be other
204 | # formulas (sub-expressions) or an entry in the global `inputs:`.
205 | # Eg: `*in-cycles-E` is an input metric value.
206 | # - Parentheses are represented with a YAML !!seq ([...]), breaks
207 | # between elements (,) are considered whitespace.
208 | # Eg: `2 * (3 + 4)` -> `[2 *,[3,+,4]]`
209 | # - Number constants and infix operators can be represented by
210 | # !!int, !!float and !!str YAML elements (as appropriate), and
211 | # need not be separated by an element break (whitespace suffices).
212 | # Eg: `[2 *,[3,+,4]]` == `[2,*,[3+4]]`
213 | # The following operators are available in increasing precedence:
214 | # + - # Addition and subtraction
215 | # * / # Multiplication and (true) division
216 | # ^ # Exponentiation
217 | # - Function calls are represented by a YAML !!map with a single
218 | # pair. The key is the function name and the value is a !!seq
219 | # listing the arguments.
220 | # Eg: `foo(1, 2, 3)` -> `[foo:[1,2,3]]`,
221 | # and `foo(1+x)` -> `[foo:[ [1+,*x] ]]`
222 | # The following functions are available:
223 | # sum:[...] # Sum of arguments
224 | # prod:[...] # Product of arguments
225 | # pow:[a, b] # a raised to the b
226 | # sqrt:[a] # Square root of a (pow(a, .5))
227 | # log:[a, b] # Logarithm of a base-b
228 | # log:[a] # Natural logarithm of a
229 | # min:[...] # Smallest of arguments
230 | # max:[...] # Largest of arguments
231 | # floor:[a] # Largest integer less than or equal to a
232 | # strict floor:[a] # Largest integer less than a
233 | # ceil:[a] # Smallest integer greater than or equal to a
234 | # strict ceil:[a] # Smallest integer greater than a
235 |
236 | # Formulas to generate "inclusive" cost values. Defaults to null.
237 | inclusive:
238 | # Custom formula used when no special properties are required of
239 | # the formulation. Defaults to the value of `standard:`.
240 | custom: [4*,[*in-l1-miss-I,-,*in-l2-miss-I]]
241 |
242 | # Version of the formula based completely on well-defined metric
243 | # inputs, which refer only to non-custom propagation scopes. Used
244 | # in the bottom-up and flat views, where this property is required
245 | # for accurate analysis. Defaults to null.
246 | # See the meta.db Performance Metrics section for details.
247 | standard: [4*,[*in-l1-miss-I,-,*in-l2-miss-I]]
248 |
249 | # Formulas to generate "exclusive" cost values. Defaults to null.
250 | exclusive:
251 | standard: [4*,[*in-l1-miss-E,-,*in-l2-miss-E]]
252 |
253 | # Another example variant for "L2 Bound"
254 | Mean:
255 | render: [number, percent]
256 | formula:
257 | inclusive: [4*,[*in-l1-miss-I,/,*in-l1-miss-I-cnt, -,*in-l2-miss-I,/,*in-l2-miss-I-cnt]]
258 | exclusive: [4*,[*in-l1-miss-E,/,*in-l1-miss-E-cnt, -,*in-l2-miss-E,/,*in-l2-miss-E-cnt]]
259 |
260 | # Sibling metric, still under "CPU Cycles"
261 | - name: L3 Bound
262 | description: Rough cycles spent accessing L3 cache
263 | variants:
264 | Sum:
265 | render: number
266 | formula:
267 | inclusive: [64*,[*in-l2-miss-I, -,*in-l3-miss-I]]
268 | exclusive: [64*,[*in-l2-miss-E, -,*in-l3-miss-E]]
269 | Mean:
270 | render: [number, percent]
271 | formula:
272 | inclusive: [64*,[*in-l2-miss-I,/,*in-l2-miss-I-cnt, -,*in-l3-miss-I,/,*in-l3-miss-I-cnt]]
273 | exclusive: [64*,[*in-l2-miss-E,/,*in-l2-miss-E-cnt, -,*in-l3-miss-E,/,*in-l3-miss-E-cnt]]
274 |
275 | # Parameters for the root "CPU Cycles" metric
276 | variants:
277 | Sum:
278 | render: number
279 | formula:
280 | inclusive: *in-cycles-I
281 | exclusive: *in-cycles-E
282 | Mean:
283 | render: [number, colorbar]
284 | formula:
285 | inclusive: [*in-cycles-I,/,*in-cycles-I-cnt]
286 | exclusive: [*in-cycles-E,/,*in-cycles-E-cnt]
287 |
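
A minimal sketch of consuming a taxonomy file in this format with PyYAML; the file name is an assumption, and the loader resolves the anchors and merge keys used in the inputs section:

import yaml

with open("METRICS.yaml") as f:  # e.g. the example document above
    taxonomy = yaml.safe_load(f)

print("format version:", taxonomy.get("version"))


def walk(metric, depth=0):
    # print each metric with the names of its variants, recursing into children
    variants = ", ".join(metric.get("variants", {}))
    print("  " * depth + metric["name"] + ": " + variants)
    for child in metric.get("children", []):
        walk(child, depth + 1)


for root in taxonomy.get("roots", []):
    walk(root)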
--------------------------------------------------------------------------------
/pipit/tests/data/ping-pong-hpctoolkit/metrics/default.yaml:
--------------------------------------------------------------------------------
1 | version: 0
2 | inputs:
3 | - &CPUTIMEx20_x28_secx29_-sum-x5b_0x0x5d_-execution
4 | metric: CPUTIME (sec)
5 | scope: execution
6 | formula: $$
7 | combine: sum
8 | - &CPUTIMEx20_x28_secx29_-sum-x5b_0x0x5d_-lex_aware
9 | metric: CPUTIME (sec)
10 | scope: lex_aware
11 | formula: $$
12 | combine: sum
13 | - &CPUTIMEx20_x28_secx29_-sum-x5b_0x0x5d_-function
14 | metric: CPUTIME (sec)
15 | scope: function
16 | formula: $$
17 | combine: sum
18 | roots:
19 | - name: CPUTIME (sec)
20 | description: CPUTIME (sec)
21 | variants:
22 | Sum:
23 | render: [number, percent]
24 | formula:
25 | inclusive:
26 | standard: *CPUTIMEx20_x28_secx29_-sum-x5b_0x0x5d_-execution
27 | exclusive:
28 | custom: *CPUTIMEx20_x28_secx29_-sum-x5b_0x0x5d_-lex_aware
29 | standard: *CPUTIMEx20_x28_secx29_-sum-x5b_0x0x5d_-function
--------------------------------------------------------------------------------
/pipit/tests/data/ping-pong-hpctoolkit/profile.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hpcgroup/pipit/97beb979a126819de6fee1bd221647f4b9e2e6c7/pipit/tests/data/ping-pong-hpctoolkit/profile.db
--------------------------------------------------------------------------------
/pipit/tests/data/ping-pong-hpctoolkit/src/ping-pong.c:
--------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <stdlib.h>
3 | #include <mpi.h>
4 |
5 | int main(int argc, char *argv[])
6 | {
7 | /* -------------------------------------------------------------------------------------------
8 | MPI Initialization
9 | --------------------------------------------------------------------------------------------*/
10 | MPI_Init(&argc, &argv);
11 |
12 | int size;
13 | MPI_Comm_size(MPI_COMM_WORLD, &size);
14 |
15 | int rank;
16 | MPI_Comm_rank(MPI_COMM_WORLD, &rank);
17 |
18 | MPI_Status stat;
19 |
20 | if(size != 2){
21 | if(rank == 0){
22 | printf("This program requires exactly 2 MPI ranks, but you are attempting to use %d! Exiting...\n", size);
23 | }
24 | MPI_Finalize();
25 | exit(0);
26 | }
27 |
28 | /* -------------------------------------------------------------------------------------------
29 | Loop from 8 B to 1 GB
30 | --------------------------------------------------------------------------------------------*/
31 |
32 | for(int i=11; i<=18; i++){
33 |
34 | long int N = 1 << i;
35 |
36 | // Allocate memory for A on CPU
37 | double *A = (double*)malloc(N*sizeof(double));
38 |
39 | // Initialize all elements of A to 0.0
40 | for(int i=0; i<N; i++){
--------------------------------------------------------------------------------
/pipit/tests/hpctoolkit.py:
--------------------------------------------------------------------------------
57 | 0x24680 [libpsm2.so.2.2]",
58 | "MPID_Finalize [libmpi.so.12.1.1]",
59 | "MPID_Recv [libmpi.so.12.1.1]",
60 | "MPI_Finalize",
61 | "PMPI_Finalize [libmpi.so.12.1.1]",
62 | "PMPI_Recv [libmpi.so.12.1.1]",
63 | "PMPI_Send [libmpi.so.12.1.1]",
64 | "__GI___munmap [libc-2.17.so]",
65 | "__GI___unlink [libc-2.17.so]",
66 | "__GI_process_vm_readv [libc-2.17.so]",
67 | "loop",
68 | "main",
69 | "main thread",
70 | "psm2_ep_close [libpsm2.so.2.2]",
71 | "psm2_mq_ipeek2 [libpsm2.so.2.2]",
72 | "psm2_mq_irecv2 [libpsm2.so.2.2]",
73 | "psm_dofinalize [libmpi.so.12.1.1]",
74 | "psm_progress_wait [libmpi.so.12.1.1]",
75 | "psm_recv [libmpi.so.12.1.1]",
76 | "psm_try_complete [libmpi.so.12.1.1]",
77 | "shm_unlink [librt-2.17.so]",
78 | "targ5030 [libpsm2.so.2.2]",
79 | }
80 |
81 | # Test correct number of MPI Send/Recv events
82 | mpi_send_df = events_df.loc[events_df["Name"].str.contains("PMPI_Send")].loc[
83 | events_df["Event Type"] == "Enter"
84 | ]
85 | mpi_recv_df = events_df.loc[events_df["Name"].str.contains("PMPI_Recv")].loc[
86 | events_df["Event Type"] == "Enter"
87 | ]
88 |
89 |     # Process 0 has 7 MPI Sends and 7 MPI Recvs
90 | assert len(mpi_send_df.loc[events_df["Process"] == 0]) == 7
91 | assert len(mpi_recv_df.loc[events_df["Process"] == 0]) == 7
92 |
93 |     # Process 1 has 7 MPI Sends and 7 MPI Recvs
94 | assert len(mpi_send_df.loc[events_df["Process"] == 1]) == 7
95 | assert len(mpi_recv_df.loc[events_df["Process"] == 1]) == 7
96 |
97 | # Timestamps should be sorted in increasing order
98 | assert (np.diff(events_df["Timestamp (ns)"]) >= 0).all()
99 |
--------------------------------------------------------------------------------
/pipit/tests/otf2-tests.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of
2 | # Maryland. See the top-level LICENSE file for details.
3 | #
4 | # SPDX-License-Identifier: MIT
5 |
6 | import numpy as np
7 | from pipit import Trace
8 |
9 |
10 | def test_events(data_dir, ping_pong_otf2_trace):
11 | trace = Trace.from_otf2(str(ping_pong_otf2_trace))
12 | events_df = trace.events
13 |
14 | # 120 total events in ping pong trace
15 | assert len(events_df) == 120
16 |
17 | # event types for trace (instant events are program begin/end and mpi send/recv)
18 | assert set(events_df["Event Type"]) == set(["Enter", "Instant", "Leave"])
19 |
20 | # all event names in the trace
21 | assert set(events_df["Name"]) == set(
22 | [
23 | "ProgramBegin",
24 | "ProgramEnd",
25 | "MPI_Send",
26 | "MPI_Recv",
27 | "MpiSend",
28 | "MpiRecv",
29 | "MPI_Init",
30 | "MPI_Finalize",
31 | "MPI_Comm_rank",
32 | "MPI_Comm_size",
33 | "int main(int, char**)",
34 | ]
35 | )
36 |
37 | # 8 sends per rank, so 16 sends total -> 32 including both enter and leave rows
38 | assert len(events_df.loc[events_df["Name"] == "MPI_Send"]) == 32
39 |
40 | assert len(set(events_df["Process"])) == 2 # 2 ranks for ping pong trace
41 |
42 | assert len(set(events_df["Thread"])) == 1 # 1 thread per rank
43 |
44 | assert len(events_df.loc[events_df["Process"] == 0]) == 60 # 60 events per rank
45 |
46 | assert (
47 | len(events_df.loc[events_df["Thread"] == 0]) == 120
48 | ) # all 120 events associated with the main thread
49 |
50 | # timestamps should be sorted in increasing order
51 | assert (np.diff(events_df["Timestamp (ns)"]) > 0).all()
52 |
53 |
54 | def test_definitions(data_dir, ping_pong_otf2_trace):
55 | trace = Trace.from_otf2(str(ping_pong_otf2_trace))
56 | definitions_df = trace.definitions
57 |
58 | assert len(definitions_df) == 533
59 |
60 | # 17 unique definition types in trace
61 | assert len(set(definitions_df["Definition Type"])) == 17
62 |
63 | # 2 ranks, so 2 location definitions in the trace
64 | assert len(definitions_df.loc[definitions_df["Definition Type"] == "Location"]) == 2
65 |
66 | # communicator should evidently be present in the ping pong trace definitions
67 | assert "Comm" in set(definitions_df["Definition Type"])
68 |
--------------------------------------------------------------------------------
/pipit/tests/projections-tests.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of
2 | # Maryland. See the top-level LICENSE file for details.
3 | #
4 | # SPDX-License-Identifier: MIT
5 |
6 | from pipit import Trace
7 |
8 |
9 | def test_events(data_dir, ping_pong_projections_trace):
10 | trace = Trace.from_projections(str(ping_pong_projections_trace))
11 | events_df = trace.events
12 |
13 | # The projections trace has 2 PEs
14 | assert set(events_df["Process"]) == {0, 1}
15 |
16 | # event types for trace
17 | assert set(events_df["Event Type"]) == {"Enter", "Instant", "Leave"}
18 |
19 | # all event names in the trace
20 | assert set(events_df["Name"]) == {
21 | "Computation",
22 | "Idle",
23 | "Pack",
24 | "Ping1()",
25 | "Ping2()",
26 | "Ping3()",
27 | "PingC()",
28 | "PingC(IdMsg* impl_msg)",
29 | "PingF()",
30 | "PingMarshall()",
31 | "Unpack",
32 | "dummy_thread_ep",
33 | "exchange(IdMsg* impl_msg)",
34 | "maindone()",
35 | "recv(PingMsg* impl_msg)",
36 | "recvHandle(const CkNcpyBuffer &destInfo)",
37 | "recv_zerocopy(CkNcpyBuffer ncpyBuffer_msg, int size)",
38 | "remoteDoneInserting()",
39 | "start(const bool &reportTime)",
40 | "traceProjectionsParallelShutdown(int impl_noname_8)",
41 | "trecv(PingMsg* impl_msg)",
42 | "updateLocation(const CkArrayIndex &idx, const CkLocEntry &e)",
43 | }
44 |
45 | # PE 1 has 68 create events (which are the only instant events)
46 | assert (
47 | len(
48 | events_df.loc[events_df["Process"] == 1].loc[
49 | events_df["Event Type"] == "Instant"
50 | ]
51 | )
52 | == 68
53 | )
54 | # PE 0 has 77 create events (which are the only instant events)
55 | assert (
56 | len(
57 | events_df.loc[events_df["Process"] == 0].loc[
58 | events_df["Event Type"] == "Instant"
59 | ]
60 | )
61 | == 77
62 | )
63 |
64 | # PE0 has 161 Begin Processing Events
65 | len(
66 | events_df.loc[events_df["Process"] == 0]
67 | .loc[events_df["Event Type"] == "Enter"]
68 | .loc[events_df["Name"] == "Processing"]
69 | ) == 161
70 |     # PE1 has 146 Begin Processing Events
71 | len(
72 | events_df.loc[events_df["Process"] == 1]
73 | .loc[events_df["Event Type"] == "Enter"]
74 | .loc[events_df["Name"] == "Processing"]
75 | ) == 146
76 |
77 | # Each log file starts/ends with a Computation Event
78 | assert events_df.loc[events_df["Process"] == 1].iloc[0]["Name"] == "Computation"
79 | assert events_df.loc[events_df["Process"] == 1].iloc[-1]["Name"] == "Computation"
80 |
81 | assert events_df.loc[events_df["Process"] == 0].iloc[0]["Name"] == "Computation"
82 | assert events_df.loc[events_df["Process"] == 0].iloc[-1]["Name"] == "Computation"
83 |
--------------------------------------------------------------------------------
/pipit/tests/trace.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of
2 | # Maryland. See the top-level LICENSE file for details.
3 | #
4 | # SPDX-License-Identifier: MIT
5 |
6 | import numpy as np
7 | from pipit import Trace
8 |
9 | from numpy.testing import assert_allclose
10 |
11 |
12 | def test_comm_matrix(data_dir, ping_pong_otf2_trace):
13 | # bytes sent between pairs of processes
14 | size_comm_matrix = Trace.from_otf2(str(ping_pong_otf2_trace)).comm_matrix()
15 |
16 | # number of messages sent between pairs of processes
17 | count_comm_matrix = Trace.from_otf2(str(ping_pong_otf2_trace)).comm_matrix("count")
18 |
19 | # 2 ranks in ping pong trace, so comm matrix should have shape 2 x 2
20 |     assert size_comm_matrix.shape == count_comm_matrix.shape == (2, 2)
21 |
22 | # no messages from ranks to themselves
23 | # note: comm matrix elements accessed using matrix[sender_rank][receiver_rank]
24 | assert (
25 | size_comm_matrix[0][0]
26 | == size_comm_matrix[1][1]
27 | == count_comm_matrix[0][0]
28 | == count_comm_matrix[1][1]
29 | == 0
30 | )
31 |
32 | # 8 sends from each process (total of 4177920 bytes ~ 3.984 mebibytes)
33 | assert size_comm_matrix[0][1] == size_comm_matrix[1][0] == 4177920
34 | assert count_comm_matrix[0][1] == count_comm_matrix[1][0] == 8
35 |
36 |
37 | def test_comm_over_time(data_dir, ping_pong_otf2_trace):
38 | ping_pong = Trace.from_otf2(str(ping_pong_otf2_trace))
39 |
40 | hist, edges = ping_pong.comm_over_time(output="size", message_type="send", bins=5)
41 |
42 | assert len(edges) == 6
43 | assert all(hist[0:3] == 0)
44 | assert hist[4] == 4177920 * 2
45 |
46 | hist, edges = ping_pong.comm_over_time(
47 | output="count", message_type="receive", bins=5
48 | )
49 |
50 | assert len(edges) == 6
51 | assert all(hist[0:3] == 0)
52 | assert hist[4] == 8 * 2
53 |
54 |
55 | def test_comm_by_process(data_dir, ping_pong_otf2_trace):
56 | ping_pong = Trace.from_otf2(str(ping_pong_otf2_trace))
57 |
58 | sizes = ping_pong.comm_by_process()
59 |
60 | assert sizes.loc[0]["Sent"] == 4177920
61 | assert sizes.loc[0]["Received"] == 4177920
62 | assert sizes.loc[1]["Sent"] == 4177920
63 | assert sizes.loc[1]["Received"] == 4177920
64 |
65 | counts = ping_pong.comm_by_process(output="count")
66 |
67 | assert counts.loc[0]["Sent"] == 8
68 | assert counts.loc[0]["Received"] == 8
69 | assert counts.loc[1]["Sent"] == 8
70 | assert counts.loc[1]["Received"] == 8
71 |
72 |
73 | def test_match_events(data_dir, ping_pong_otf2_trace):
74 | trace = Trace.from_otf2(str(ping_pong_otf2_trace))
75 | trace._match_events()
76 |
77 | df = trace.events
78 |
79 | # test both ranks
80 | rank_0_df = df.loc[(df["Process"] == 0) & (df["Event Type"] != "Instant")]
81 | rank_1_df = df.loc[(df["Process"] == 1) & (df["Event Type"] != "Instant")]
82 |
83 |     # Make lists of the regular and matching columns for both indices and
84 |     # timestamps, then compare their values to ensure the event-matching
85 |     # function produced correct results.
86 | rank_0_indices = rank_0_df.index.to_list()
87 | rank_0_matching_indices = rank_0_df["_matching_event"].to_list()
88 | rank_0_timestamps = rank_0_df["Timestamp (ns)"].to_list()
89 | rank_0_matching_timestamps = rank_0_df["_matching_timestamp"].to_list()
90 |
91 | # All events in ping pong trace except main are leaves in the call tree,
92 | # so the leave row occurs immediately after the enter. The below assertions
93 | # test this.
94 | for i in range(len(rank_0_df)):
95 | if (
96 | rank_0_df["Event Type"].iloc[i] == "Enter"
97 | and rank_0_df["Name"].iloc[i] != "int main(int, char**)"
98 | ):
99 | # the matching event and timestamp for enter rows
100 | # should occur right after (ex: (Enter: 45, Leave: 46))
101 | assert rank_0_matching_indices[i] == rank_0_indices[i + 1]
102 | assert rank_0_matching_timestamps[i] == rank_0_timestamps[i + 1]
103 | elif rank_0_df["Name"].iloc[i] != "int main(int, char**)":
104 | # the matching event and timestamp for leave rows
105 | # should occur right before (ex: (Enter: 45, Leave: 46))
106 | assert rank_0_matching_indices[i] == rank_0_indices[i - 1]
107 | assert rank_0_matching_timestamps[i] == rank_0_timestamps[i - 1]
108 |
109 |     # run the same checks as above, but for rank 1
110 | rank_1_indices = rank_1_df.index.to_list()
111 | rank_1_matching_indices = rank_1_df["_matching_event"].to_list()
112 | rank_1_timestamps = rank_1_df["Timestamp (ns)"].to_list()
113 | rank_1_matching_timestamps = rank_1_df["_matching_timestamp"].to_list()
114 |
115 | for i in range(len(rank_1_df)):
116 | if (
117 | rank_1_df["Event Type"].iloc[i] == "Enter"
118 | and rank_1_df["Name"].iloc[i] != "int main(int, char**)"
119 | ):
120 | assert rank_1_matching_indices[i] == rank_1_indices[i + 1]
121 | assert rank_1_matching_timestamps[i] == rank_1_timestamps[i + 1]
122 | elif rank_1_df["Name"].iloc[i] != "int main(int, char**)":
123 | assert rank_1_matching_indices[i] == rank_1_indices[i - 1]
124 | assert rank_1_matching_timestamps[i] == rank_1_timestamps[i - 1]
125 |
126 |     # Check that the matching indices and timestamps of the Enter rows are
127 |     # greater than the rows' own indices and timestamps
128 | assert (
129 | np.array(df.loc[df["Event Type"] == "Enter"]["_matching_event"])
130 | > np.array(df.loc[df["Event Type"] == "Enter"].index)
131 | ).all()
132 | assert (
133 | np.array(df.loc[df["Event Type"] == "Enter"]["_matching_timestamp"])
134 | > np.array(df.loc[df["Event Type"] == "Enter"]["Timestamp (ns)"])
135 | ).all()
136 |
137 |
138 | def test_match_caller_callee(data_dir, ping_pong_otf2_trace):
139 | trace = Trace.from_otf2(str(ping_pong_otf2_trace))
140 | trace._match_caller_callee()
141 |
142 | df = trace.events
143 |
144 | # nodes with a parent = 40
145 | assert len(df.loc[df["_parent"].notnull()]) == 40
146 |
147 | # nodes with children = 2
148 | assert len(df.loc[df["_children"].notnull()]) == 2
149 |
150 |
151 | def test_time_profile(data_dir, ping_pong_otf2_trace):
152 | trace = Trace.from_otf2(str(ping_pong_otf2_trace))
153 | trace.calc_exc_metrics(["Timestamp (ns)"])
154 |
155 | time_profile = trace.time_profile(num_bins=62)
156 |
157 | # check length
158 | assert len(time_profile) == 62
159 |
160 | # check bin sizes
161 | exp_duration = trace.events["Timestamp (ns)"].max()
162 | exp_bin_size = exp_duration / 62
163 | bin_sizes = time_profile["bin_end"] - time_profile["bin_start"]
164 |
165 | assert_allclose(bin_sizes, exp_bin_size)
166 |
167 | # check that sum of function contributions per bin equals bin duration
168 | exp_bin_total_duration = exp_bin_size * 2
169 | time_profile.drop(columns=["bin_start", "bin_end"], inplace=True)
170 |
171 | assert_allclose(time_profile.sum(axis=1), exp_bin_total_duration)
172 |
173 | # check for each function that sum of exc time per bin equals total exc time
174 | total_exc_times = trace.events.groupby("Name")["time.exc"].sum()
175 |
176 | for column in time_profile:
177 | if column == "idle_time":
178 | continue
179 |
180 | assert_allclose(time_profile[column].sum(), total_exc_times[column])
181 |
182 | # check normalization
183 | norm = trace.time_profile(num_bins=62, normalized=True)
184 | norm.drop(columns=["bin_start", "bin_end"], inplace=True)
185 |
186 | assert (time_profile / exp_bin_total_duration).equals(norm)
187 |
188 | # check against ground truth
189 | # generated using Vampir's Function Summary chart (step size=16)
190 | assert_allclose(norm.loc[0]["int main(int, char**)"], 0.00299437, rtol=1e-05)
191 | assert_allclose(norm.loc[0]["MPI_Init"], 0.93999815)
192 | assert_allclose(norm.loc[0]["MPI_Comm_size"], 0.0)
193 | assert_allclose(norm.loc[0]["MPI_Comm_rank"], 0.0)
194 | assert_allclose(norm.loc[0]["MPI_Send"], 0.0)
195 | assert_allclose(norm.loc[0]["MPI_Recv"], 0.0)
196 | assert_allclose(norm.loc[0]["MPI_Finalize"], 0.0)
197 |
198 | assert_allclose(norm.loc[1:59]["int main(int, char**)"], 0.0)
199 | assert_allclose(norm.loc[1:59]["MPI_Init"], 1.0)
200 | assert_allclose(norm.loc[1:59]["MPI_Comm_size"], 0.0)
201 | assert_allclose(norm.loc[1:59]["MPI_Comm_rank"], 0.0)
202 | assert_allclose(norm.loc[1:59]["MPI_Send"], 0.0)
203 | assert_allclose(norm.loc[1:59]["MPI_Recv"], 0.0)
204 | assert_allclose(norm.loc[1:59]["MPI_Finalize"], 0.0)
205 |
206 | assert_allclose(norm.loc[60]["int main(int, char**)"], 0.39464799)
207 | assert_allclose(norm.loc[60]["MPI_Init"], 0.14843661)
208 | assert_allclose(norm.loc[60]["MPI_Send"], 0.24594134)
209 | assert_allclose(norm.loc[60]["MPI_Recv"], 0.21017099)
210 | assert_allclose(norm.loc[60]["MPI_Comm_size"], 0.00046047, rtol=1e-05)
211 | assert_allclose(norm.loc[60]["MPI_Comm_rank"], 0.00034261, rtol=1e-05)
212 | assert_allclose(norm.loc[60]["MPI_Finalize"], 0.0)
213 |
214 | assert_allclose(norm.loc[61]["int main(int, char**)"], 0.43560727)
215 | assert_allclose(norm.loc[61]["MPI_Init"], 0.0)
216 | assert_allclose(norm.loc[61]["MPI_Send"], 0.29640222)
217 | assert_allclose(norm.loc[61]["MPI_Recv"], 0.24300865)
218 | assert_allclose(norm.loc[61]["MPI_Comm_size"], 0.0)
219 | assert_allclose(norm.loc[61]["MPI_Comm_rank"], 0.0)
220 | assert_allclose(norm.loc[61]["MPI_Finalize"], 0.01614835, rtol=1e-05)
221 |
--------------------------------------------------------------------------------
/pipit/trace.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of
2 | # Maryland. See the top-level LICENSE file for details.
3 | #
4 | # SPDX-License-Identifier: MIT
5 |
6 | import numpy as np
7 | import pandas as pd
8 | from pipit.util.cct import create_cct
9 |
10 |
11 | class Trace:
12 | """
13 | A trace dataset is read into an object of this type, which
14 | includes one or more dataframes and a calling context tree.
15 | """
16 |
17 | def __init__(self, definitions, events, cct=None, parallelism_levels=None):
18 | """Create a new Trace object."""
19 | self.definitions = definitions
20 | self.events = events
21 | self.cct = cct
22 | if parallelism_levels is None:
23 | self.parallelism_levels = ["Process"]
24 | else:
25 | assert isinstance(parallelism_levels, list)
26 | self.parallelism_levels = parallelism_levels
27 |
28 | # list of numeric columns which we can calculate inc/exc metrics with
29 | self.numeric_cols = list(
30 | self.events.select_dtypes(include=[np.number]).columns.values
31 | )
32 |
33 | # will store columns names for inc/exc metrics
34 | self.inc_metrics = []
35 | self.exc_metrics = []
36 |
37 | def create_cct(self):
38 | # adds a column of cct nodes to the events dataframe
39 | # and stores the graph object in self.cct
40 | self.cct = create_cct(self.events)
41 |
42 | @staticmethod
43 | def from_otf2(dirname, num_processes=None, create_cct=False):
44 | """Read an OTF2 trace into a new Trace object."""
45 | # import this lazily to avoid circular dependencies
46 | from .readers.otf2_reader import OTF2Reader
47 |
48 | return OTF2Reader(dirname, num_processes, create_cct).read()
49 |
50 | @staticmethod
51 | def from_hpctoolkit(dirname):
52 | """Read an HPCToolkit trace into a new Trace object."""
53 | # import this lazily to avoid circular dependencies
54 | from .readers.hpctoolkit_reader import HPCToolkitReader
55 |
56 | return HPCToolkitReader(dirname).read()
57 |
58 | @staticmethod
59 | def from_projections(dirname, num_processes=None, create_cct=False):
60 | """Read a Projections trace into a new Trace object."""
61 | # import this lazily to avoid circular dependencies
62 | from .readers.projections_reader import ProjectionsReader
63 |
64 | return ProjectionsReader(dirname, num_processes, create_cct).read()
65 |
66 | @staticmethod
67 | def from_nsight(filename, create_cct=False):
68 | """Read an Nsight trace into a new Trace object."""
69 | # import this lazily to avoid circular dependencies
70 | from .readers.nsight_reader import NsightReader
71 |
72 | return NsightReader(filename, create_cct).read()
73 |
74 | @staticmethod
75 | def from_nsight_sqlite(filename, create_cct=False, trace_types="all"):
76 |         """Read an Nsight SQLite report into a new Trace object."""
77 | # import this lazily to avoid circular dependencies
78 | from .readers.nsight_sqlite_reader import NSightSQLiteReader
79 |
80 | return NSightSQLiteReader(filename, create_cct, trace_types).read()
81 |
82 | @staticmethod
83 | def from_csv(filename):
84 | events_dataframe = pd.read_csv(filename, skipinitialspace=True)
85 |
86 | # if timestamps are in seconds, convert them to nanoseconds
87 | if "Timestamp (s)" in events_dataframe.columns:
88 | events_dataframe["Timestamp (s)"] *= 10**9
89 | events_dataframe.rename(
90 | columns={"Timestamp (s)": "Timestamp (ns)"}, inplace=True
91 | )
92 |
93 | # ensure that ranks are ints
94 | events_dataframe = events_dataframe.astype({"Process": "int32"})
95 |
96 | # make certain columns categorical
97 | events_dataframe = events_dataframe.astype(
98 | {
99 | "Event Type": "category",
100 | "Name": "category",
101 | "Process": "category",
102 | }
103 | )
104 |
105 | # sort the dataframe by Timestamp
106 | events_dataframe.sort_values(
107 | by="Timestamp (ns)", axis=0, ascending=True, inplace=True, ignore_index=True
108 | )
109 |
110 | return Trace(None, events_dataframe)
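        # Usage sketch (hypothetical file path; the CSV is assumed to contain
        # at least "Timestamp (ns)" or "Timestamp (s)", "Event Type", "Name",
        # and "Process" columns, matching the code above):
        #
        #   >>> from pipit import Trace
        #   >>> trace = Trace.from_csv("my-app-trace.csv")
        #   >>> trace.events.head()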
111 |
112 | def to_chrome(self, filename=None):
113 | """Export as Chrome Tracing JSON, which can be opened
114 | in Perfetto."""
115 | from .writers.chrome_writer import ChromeWriter
116 |
117 | return ChromeWriter(self, filename).write()
118 |
119 | def _match_events(self):
120 | """Matches corresponding enter/leave events and adds two columns to the
121 | dataframe: _matching_event and _matching_timestamp
122 | """
123 |
124 | if "_matching_event" not in self.events.columns:
125 | matching_events = [float("nan")] * len(self.events)
126 | matching_times = [float("nan")] * len(self.events)
127 |
128 | # only pairing enter and leave rows
129 | enter_leave_df = self.events.loc[
130 | self.events["Event Type"].isin(["Enter", "Leave"])
131 | ]
132 |
133 | # list of processes and/or threads to iterate over
134 | if "Thread" in self.events.columns:
135 | exec_locations = set(zip(self.events["Process"], self.events["Thread"]))
136 | has_thread = True
137 | else:
138 | exec_locations = set(self.events["Process"])
139 | has_thread = False
140 |
141 | for curr_loc in exec_locations:
142 | # only filter by thread if the trace has a thread column
143 | if has_thread:
144 | curr_process, curr_thread = curr_loc
145 | filtered_df = enter_leave_df.loc[
146 | (enter_leave_df["Process"] == curr_process)
147 | & (enter_leave_df["Thread"] == curr_thread)
148 | ]
149 | else:
150 | filtered_df = enter_leave_df.loc[
151 | (enter_leave_df["Process"] == curr_loc)
152 | ]
153 |
154 | stack = []
155 |
156 | # Note: The reason that we are creating lists that are
157 | # copies of the dataframe columns below and iterating over
158 | # those instead of using pandas iterrows is due to an
159 | # observed improvement in performance when using lists.
160 |
161 | event_types = list(filtered_df["Event Type"])
162 | df_indices, timestamps, names = (
163 | list(filtered_df.index),
164 | list(filtered_df["Timestamp (ns)"]),
165 | list(filtered_df.Name),
166 | )
167 |
168 | # Iterate through all events of filtered DataFrame
169 | for i in range(len(filtered_df)):
170 | curr_df_index, curr_timestamp, evt_type, curr_name = (
171 | df_indices[i],
172 | timestamps[i],
173 | event_types[i],
174 | names[i],
175 | )
176 |
177 | if evt_type == "Enter":
178 | # Add current dataframe index and timestamp to stack
179 | stack.append((curr_df_index, curr_timestamp, curr_name))
180 | else:
181 | # we want to iterate through the stack in reverse order
182 | # until we find the corresponding "Enter" Event
183 |                         enter_name, j = None, len(stack) - 1
184 |                         while enter_name != curr_name and j > -1:
185 |                             enter_df_index, enter_timestamp, enter_name = stack[j]
186 |                             j -= 1
187 |
188 |                         if enter_name == curr_name:
189 |                             # remove matched event from the stack
190 |                             del stack[j + 1]
191 |
192 | # Fill in the lists with the matching values if event found
193 | matching_events[enter_df_index] = curr_df_index
194 | matching_events[curr_df_index] = enter_df_index
195 |
196 | matching_times[enter_df_index] = curr_timestamp
197 | matching_times[curr_df_index] = enter_timestamp
198 | else:
199 | continue
200 |
201 | self.events["_matching_event"] = matching_events
202 | self.events["_matching_timestamp"] = matching_times
203 |
204 | self.events = self.events.astype({"_matching_event": "Int32"})
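        # Illustrative sketch of the columns this method adds (the trace path
        # is hypothetical):
        #
        #   >>> trace = Trace.from_otf2("path/to/otf2-trace-dir")
        #   >>> trace._match_events()
        #   >>> cols = ["Event Type", "_matching_event", "_matching_timestamp"]
        #   >>> trace.events[cols].head()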
205 |
206 | def _match_caller_callee(self):
207 | """Matches callers (parents) to callees (children) and adds three
208 | columns to the dataframe:
209 | _depth, _parent, and _children
210 | _depth is the depth of the event in the call tree (starting from 0 for root)
211 | _parent is the dataframe index of a row's parent event.
212 | _children is a list of dataframe indices of a row's children events.
213 | """
214 |
215 | if "_children" not in self.events.columns:
216 | # match events so we can
217 | # ignore unmatched ones
218 | self._match_events()
219 |
220 | def _match_caller_callee_by_level(filtered_df):
221 | # Matches caller/callee for each parallelism level
222 | children = np.array([None] * len(filtered_df))
223 | depth, parent = [float("nan")] * len(filtered_df), [float("nan")] * len(
224 | filtered_df
225 | )
226 |
227 | # Depth is the level in the
228 | # Call Tree starting from 0
229 | curr_depth = 0
230 |
231 | stack = []
232 | event_types = list(filtered_df["Event Type"])
233 |
234 | # loop through the events of the filtered dataframe
235 | for i in range(len(filtered_df)):
236 | evt_type = event_types[i]
237 |
238 | if evt_type == "Enter":
239 | if curr_depth > 0: # if event is a child of some other event
240 | parent_df_index = stack[-1]
241 |
242 | if children[parent_df_index] is None:
243 | # create a new list of children for the
244 | # parent if the current event is the first
245 | # child being added
246 | children[parent_df_index] = [filtered_df.index[i]]
247 | else:
248 | children[parent_df_index].append(filtered_df.index[i])
249 |
250 | parent[i] = filtered_df.index[parent_df_index]
251 |
252 | depth[i] = curr_depth
253 | curr_depth += 1
254 |
255 | # add enter dataframe index to stack
256 | stack.append(i)
257 | else:
258 | # pop event off stack once matching leave found
259 | # Note: parent, and children for a leave row
260 | # can be found using the matching index that
261 | # corresponds to the enter row
262 | stack.pop()
263 |
264 | curr_depth -= 1
265 |
266 | new_df = filtered_df.copy() # don't mutate in transform!
267 | new_df["_depth"] = depth
268 | new_df["_parent"] = parent
269 | new_df["_children"] = children
270 | return new_df
271 |
272 | # only use enter and leave rows
273 | # to determine calling relationships
274 | enter_leave_mask = self.events["Event Type"].isin(["Enter", "Leave"]) & (
275 | self.events["_matching_event"].notnull()
276 | )
277 | enter_leave_df = self.events.loc[enter_leave_mask]
278 |
279 | # add dummy values for depth/parent/children
280 | # (otherwise loc won't insert the values)
281 | self.events["_depth"] = 0
282 | self.events["_parent"] = None
283 | self.events["_children"] = None
284 | self.events.loc[enter_leave_mask] = enter_leave_df.groupby(
285 | self.parallelism_levels, group_keys=False, dropna=False
286 | ).apply(_match_caller_callee_by_level)
287 |
288 | self.events = self.events.astype({"_depth": "Int32", "_parent": "Int32"})
289 | self.events = self.events.astype({"_depth": "category", "_parent": "category"})
290 |
291 | def calc_inc_metrics(self, columns=None):
292 | # if no columns are specified by the user, then we calculate
293 | # inclusive metrics for all the numeric columns in the trace
294 | columns = self.numeric_cols if columns is None else columns
295 |
296 | # pair enter and leave rows
297 | if "_matching_event" not in self.events.columns:
298 | self._match_events()
299 |
300 | # only filter to enters that have a matching event
301 | enter_df = self.events.loc[
302 | (self.events["Event Type"] == "Enter")
303 | & (self.events["_matching_event"].notnull())
304 | ]
305 |
306 | # calculate inclusive metric for each column specified
307 | for col in columns:
308 | # name of column for this inclusive metric
309 | metric_col_name = ("time" if col == "Timestamp (ns)" else col) + ".inc"
310 |
311 | if metric_col_name not in self.events.columns:
312 | # calculate the inclusive metric by subtracting
313 | # the values at the enter rows from the values
314 | # at the corresponding leave rows
315 | self.events.loc[
316 | (self.events["_matching_event"].notnull())
317 | & (self.events["Event Type"] == "Enter"),
318 | metric_col_name,
319 | ] = (
320 | self.events[col][enter_df["_matching_event"]].values
321 | - enter_df[col].values
322 | )
323 |
324 | self.inc_metrics.append(metric_col_name)
325 |
326 | def calc_exc_metrics(self, columns=None):
327 | # calculate exc metrics for all numeric columns if not specified
328 | columns = self.numeric_cols if columns is None else columns
329 |
330 | # match caller and callee rows
331 | self._match_caller_callee()
332 |
333 | # exclusive metrics only change for rows that have children
334 | filtered_df = self.events.loc[self.events["_children"].notnull()]
335 | parent_df_indices, children = (
336 | list(filtered_df.index),
337 | filtered_df["_children"].to_list(),
338 | )
339 |
340 | for col in columns:
341 | # get the corresponding inclusive column name for this metric
342 | inc_col_name = ("time" if col == "Timestamp (ns)" else col) + ".inc"
343 | if inc_col_name not in self.events.columns:
344 | self.calc_inc_metrics([col])
345 |
346 | # name of column for this exclusive metric
347 | metric_col_name = ("time" if col == "Timestamp (ns)" else col) + ".exc"
348 |
349 | if metric_col_name not in self.events.columns:
350 | # exc metric starts out as a copy of the inc metric values
351 | exc_values = self.events[inc_col_name].copy()
352 | inc_values = self.events[inc_col_name]
353 |
354 | for i in range(len(filtered_df)):
355 | curr_parent_idx, curr_children = parent_df_indices[i], children[i]
356 | for child_idx in curr_children:
357 | # subtract each child's inclusive metric from the total
358 | # to calculate the exclusive metric for the parent
359 |
360 | # if the exclusive metric is time, we only want to subtract
361 | # the overlapping portion of time between the parent and child
362 | # this is important for e.g. GPUs where execution happens async
363 | # relative to e.g. a kernel launch
364 | inc_metric = inc_values[child_idx]
365 | if col == "Timestamp (ns)":
366 | # calculate overlap between
367 | # start of child event and end of parent event
368 | end_time = min(
369 | self.events.loc[curr_parent_idx, "_matching_timestamp"],
370 | self.events.loc[child_idx, "_matching_timestamp"],
371 | )
372 | inc_metric = max(
373 | end_time - self.events.loc[child_idx, "Timestamp (ns)"],
374 | 0,
375 | )
376 | exc_values[curr_parent_idx] -= inc_metric
377 |
378 | self.events[metric_col_name] = exc_values
379 | self.exc_metrics.append(metric_col_name)
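        # Usage sketch (hypothetical trace path): calling this on the timestamp
        # column adds a "time.exc" column, as exercised in pipit/tests/trace.py.
        #
        #   >>> trace = Trace.from_otf2("path/to/otf2-trace-dir")
        #   >>> trace.calc_exc_metrics(["Timestamp (ns)"])
        #   >>> trace.events.groupby("Name")["time.exc"].sum()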
380 |
381 | def comm_matrix(self, output="size"):
382 | """
383 | Communication Matrix for Peer-to-Peer (P2P) MPI messages
384 |
385 | Arguments:
386 | 1) output -
387 | string to choose whether the communication volume should be measured
388 | by bytes transferred between two processes or the number of messages
389 | sent (two choices - "size" or "count")
390 |
391 |         Returns:
392 |         A 2D NumPy array representing the communication matrix for all
393 |         P2P messages in the given trace.
394 |
395 |         Internally, three lists are created - sender ranks, receiver ranks,
396 |         and message volumes - each with one entry per message sent in the
397 |         trace. The function then loops over these per-message lists and
398 |         accumulates each message's volume into the matrix.
399 |
400 |         Note:
401 |         The first dimension of the returned 2D array
402 |         is the sender and the second dimension is the receiver,
403 |         ex) comm_matrix[sender_rank][receiver_rank]
404 |         """
405 |
406 | # get the list of ranks/processes
407 | # (mpi messages are sent between processes)
408 | ranks = set(self.events["Process"])
409 |
410 | # create a 2d numpy array that will be returned
411 | # at the end of the function
412 | communication_matrix = np.zeros(shape=(len(ranks), len(ranks)))
413 |
414 | # filter the dataframe by MPI Send and Isend events
415 | sender_dataframe = self.events.loc[
416 | self.events["Name"].isin(["MpiSend", "MpiIsend"]),
417 | ["Process", "Attributes"],
418 | ]
419 |
420 | # get the mpi ranks of all the sender processes
421 | # the length of the list is the total number of messages sent
422 | sender_ranks = sender_dataframe["Process"].to_list()
423 |
424 | # get the corresponding mpi ranks of the receivers
425 | # the length of the list is the total number of messages sent
426 | receiver_ranks = (
427 | sender_dataframe["Attributes"]
428 | .apply(lambda attrDict: attrDict["receiver"])
429 | .to_list()
430 | )
431 |
432 | # the length of the message_volume list created below
433 | # is the total number of messages sent
434 |
435 | # number of bytes communicated for each message sent
436 | if output == "size":
437 | # (1 communication is a single row in the sender dataframe)
438 | message_volume = (
439 | sender_dataframe["Attributes"]
440 | .apply(lambda attrDict: attrDict["msg_length"])
441 | .to_list()
442 | )
443 | elif output == "count":
444 | # 1 message between the pairs of processes
445 | # for each row in the sender dataframe
446 | message_volume = np.full(len(sender_dataframe), 1)
447 |
448 | for i in range(len(sender_ranks)):
449 | """
450 | loops through all the communication events and adds the
451 | message volume to the corresponding entry of the 2d array
452 | using the sender and receiver ranks
453 | """
454 | communication_matrix[sender_ranks[i], receiver_ranks[i]] += message_volume[
455 | i
456 | ]
457 |
458 | return communication_matrix
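        # Usage sketch (hypothetical trace path); indexing follows the
        # comm_matrix[sender_rank][receiver_rank] convention noted above:
        #
        #   >>> trace = Trace.from_otf2("path/to/otf2-trace-dir")
        #   >>> counts = trace.comm_matrix(output="count")
        #   >>> counts[0][1]  # number of messages sent from rank 0 to rank 1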
459 |
460 | def message_histogram(self, bins=20, **kwargs):
461 | """Generates histogram of message frequency by size."""
462 |
463 | # Filter by send events
464 | # TODO: replace with str.match
465 | messages = self.events[self.events["Name"].isin(["MpiSend", "MpiIsend"])]
466 |
467 | # Get message sizes
468 | sizes = messages["Attributes"].map(lambda x: x["msg_length"])
469 |
470 | return np.histogram(sizes, bins=bins, **kwargs)
471 |
472 | def comm_over_time(self, output="size", message_type="send", bins=50, **kwargs):
473 | """Returns histogram of communication volume over time.
474 |
475 | Args:
476 |             output (str, optional): Whether to calculate communication by "count" or
477 | "size". Defaults to "size".
478 |
479 | message_type (str, optional): Whether to compute for sends or
480 | receives. Defaults to "send".
481 |
482 | bins (int, optional): Number of bins in the histogram. Defaults to
483 | 50.
484 |
485 | Returns:
486 | hist: Volume in size or number of messages in each time interval
487 | edges: Edges of time intervals
488 | """
489 | # Filter by send or receive events
490 | events = self.events[
491 | self.events["Name"].isin(
492 | ["MpiSend", "MpiIsend"]
493 | if message_type == "send"
494 | else ["MpiRecv", "MpiIrecv"]
495 | )
496 | ]
497 |
498 | # Get timestamps and sizes
499 | timestamps = events["Timestamp (ns)"]
500 | sizes = events["Attributes"].apply(lambda x: x["msg_length"])
501 |
502 | return np.histogram(
503 | timestamps,
504 | bins=bins,
505 | weights=sizes.tolist() if output == "size" else None,
506 | range=[
507 | self.events["Timestamp (ns)"].min(),
508 | self.events["Timestamp (ns)"].max(),
509 | ],
510 | **kwargs
511 | )
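        # Usage sketch (hypothetical trace path); mirrors the call in
        # pipit/tests/trace.py:
        #
        #   >>> hist, edges = trace.comm_over_time(
        #   ...     output="count", message_type="send", bins=5
        #   ... )
        #   >>> hist.sum()  # total number of messages sent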
512 |
513 | def comm_by_process(self, output="size"):
514 | """Returns total communication volume in size or number of messages per
515 | process.
516 |
517 | Returns:
518 | pd.DataFrame: DataFrame containing total communication volume or
519 |             number of messages sent and received by each process.
520 | """
521 | comm_matrix = self.comm_matrix(output=output)
522 |
523 | # Get total sent and received for each process
524 | sent = comm_matrix.sum(axis=1)
525 | received = comm_matrix.sum(axis=0)
526 |
527 | return pd.DataFrame({"Sent": sent, "Received": received}).rename_axis("Process")
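        # Usage sketch (hypothetical trace path); the returned DataFrame is
        # indexed by process and has "Sent" and "Received" columns:
        #
        #   >>> sizes = trace.comm_by_process()                  # bytes
        #   >>> counts = trace.comm_by_process(output="count")   # messages
        #   >>> counts.loc[0]["Sent"]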
528 |
529 | def flat_profile(
530 | self, metrics="time.exc", groupby_column="Name", per_process=False
531 | ):
532 |         """
533 |         Arguments:
534 |         metrics - a string or list of strings containing the metrics to be aggregated
535 |         groupby_column - a string or list containing the columns to be grouped by
536 |         per_process - if True, keep one row per process instead of averaging
537 |
538 |         Returns:
539 |         A Pandas DataFrame containing the aggregated metrics for each group.
540 |         """
541 |
542 | metrics = [metrics] if not isinstance(metrics, list) else metrics
543 |
544 | # calculate inclusive time if needed
545 | if "time.inc" in metrics:
546 | self.calc_inc_metrics(["Timestamp (ns)"])
547 |
548 | # calculate exclusive time if needed
549 | if "time.exc" in metrics:
550 | self.calc_exc_metrics(["Timestamp (ns)"])
551 |
552 | # This first groups by both the process and the specified groupby
553 | # column (like name). It then sums up the metrics for each combination
554 | # of the process and the groupby column.
555 | if per_process:
556 | return (
557 | self.events.loc[self.events["Event Type"] == "Enter"]
558 | .groupby([groupby_column] + self.parallelism_levels, observed=True)[
559 | metrics
560 | ]
561 | .sum()
562 | )
563 | else:
564 | return (
565 | self.events.loc[self.events["Event Type"] == "Enter"]
566 | .groupby([groupby_column] + self.parallelism_levels, observed=True)[
567 | metrics
568 | ]
569 | .sum()
570 | .groupby(groupby_column)
571 | .mean()
572 | )
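        # Usage sketch (hypothetical trace path): an exclusive-time flat
        # profile averaged across processes, plus a per-process variant.
        #
        #   >>> trace.flat_profile(metrics="time.exc")
        #   >>> trace.flat_profile(metrics="time.exc", per_process=True)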
573 |
574 | def load_imbalance(self, metric="time.exc", num_processes=1):
575 | """
576 |         Arguments:
577 |         metric - a string denoting the metric to calculate load imbalance for
578 |         num_processes - the number of ranks with the highest metric values to
579 |         list for each function
580 |
581 |         Returns:
582 |         A Pandas DataFrame indexed by function name with three columns: the mean
583 |         of the metric across ranks, the imbalance (the maximum metric value
584 |         across ranks divided by the mean), and a list of the num_processes
585 |         ranks with the highest metric values
586 | """
587 |
588 | num_ranks = len(set(self.events["Process"]))
589 | num_display = num_ranks if num_processes > num_ranks else num_processes
590 |
591 | flat_profile = self.flat_profile(metrics=metric, per_process=True)
592 |
593 | imbalance_dict = dict()
594 |
595 | imb_metric = metric + ".imbalance"
596 | imb_ranks = "Top processes"
597 | mean_metric = metric + ".mean"
598 |
599 | imbalance_dict[imb_metric] = []
600 | imbalance_dict[imb_ranks] = []
601 | imbalance_dict[mean_metric] = []
602 |
603 | functions = set(self.events.loc[self.events["Event Type"] == "Enter"]["Name"])
604 | for function in functions:
605 | curr_series = flat_profile.loc[function]
606 |
607 | top_n = curr_series.sort_values(ascending=False).iloc[0:num_display]
608 |
609 | imbalance_dict[mean_metric].append(curr_series.mean())
610 | imbalance_dict[imb_metric].append(top_n.values[0] / curr_series.mean())
611 | imbalance_dict[imb_ranks].append(list(top_n.index))
612 |
613 | imbalance_df = pd.DataFrame(imbalance_dict)
614 | imbalance_df.index = functions
615 | imbalance_df.sort_values(by=mean_metric, axis=0, inplace=True, ascending=False)
616 |
617 | return imbalance_df
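        # Usage sketch (hypothetical trace path): load imbalance of exclusive
        # time, listing the two most loaded ranks per function.
        #
        #   >>> trace.load_imbalance(metric="time.exc", num_processes=2)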
618 |
619 | def idle_time(self, idle_functions=["Idle"], mpi_events=False):
620 | # calculate inclusive metrics
621 | if "time.inc" not in self.events.columns:
622 | self.calc_inc_metrics()
623 |
624 | if mpi_events:
625 | idle_functions += ["MPI_Wait", "MPI_Waitall", "MPI_Recv"]
626 |
627 | def calc_idle_time(events):
628 | # assumes events is sorted by time
629 |
630 | # Calculate idle time due to gaps in between events
631 | # This is the total time minus exclusive time spent in functions
632 | total_time = events["Timestamp (ns)"].max() - events["Timestamp (ns)"].min()
633 |
634 | idle_time = total_time - events["time.exc"].sum()
635 |
636 | # Calculate idle time due to idle_functions
637 | idle_time += events[events["Name"].isin(idle_functions)]["time.inc"].sum()
638 | return idle_time
639 |
640 | return (
641 | self.events.groupby(self.parallelism_levels, dropna=False)
642 | .apply(
643 | calc_idle_time,
644 | )
645 | .rename("idle_time")
646 | )
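        # Usage sketch (hypothetical trace path): per-process idle time, and a
        # variant that also counts blocking MPI calls as idle.
        #
        #   >>> trace.idle_time()
        #   >>> trace.idle_time(mpi_events=True)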
647 |
648 | def _calculate_idle_time_for_process(
649 | self, process, idle_functions=["Idle"], mpi_events=False
650 | ):
651 | # calculate inclusive metrics
652 | if "time.inc" not in self.events.columns:
653 | self.calc_inc_metrics()
654 |
655 | if mpi_events:
656 | idle_functions += ["MPI_Wait", "MPI_Waitall", "MPI_Recv"]
657 | # filter the dataframe to include only 'Enter' events within the specified
658 | # process with the specified function names
659 | df = self.events
660 | filtered_df = (
661 | df.loc[df["Event Type"] == "Enter"]
662 | .loc[df["Process"] == process]
663 | .loc[df["Name"].isin(idle_functions)]
664 | )
665 | # get the sum of the inclusive times of these events
666 | return filtered_df["time.inc"].sum()
667 |
668 | def time_profile(self, num_bins=50, normalized=False):
669 | """Computes time contributed by each function per time interval.
670 |
671 | Args:
672 | num_bins (int, optional): Number of evenly-sized time intervals to compute
673 | time profile for. Defaults to 50.
674 | normalized (bool, optional): Whether to return time contribution as
675 | percentage of time interval. Defaults to False.
676 |
677 | Returns:
678 | pd.DataFrame: Time profile of each function, where each column
679 | represents a function, and each row represents a time interval.
680 | """
681 | # Generate metrics
682 | self._match_caller_callee()
683 | self.calc_inc_metrics(["Timestamp (ns)"])
684 |
685 | # Filter by Enter rows
686 | events = self.events[self.events["Event Type"] == "Enter"].copy(deep=False)
687 | names = events["Name"].unique().tolist()
688 |
689 | # Create equal-sized bins
690 | edges = np.linspace(
691 | self.events["Timestamp (ns)"].min(),
692 | self.events["Timestamp (ns)"].max(),
693 | num_bins + 1,
694 | )
695 | bin_size = edges[1] - edges[0]
696 |
697 | total_bin_duration = bin_size * len(events["Process"].unique())
698 |
699 | profile = []
700 |
701 | def calc_exc_time_in_bin(events):
702 | # TODO: check if the numpy equivalent of the below code is faster
703 | dfx_to_idx = {
704 | dfx: idx
705 | for (dfx, idx) in zip(events.index, [i for i in range(len(events))])
706 | }
707 |
708 | # start out with exc times being a copy of inc times
709 | exc_times = list(events["inc_time_in_bin"].copy(deep=False))
710 |
711 | # filter to events that have children
712 | filtered_df = events.loc[events["_children"].notnull()]
713 |
714 | parent_df_indices, children = (
715 | list(filtered_df.index),
716 | filtered_df["_children"].to_list(),
717 | )
718 |
719 | # Iterate through the events that are parents
720 | for i in range(len(filtered_df)):
721 | curr_parent_idx, curr_children = (
722 | dfx_to_idx[parent_df_indices[i]],
723 | children[i],
724 | )
725 |
726 | # Only consider inc times of children in current bin
727 | for child_df_idx in curr_children:
728 | if child_df_idx in dfx_to_idx:
729 | exc_times[curr_parent_idx] -= exc_times[
730 | dfx_to_idx[child_df_idx]
731 | ]
732 |
733 | events["exc_time_in_bin"] = exc_times
734 |
735 | # For each bin, determine each function's time contribution
736 | for i in range(num_bins):
737 | start = edges[i]
738 | end = edges[i + 1]
739 |
740 | # Find functions that belong in this bin
741 | in_bin = events[
742 | (events["_matching_timestamp"] > start)
743 | & (events["Timestamp (ns)"] < end)
744 | ].copy(deep=False)
745 |
746 | # Calculate inc_time_in_bin for each function
747 | # Case 1 - Function starts in bin
748 | in_bin.loc[in_bin["Timestamp (ns)"] >= start, "inc_time_in_bin"] = (
749 | end - in_bin["Timestamp (ns)"]
750 | )
751 |
752 | # Case 2 - Function ends in bin
753 | in_bin.loc[in_bin["_matching_timestamp"] <= end, "inc_time_in_bin"] = (
754 | in_bin["_matching_timestamp"] - start
755 | )
756 |
757 | # Case 3 - Function spans bin
758 | in_bin.loc[
759 | (in_bin["Timestamp (ns)"] < start)
760 | & (in_bin["_matching_timestamp"] > end),
761 | "inc_time_in_bin",
762 | ] = (
763 | end - start
764 | )
765 |
766 | # Case 4 - Function contained in bin
767 | in_bin.loc[
768 | (in_bin["Timestamp (ns)"] >= start)
769 | & (in_bin["_matching_timestamp"] <= end),
770 | "inc_time_in_bin",
771 | ] = (
772 | in_bin["_matching_timestamp"] - in_bin["Timestamp (ns)"]
773 | )
774 |
775 | # Calculate exc_time_in_bin by subtracting inc_time_in_bin for all children
776 | calc_exc_time_in_bin(in_bin)
777 |
778 | # Sum across all processes
779 | agg = in_bin.groupby("Name")["exc_time_in_bin"].sum()
780 | profile.append(agg.to_dict())
781 |
782 | # Convert to DataFrame
783 | df = pd.DataFrame(profile, columns=names)
784 |
785 | # Add idle_time column
786 | df.insert(0, "idle_time", total_bin_duration - df.sum(axis=1))
787 |
788 | # Threshold for zero
789 | df.mask(df < 0.01, 0, inplace=True)
790 |
791 | # Normalize
792 | if normalized:
793 | df /= total_bin_duration
794 |
795 | # Add bin_start and bin_end
796 | df.insert(0, "bin_start", edges[:-1])
797 | df.insert(0, "bin_end", edges[1:])
798 |
799 | return df
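        # Usage sketch (hypothetical trace path; the function columns depend on
        # the event names in the trace). Mirrors the call in
        # pipit/tests/trace.py:
        #
        #   >>> profile = trace.time_profile(num_bins=62, normalized=True)
        #   >>> profile[["bin_start", "bin_end", "idle_time"]].head()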
800 |
801 | @staticmethod
802 | def multirun_analysis(
803 | traces, metric_column="Timestamp (ns)", groupby_column="Name"
804 | ):
805 | """
806 | Arguments:
807 | traces - list of pipit traces
808 | metric_column - the column of the metric to be aggregated over
809 | groupby_column - the column that will be grouped by before aggregation
810 |
811 |         Returns:
812 |         A Pandas DataFrame indexed by the number of processes in each trace,
813 |         whose columns are the groups of the groupby_column, and whose entries
814 |         are the aggregated metric for the corresponding trace and group
815 | """
816 |
817 | # for each trace, collect a flat profile
818 | flat_profiles = []
819 | for trace in traces:
820 | trace.calc_exc_metrics([metric_column])
821 | metric_col = (
822 | "time.exc"
823 | if metric_column == "Timestamp (ns)"
824 | else metric_column + ".exc"
825 | )
826 | flat_profiles.append(
827 | trace.flat_profile(metrics=[metric_col], groupby_column=groupby_column)
828 | )
829 |
830 | # combine these flat profiles and index them by number of processes
831 | combined_df = pd.concat([fp[metric_col] for fp in flat_profiles], axis=1).T
832 | combined_df.index = [len(set(trace.events["Process"])) for trace in traces]
833 | combined_df.index.rename("Number of Processes", inplace=True)
834 |
835 | # sort the columns/groups in descending order of the aggregated metric values
836 | function_sums = combined_df.sum()
837 | combined_df = combined_df[function_sums.sort_values(ascending=False).index]
838 |
839 | return combined_df
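        # Usage sketch (hypothetical trace paths): compare exclusive time per
        # function across runs with different process counts.
        #
        #   >>> traces = [Trace.from_otf2(d) for d in ("run-2", "run-4")]
        #   >>> Trace.multirun_analysis(traces)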
840 |
841 | def detect_pattern(
842 | self,
843 | start_event,
844 | iterations=None,
845 | window_size=None,
846 | process=0,
847 | metric="time.exc",
848 | ):
849 | import stumpy
850 |
851 | enter_events = self.events[
852 | (self.events["Name"] == start_event)
853 | & (self.events["Event Type"] == "Enter")
854 | & (self.events["Process"] == process)
855 | ]
856 |
857 | leave_events = self.events[
858 | (self.events["Name"] == start_event)
859 | & (self.events["Event Type"] == "Leave")
860 | & (self.events["Process"] == process)
861 | ]
862 |
863 | # count the number of enter events to
864 | # determine the number of iterations if it's not
865 | # given by the user.
866 | if iterations is None:
867 | iterations = len(enter_events)
868 |
869 | # get the first enter and last leave of
870 | # the given event. we will only investigate
871 | # this portion of the data.
872 | first_loop_enter = enter_events.index[0]
873 | last_loop_leave = leave_events.index[-1]
874 |
875 | df = self.events.iloc[first_loop_enter + 1 : last_loop_leave]
876 | filtered_df = df.loc[(df[metric].notnull()) & (df["Process"] == process)]
877 | y = filtered_df[metric].values[:]
878 |
879 | if window_size is None:
880 | window_size = int(len(y) / iterations)
881 |
882 | matrix_profile = stumpy.stump(y, window_size)
883 | dists, indices = stumpy.motifs(y, matrix_profile[:, 0], max_matches=iterations)
884 |
885 | # Gets the corresponding portion from the original
886 | # dataframe for each pattern.
887 | patterns = []
888 | for idx in indices[0]:
889 | end_idx = idx + window_size
890 |
891 | match_original = self.events.loc[
892 | self.events["Timestamp (ns)"].isin(
893 | filtered_df.iloc[idx:end_idx]["Timestamp (ns)"].values
894 | )
895 | ]
896 | patterns.append(match_original)
897 |
898 | return patterns
899 |
--------------------------------------------------------------------------------
/pipit/util/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Parallel Software and Systems Group, University of Maryland.
2 | # See the top-level LICENSE file for details.
3 | #
4 | # SPDX-License-Identifier: MIT
5 |
--------------------------------------------------------------------------------
/pipit/util/cct.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Parallel Software and Systems Group, University of Maryland.
2 | # See the top-level LICENSE file for details.
3 | #
4 | # SPDX-License-Identifier: MIT
5 |
6 | from pipit.graph import Graph, Node
7 |
8 |
9 | def create_cct(events):
10 | """
11 | Generic function to iterate through the events dataframe and create a CCT.
12 | Uses pipit's graph data structure for this. Returns a CCT
13 | and creates a new column in the Events DataFrame that stores
14 | a reference to each row's corresponding node in the CCT.
15 | """
16 |
17 | # CCT and list of nodes in DataFrame
18 | graph = Graph()
19 | graph_nodes = [None for i in range(len(events))]
20 |
21 | # determines whether a node exists or not
22 | callpath_to_node = dict()
23 |
24 | node_id = 0 # each node has a unique id
25 |
26 | # Filter the DataFrame to only Enter/Leave
27 | enter_leave_df = events.loc[events["Event Type"].isin(["Enter", "Leave"])]
28 |
29 | # list of processes and/or threads to iterate over
30 | if "Thread" in events.columns:
31 | exec_locations = set(zip(events["Process"], events["Thread"]))
32 | has_thread = True
33 | else:
34 | exec_locations = set(events["Process"])
35 | has_thread = False
36 |
37 | for curr_loc in exec_locations:
38 | # only filter by thread if the trace has a thread column
39 | if has_thread:
40 | curr_process, curr_thread = curr_loc
41 | filtered_df = enter_leave_df.loc[
42 | (enter_leave_df["Process"] == curr_process)
43 | & (enter_leave_df["Thread"] == curr_thread)
44 | ]
45 | else:
46 | filtered_df = enter_leave_df.loc[(enter_leave_df["Process"] == curr_loc)]
47 |
48 | curr_depth, callpath = 0, ""
49 |
50 | """
51 | Iterating over lists instead of
52 | DataFrame columns is more efficient
53 | """
54 | df_indices = filtered_df.index.to_list()
55 | function_names = filtered_df["Name"].to_list()
56 | event_types = filtered_df["Event Type"].to_list()
57 |
58 | # stacks used to iterate through the trace and add nodes to the cct
59 | functions_stack, nodes_stack = [], []
60 |
61 | # iterating over the events of the current thread's trace
62 | for i in range(len(filtered_df)):
63 | curr_df_index, evt_type, function_name = (
64 | df_indices[i],
65 | event_types[i],
66 | function_names[i],
67 | )
68 |
69 | # encounter a new function through its entry point.
70 | if evt_type == "Enter":
71 | # add the function to the stack and get the call path
72 | functions_stack.append(function_name)
73 | callpath = "->".join(functions_stack)
74 |
75 | # get the parent node of the function if it exists
76 | parent_node = None if curr_depth == 0 else nodes_stack[-1]
77 |
78 | if callpath in callpath_to_node:
79 | # don't create new node if callpath is in map
80 | curr_node = callpath_to_node[callpath]
81 | else:
82 | # create new node if callpath isn't in map
83 | curr_node = Node(node_id, parent_node, curr_depth)
84 | callpath_to_node[callpath] = curr_node
85 | node_id += 1
86 |
87 | # add node as root or child of its
88 | # parent depending on current depth
89 | (
90 | graph.add_root(curr_node)
91 | if curr_depth == 0
92 | else parent_node.add_child(curr_node)
93 | )
94 |
95 | # Update nodes stack, column, and current depth
96 | nodes_stack.append(curr_node)
97 | graph_nodes[curr_df_index] = curr_node
98 | curr_depth += 1
99 | else:
100 | # we want to iterate through the stack in reverse order
101 | # until we find the corresponding "Enter" Event
102 | enter_name, j = None, len(functions_stack) - 1
103 | while enter_name != function_name and j > -1:
104 | enter_name = functions_stack[j]
105 | j -= 1
106 |
107 | if enter_name == function_name:
108 | # update stacks and current depth
109 | del functions_stack[j + 1]
110 | del nodes_stack[j + 1]
111 | curr_depth -= 1
112 | else:
113 | continue
114 |
115 | # Update the Trace with the generated cct
116 | events["Graph_Node"] = graph_nodes
117 |
118 | return graph
119 |
--------------------------------------------------------------------------------
/pipit/util/config.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Parallel Software and Systems Group, University of Maryland.
2 | # See the top-level LICENSE file for details.
3 | #
4 | # SPDX-License-Identifier: MIT
5 |
6 |
7 | # Validator to check if the value entered is of type bool
8 | def bool_validator(key, value):
9 | if type(value) is not bool:
10 | raise TypeError(
11 | (
12 | 'Error loading configuration: The Value "{}" for Configuration "{}"'
13 |                 + " must be of type bool"
14 | ).format(value, key)
15 | )
16 | else:
17 | return True
18 |
19 |
20 | # Validator to check if the value entered is of type string
21 | def str_validator(key, value):
22 | if type(value) is not str:
23 | raise TypeError(
24 | (
25 | 'Error loading configuration: The Value "{}" for Configuration "{}"'
26 |                 + " must be of type string"
27 | ).format(value, key)
28 | )
29 | else:
30 | return True
31 |
32 |
33 | # Validator to check if the value entered is of type int
34 | def int_validator(key, value):
35 | if type(value) is not int:
36 | raise TypeError(
37 | (
38 | 'Error loading configuration: The Value "{}" for Configuration "{}"'
39 |                 + " must be of type int"
40 | ).format(value, key)
41 | )
42 | if key == "depth" and value < 1:
43 |         raise ValueError("Depth must be at least 1")
44 | return True
45 |
46 |
47 | # Validator to check if the value entered is of type float
48 | def float_validator(key, value):
49 | if type(value) is not float:
50 | raise TypeError(
51 | (
52 | 'Error loading configuration: The Value "{}" for Configuration "{}"'
53 |                 + " must be of type float"
54 | ).format(value, key)
55 | )
56 | else:
57 | return True
58 |
59 |
60 | # Validator to check if the value entered is a valid log level
61 | def log_level_validator(key, value):
62 | if value not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]:
63 | raise ValueError(
64 | (
65 | 'Error loading configuration: The Value "{}" for Configuration "{}"'
66 |                 + " must be a valid log level"
67 | ).format(value, key)
68 | )
69 | else:
70 | return True
71 |
72 |
73 | # Validator to check if the value entered is a valid URL
74 | def url_validator(key, value):
75 | if value.startswith("http://") or value.startswith("https://"):
76 | return True
77 | else:
78 | raise ValueError(
79 | (
80 | 'Error loading configuration: The Value "{}" for Configuration "{}"'
81 |                 + " must be a valid URL"
82 | ).format(value, key)
83 | )
84 |
85 |
86 | registered_options = {
87 | "log_level": {
88 | "default": "INFO",
89 | "validator": log_level_validator,
90 | },
91 | "notebook_url": {
92 | "default": "http://localhost:8888",
93 | "validator": url_validator,
94 | },
95 | }
96 |
97 | global_config = {key: registered_options[key]["default"] for key in registered_options}
98 |
99 |
100 | # Returns the current value of the specific config key
101 | def get_option(key):
102 | if not key or key not in registered_options:
103 |         raise ValueError("No such key(s)")
104 | else:
105 | return global_config[key]
106 |
107 |
108 | # Updates the value of the specified key
109 | def set_option(key, val):
110 | if not key or key not in registered_options:
111 |         raise ValueError("No such key(s)")
112 |
113 | validator = registered_options[key]["validator"]
114 |
115 | if validator(key, val):
116 | global_config[key] = val
117 |
118 |
119 | # Resets the value of the specified key
120 | # If "all" is passed in, resets values of all keys
121 | def reset_option(key):
122 | if not key:
123 |         raise ValueError("No such key(s)")
124 |
125 | if key in registered_options:
126 | global_config[key] = registered_options[key]["default"]
127 | elif key == "all":
128 | for k in registered_options:
129 | global_config[k] = registered_options[k]["default"]
130 | else:
131 | raise ValueError(
132 | "You must specify a valid key. Or, use the special keyword "
133 | '"all" to reset all the options to their default value'
134 | )
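# Usage sketch (only the keys in registered_options above are valid):
#
#   >>> from pipit.util.config import get_option, set_option, reset_option
#   >>> get_option("log_level")
#   'INFO'
#   >>> set_option("log_level", "DEBUG")
#   >>> reset_option("all")  # restore every option to its default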
135 |
--------------------------------------------------------------------------------
/pipit/writers/chrome_writer.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 |
4 | class ChromeWriter:
5 |     """Exports traces to the Chrome Tracing JSON format, which can be opened
6 |     with Chrome Trace Viewer or Perfetto for analysis.
7 |
8 | This exports to the older Chrome Tracing JSON format which is still supported by
9 | Perfetto, and not the newer Perfetto binary format.
10 |
11 | See https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview # noqa
12 | """
13 |
14 | def __init__(self, trace, filename="trace.json"):
15 | self.trace = trace
16 | self.filename = filename
17 |
18 | def write(self):
19 | events = self.trace.events
20 |
21 | # Assign the fields as expected by the Chrome Tracing JSON format
22 | # Let's create a new dataframe to avoid modifying the original
23 | df = pd.DataFrame()
24 |
25 | # "name" represents the event name
26 | df["name"] = events["Name"]
27 |
28 | # "ph" represents event type -- also called "phase"
29 | # Rename Enter events to "B" (begin), Leave events to "E" (end),
30 | # and Instant events to "i"
31 | df["ph"] = events["Event Type"].replace(
32 | ["Enter", "Leave", "Instant"], ["B", "E", "i"]
33 | )
34 |
35 |         # "ts" represents the timestamp (in microseconds) of the event
36 | df["ts"] = (events["Timestamp (ns)"] / 1e3).astype(int)
37 |
38 | # "pid" represents the process ID for the process that the event occurs in
39 | df["pid"] = events["Process"]
40 |
41 | # "tid" represents the thread ID for the thread that the event occurs in
42 | if "Thread" in events.columns:
43 | df["tid"] = events["Thread"]
44 |
45 | # Put all of the additional event attributes into the "args" field
46 | if "Attributes" in events.columns:
47 | df["args"] = events["Attributes"]
48 |
49 | # Write the dataframe to a JSON file
50 | return df.to_json(path_or_buf=self.filename, orient="records")
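        # Usage sketch (hypothetical output path): this writer is normally
        # invoked through Trace.to_chrome(), defined in pipit/trace.py; the
        # resulting JSON can be opened in Perfetto or chrome://tracing.
        #
        #   >>> trace.to_chrome("my_trace.json")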
51 |
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of
2 | # Maryland. See the top-level LICENSE file for details.
3 | #
4 | # SPDX-License-Identifier: MIT
5 |
6 | [pytest]
7 | addopts = --durations=20 -ra
8 | testpaths = pipit/tests
9 | python_files = *.py
10 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | otf2
3 | pandas
4 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of
2 | # Maryland. See the top-level LICENSE file for details.
3 | #
4 | # SPDX-License-Identifier: MIT
5 |
6 | from setuptools import setup
7 |
8 | setup(
9 | name="pipit",
10 | version="0.1.0",
11 | description="A Python library for analyzing parallel execution traces",
12 | url="https://github.com/hpcgroup/pipit",
13 | author="Abhinav Bhatele",
14 | author_email="bhatele@cs.umd.edu",
15 | license="MIT",
16 | classifiers=[
17 | "Development Status :: 4 - Beta",
18 | "License :: OSI Approved :: MIT License",
19 | ],
20 | keywords="distributed computing, parallel computing, GPU traces",
21 | packages=["pipit", "pipit.readers", "pipit.tests", "pipit.util", "pipit.writers"],
22 | install_requires=[
23 | "numpy",
24 | "otf2",
25 | "pandas",
26 | ],
27 | )
28 |
--------------------------------------------------------------------------------