├── .flake8 ├── .github └── workflows │ └── unit-tests.yaml ├── .gitignore ├── .readthedocs.yml ├── LICENSE ├── README.md ├── docs ├── Makefile ├── conf.py ├── developer_guide.rst ├── examples │ ├── csv_reader.py │ ├── hpctoolkit.py │ ├── nsight.py │ ├── otf2_read.py │ └── projections.py ├── getting_started.rst ├── index.rst ├── requirements.txt ├── source │ ├── pipit.readers.rst │ └── pipit.rst └── user_guide.rst ├── logo.png ├── pipit ├── .gitignore ├── __init__.py ├── graph.py ├── readers │ ├── __init__.py │ ├── core_reader.py │ ├── hpctoolkit_reader.py │ ├── nsight_reader.py │ ├── nsight_sqlite_reader.py │ ├── otf2_reader.py │ └── projections_reader.py ├── tests │ ├── config.py │ ├── conftest.py │ ├── data │ │ ├── foo-bar.csv │ │ ├── ping-pong-hpctoolkit │ │ │ ├── FORMATS.md │ │ │ ├── cct.db │ │ │ ├── meta.db │ │ │ ├── metrics │ │ │ │ ├── METRICS.yaml.ex │ │ │ │ └── default.yaml │ │ │ ├── profile.db │ │ │ ├── src │ │ │ │ └── ping-pong.c │ │ │ └── trace.db │ │ ├── ping-pong-otf2-papi │ │ │ ├── MANIFEST.md │ │ │ ├── scorep.cfg │ │ │ ├── traces.def │ │ │ ├── traces.otf2 │ │ │ └── traces │ │ │ │ ├── 0.def │ │ │ │ ├── 0.evt │ │ │ │ ├── 1.def │ │ │ │ └── 1.evt │ │ ├── ping-pong-otf2 │ │ │ ├── MANIFEST.md │ │ │ ├── scorep.cfg │ │ │ ├── traces.def │ │ │ ├── traces.otf2 │ │ │ └── traces │ │ │ │ ├── 0.def │ │ │ │ ├── 0.evt │ │ │ │ ├── 1.def │ │ │ │ └── 1.evt │ │ └── ping-pong-projections │ │ │ ├── pingpong.prj.0.log.gz │ │ │ ├── pingpong.prj.1.log.gz │ │ │ ├── pingpong.prj.projrc │ │ │ └── pingpong.prj.sts │ ├── hpctoolkit.py │ ├── otf2-tests.py │ ├── projections-tests.py │ └── trace.py ├── trace.py ├── util │ ├── __init__.py │ ├── cct.py │ └── config.py └── writers │ └── chrome_writer.py ├── pytest.ini ├── requirements.txt └── setup.py /.flake8: -------------------------------------------------------------------------------- 1 | # -*- conf -*- 2 | # flake8 settings for pipit 3 | # 4 | # These are the minimal flake8 settings recommended by Black 5 | # https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#code-style 6 | 7 | [flake8] 8 | max-line-length = 88 9 | extend-ignore = E203 10 | -------------------------------------------------------------------------------- /.github/workflows/unit-tests.yaml: -------------------------------------------------------------------------------- 1 | name: unit tests 2 | 3 | on: 4 | push: 5 | branches: [develop] 6 | pull_request: 7 | branches: [develop] 8 | 9 | jobs: 10 | ubuntu: 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | os: [ubuntu-latest] 15 | python-version: ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11"] 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | # use over setup python since this allows us to use older 20 | # Pythons 21 | - uses: mamba-org/setup-micromamba@v2 22 | with: 23 | # the create command looks like this: 24 | # `micromamba create -n test-env python=(version)` 25 | environment-name: test-env 26 | create-args: python=${{ matrix.python-version }} 27 | 28 | - name: Install Python packages 29 | run: | 30 | pip install --upgrade pip 31 | pip install --upgrade numpy pandas pytest otf2 32 | 33 | - name: Lint and format check with flake8 and black 34 | if: ${{ matrix.python-version == 3.9 }} 35 | run: | 36 | pip install --upgrade black flake8 37 | black --diff --check . 38 | flake8 39 | 40 | - name: Basic test with pytest 41 | run: | 42 | PYTHONPATH=. 
$(which pytest) 43 | 44 | macos: 45 | runs-on: macos-latest 46 | strategy: 47 | matrix: 48 | python-version: ["3.10", "3.11"] 49 | 50 | steps: 51 | - uses: actions/checkout@v2 52 | - uses: actions/setup-python@v2 53 | with: 54 | python-version: ${{ matrix.python-version }} 55 | 56 | - name: Install Python packages 57 | run: | 58 | pip install --upgrade pip 59 | pip install --upgrade numpy pandas pytest otf2 60 | 61 | - name: Basic test with pytest 62 | run: | 63 | PYTHONPATH=. $(which pytest) 64 | 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .cache 3 | .pytest_cache 4 | .ipynb_checkpoints 5 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | # Build from the docs/ directory with Sphinx 4 | sphinx: 5 | configuration: docs/conf.py 6 | 7 | # Explicitly set the version of Python and its requirements 8 | python: 9 | version: 3.8 10 | install: 11 | - requirements: docs/requirements.txt 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021, Parallel Software and Systems Group, University of 2 | Maryland. 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a 5 | copy of this software and associated documentation files (the "Software"), 6 | to deal in the Software without restriction, including without limitation 7 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 | and/or sell copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 | DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pipit 2 | 3 | [![Build Status](https://github.com/hpcgroup/pipit/actions/workflows/unit-tests.yaml/badge.svg)](https://github.com/hpcgroup/pipit/actions) 4 | [![docs](https://readthedocs.org/projects/pipit/badge/?version=latest)](https://pipit.readthedocs.io/en/latest/?badge=latest) 5 | [![Code Style: Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 6 | 7 | A Python-based library for analyzing execution traces from parallel programs. 8 | 9 | ### Contributing 10 | 11 | Pipit is an open source project. We welcome contributions via pull requests, 12 | and questions, feature requests, or bug reports via issues. 13 | 14 | ### License 15 | 16 | Pipit is distributed under the terms of the MIT License. 
17 | 18 | All contributions must be made under the the MIT license. Copyrights in the 19 | Pipit project are retained by contributors. No copyright assignment is 20 | required to contribute to Pipit. 21 | 22 | See [LICENSE](https://github.com/pssg-int/trace-analysis/blob/develop/LICENSE) 23 | for details. 24 | 25 | SPDX-License-Identifier: MIT 26 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of 2 | # Maryland. See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | # Configuration file for the Sphinx documentation builder. 7 | # 8 | # This file only contains a selection of the most common options. For a full 9 | # list see the documentation: 10 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 11 | 12 | # -- Path setup -------------------------------------------------------------- 13 | 14 | # If extensions (or modules to document with autodoc) are in another directory, 15 | # add these directories to sys.path here. If the directory is relative to the 16 | # documentation root, use os.path.abspath to make it absolute, like shown here. 17 | # 18 | # import os 19 | import sys 20 | 21 | # sys.path.insert(0, os.path.abspath('.')) 22 | 23 | # The name of the Pygments (syntax highlighting) style to use. 24 | from pygments.styles.default import DefaultStyle 25 | from pygments.token import Generic 26 | 27 | import pkg_resources 28 | 29 | 30 | # -- Project information ----------------------------------------------------- 31 | 32 | project = "pipit" 33 | copyright = "2022-2023, Parallel Software and Systems Group, University of Maryland" 34 | author = "Abhinav Bhatele" 35 | 36 | # The full version, including alpha/beta/rc tags 37 | release = "0.1.0" 38 | 39 | 40 | # -- General configuration --------------------------------------------------- 41 | 42 | # Add any Sphinx extension module names here, as strings. They can be 43 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 44 | # ones. 45 | extensions = [ 46 | "sphinx.ext.autodoc", 47 | "sphinx.ext.todo", 48 | "sphinx.ext.imgmath", 49 | "sphinx.ext.viewcode", 50 | "sphinx.ext.githubpages", 51 | "sphinx.ext.napoleon", 52 | ] 53 | 54 | # Add any paths that contain templates here, relative to this directory. 55 | templates_path = ["_templates"] 56 | 57 | # List of patterns, relative to source directory, that match files and 58 | # directories to ignore when looking for source files. 
59 | # This pattern also affects html_static_path and html_extra_path. 60 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 61 | 62 | 63 | # modifications to the default style 64 | class PipitStyle(DefaultStyle): 65 | styles = DefaultStyle.styles.copy() 66 | background_color = "#f4f4f8" 67 | styles[Generic.Output] = "#355" 68 | styles[Generic.Prompt] = "bold #346ec9" 69 | 70 | 71 | dist = pkg_resources.Distribution(__file__) 72 | sys.path.append(".") # make 'conf' module findable 73 | ep = pkg_resources.EntryPoint.parse("pipit = conf:PipitStyle", dist=dist) 74 | dist._ep_map = {"pygments.styles": {"plugin1": ep}} 75 | pkg_resources.working_set.add(dist) 76 | 77 | pygments_style = "pipit" 78 | 79 | 80 | # -- Options for HTML output ------------------------------------------------- 81 | 82 | # The theme to use for HTML and HTML Help pages. See the documentation for 83 | # a list of builtin themes. 84 | # 85 | html_theme = "sphinx_rtd_theme" 86 | 87 | # Theme options are theme-specific and customize the look and feel of a theme 88 | # further. For a list of options available for each theme, see the 89 | # documentation. 90 | # 91 | html_theme_options = { 92 | "canonical_url": "", 93 | "analytics_id": "", 94 | "logo_only": True, 95 | "display_version": True, 96 | "prev_next_buttons_location": "bottom", 97 | "style_external_links": False, 98 | # Toc options 99 | "collapse_navigation": True, 100 | "sticky_navigation": True, 101 | "navigation_depth": 4, 102 | "includehidden": True, 103 | "titles_only": False, 104 | } 105 | 106 | # Add any paths that contain custom static files (such as style sheets) here, 107 | # relative to this directory. They are copied after the builtin static files, 108 | # so a file named "default.css" will overwrite the builtin "default.css". 109 | html_static_path = [] 110 | -------------------------------------------------------------------------------- /docs/developer_guide.rst: -------------------------------------------------------------------------------- 1 | .. Copyright 2023 Parallel Software and Systems Group, University of Maryland. 2 | See the top-level LICENSE file for details. 3 | 4 | SPDX-License-Identifier: MIT 5 | 6 | *************** 7 | Developer Guide 8 | *************** 9 | 10 | -------------------------------------------------------------------------------- /docs/examples/csv_reader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import pipit as pp 4 | 5 | 6 | if __name__ == "__main__": 7 | # Use pipit's ``from_csv`` API to read in traces in CSV format. 8 | # The result is stored into pipit's Trace data structure. 9 | 10 | trace = pp.Trace.from_csv("../../pipit/tests/data/foo-bar.csv") 11 | 12 | trace.calc_inc_metrics() 13 | print(trace.events) 14 | -------------------------------------------------------------------------------- /docs/examples/hpctoolkit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import pipit as pp 4 | 5 | 6 | if __name__ == "__main__": 7 | # Path to HPCToolkit traces 8 | dirname = "../../pipit/tests/data/ping-pong-hpctoolkit" 9 | 10 | # Use pipit's ``from_hpctoolkit`` API to read in the traces. 11 | # The result is stored into pipit's Trace data structure. 12 | trace = pp.Trace.from_hpctoolkit(dirname) 13 | 14 | # Printout the DataFrame component of the Trace. 
15 | print(trace.events) 16 | -------------------------------------------------------------------------------- /docs/examples/nsight.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import pipit as pp 4 | 5 | 6 | if __name__ == "__main__": 7 | # Path to Nsight traces 8 | filename = "../../pipit/tests/data/nbody-nvtx/trace.csv" 9 | 10 | # Use pipit's ``from_nsight`` API to read in the traces. 11 | # The result is stored into pipit's Trace data structure. 12 | trace = pp.Trace.from_nsight(filename) 13 | 14 | # Print out the DataFrame component of the Trace. 15 | print(trace.events) 16 | -------------------------------------------------------------------------------- /docs/examples/otf2_read.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import pipit as pp 4 | 5 | 6 | if __name__ == "__main__": 7 | # Path to OTF2 traces 8 | dirname = "../../pipit/tests/data/ping-pong-otf2" 9 | 10 | # Use pipit's ``from_otf2`` API to read in the OTF2 traces. 11 | # The result is stored into pipit's Trace data structure. 12 | trace = pp.Trace.from_otf2(dirname) 13 | 14 | # Print out the DataFrame component of the Trace. 15 | print(trace.events) 16 | -------------------------------------------------------------------------------- /docs/examples/projections.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import pipit as pp 4 | 5 | 6 | if __name__ == "__main__": 7 | # Path to Projections traces 8 | dirname = "../../pipit/tests/data/ping-pong-projections" 9 | 10 | # Use pipit's ``from_projections`` API to read in the Projections traces. 11 | # The result is stored into pipit's Trace data structure. 12 | trace = pp.Trace.from_projections(dirname) 13 | 14 | # Print out the DataFrame component of the Trace. 15 | print(trace.events) 16 | -------------------------------------------------------------------------------- /docs/getting_started.rst: -------------------------------------------------------------------------------- 1 | .. Copyright 2022-2023 Parallel Software and Systems Group, University of 2 | Maryland. See the top-level LICENSE file for details. 3 | 4 | SPDX-License-Identifier: MIT 5 | 6 | *************** 7 | Getting Started 8 | *************** 9 | 10 | Prerequisites 11 | ============= 12 | 13 | Pipit has the following minimum requirements, which must be installed before 14 | pipit is run: 15 | 16 | #. Python 3 (3.6 - 3.11) 17 | #. pandas 18 | 19 | Pipit is available on `GitHub <https://github.com/hpcgroup/pipit>`_. 20 | 21 | 22 | Installation 23 | ============ 24 | 25 | 26 | Supported data formats 27 | ====================== 28 | 29 | Currently, pipit supports the following data formats as input: 30 | 31 | * `HPCToolkit <http://hpctoolkit.org/>`_ trace 32 | * OTF2 33 | * Nsight 34 | * Projections 35 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Copyright 2022-2023 Parallel Software and Systems Group, University of 2 | Maryland. See the top-level LICENSE file for details. 3 | 4 | SPDX-License-Identifier: MIT 5 | 6 | .. pipit documentation master file, created by 7 | sphinx-quickstart on Sun Nov 13 14:19:38 2022. 8 | You can adapt this file completely to your liking, but it should at least 9 | contain the root `toctree` directive.
10 | 11 | ##### 12 | Pipit 13 | ##### 14 | 15 | Pipit is a Python library for analyzing parallel execution traces. 16 | 17 | You can get pipit from its `GitHub repository 18 | `_: 19 | 20 | .. code-block:: console 21 | 22 | $ git clone https://github.com/hpcgroup/pipit.git 23 | 24 | 25 | .. toctree:: 26 | :maxdepth: 2 27 | :caption: User Docs 28 | 29 | getting_started 30 | user_guide 31 | 32 | .. toctree:: 33 | :maxdepth: 2 34 | :caption: Developer Docs 35 | 36 | developer_guide 37 | 38 | .. toctree:: 39 | :maxdepth: 2 40 | :caption: API Docs 41 | 42 | Pipit API Docs 43 | 44 | 45 | ################## 46 | Indices and tables 47 | ################## 48 | 49 | * :ref:`genindex` 50 | * :ref:`modindex` 51 | * :ref:`search` 52 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # These dependencies should be installed using pip in order 2 | # to build the documentation. 3 | 4 | sphinx 5 | sphinxcontrib-programoutput 6 | sphinx-rtd-theme 7 | # Restrict to pygments <2.13 8 | pygments <2.13 9 | -------------------------------------------------------------------------------- /docs/source/pipit.readers.rst: -------------------------------------------------------------------------------- 1 | pipit.readers package 2 | ===================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | pipit.readers.hpctoolkit\_reader module 8 | --------------------------------------- 9 | 10 | .. automodule:: pipit.readers.hpctoolkit_reader 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | pipit.readers.nsight\_reader module 16 | ----------------------------------- 17 | 18 | .. automodule:: pipit.readers.nsight_reader 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | pipit.readers.otf2\_reader module 24 | --------------------------------- 25 | 26 | .. automodule:: pipit.readers.otf2_reader 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | pipit.readers.projections\_reader module 32 | ---------------------------------------- 33 | 34 | .. automodule:: pipit.readers.projections_reader 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | Module contents 40 | --------------- 41 | 42 | .. automodule:: pipit.readers 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | -------------------------------------------------------------------------------- /docs/source/pipit.rst: -------------------------------------------------------------------------------- 1 | pipit package 2 | ============= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | pipit.readers 11 | 12 | Submodules 13 | ---------- 14 | 15 | pipit.graph module 16 | ------------------ 17 | 18 | .. automodule:: pipit.graph 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | pipit.trace module 24 | ------------------ 25 | 26 | .. automodule:: pipit.trace 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | Module contents 32 | --------------- 33 | 34 | .. automodule:: pipit 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | -------------------------------------------------------------------------------- /docs/user_guide.rst: -------------------------------------------------------------------------------- 1 | .. Copyright 2022-2023 Parallel Software and Systems Group, University of 2 | Maryland. See the top-level LICENSE file for details. 
3 | 4 | SPDX-License-Identifier: MIT 5 | 6 | ********** 7 | User Guide 8 | ********** 9 | 10 | 11 | -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcgroup/pipit/97beb979a126819de6fee1bd221647f4b9e2e6c7/logo.png -------------------------------------------------------------------------------- /pipit/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .cache 3 | .pytest_cache 4 | .ipynb_checkpoints 5 | -------------------------------------------------------------------------------- /pipit/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of 2 | # Maryland. See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | from .trace import Trace # noqa: F401 7 | from .util.config import get_option, set_option, reset_option # noqa: F401 8 | -------------------------------------------------------------------------------- /pipit/graph.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of 2 | # Maryland. See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | 7 | class Node: 8 | """Each Node corresponds to a PF tag in the experiment.xml file, and can be 9 | referenced by any calling_context_id directly under it 10 | """ 11 | 12 | def __init__(self, id, parent, level=None) -> None: 13 | self._pipit_nid = id 14 | self.children = [] 15 | self.parent = parent 16 | 17 | if level is None: 18 | self.level = self._calculate_level() 19 | else: 20 | self.level = level 21 | 22 | def add_child(self, child_node): 23 | self.children.append(child_node) 24 | 25 | def get_level(self): 26 | """This function returns the depth of the current node 27 | (a root node would return 0) 28 | """ 29 | return self.level 30 | 31 | def get_intersection(self, node: "Node"): 32 | """Given two nodes, this function returns the interesection of them 33 | starting from their root nodes (least common ancestor) 34 | If the two nodes do not share the same root node, their intersection 35 | would be None, otherwise it returns the nodes that they have in 36 | common (starting from the root) as a new Node 37 | """ 38 | if node is None: 39 | return None 40 | 41 | if self.get_level() > node.get_level(): 42 | node1 = self 43 | node2 = node 44 | else: 45 | node1 = node 46 | node2 = self 47 | 48 | while node1.get_level() > node2.get_level(): 49 | node1 = node1.parent 50 | 51 | while node1 != node2: 52 | node1 = node1.parent 53 | node2 = node2.parent 54 | 55 | return node1 56 | 57 | def get_node_list(self, min_level): 58 | """creates list from current node to node with level min_level 59 | backtracks on the current Node until root or min_level (whichever 60 | comes first) and returns them as a list of Nodes 61 | """ 62 | node = self 63 | return_list = [] 64 | 65 | while node is not None and node.level > min_level: 66 | return_list.append(node) 67 | node = node.parent 68 | 69 | return return_list 70 | 71 | def __str__(self) -> str: 72 | return "ID: " + str(self._pipit_nid) + " -- Level: " + str(self.level) 73 | 74 | def _calculate_level(self): 75 | """private function to get depth of node""" 76 | if self.parent is None: 77 | return 0 78 | else: 79 | 
return 1 + self.parent._calculate_level() 80 | 81 | def __eq__(self, obj) -> bool: 82 | if isinstance(obj, Node): 83 | return self._pipit_nid == obj._pipit_nid 84 | else: 85 | return False 86 | 87 | 88 | class Graph: 89 | """Represents the calling context tree / call graph""" 90 | 91 | def __init__(self) -> None: 92 | self.roots = [] 93 | 94 | def add_root(self, node): 95 | self.roots.append(node) 96 | 97 | def __str__(self) -> str: 98 | return "Roots: " + str([str(curr_root) for curr_root in self.roots]) 99 | -------------------------------------------------------------------------------- /pipit/readers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of 2 | # Maryland. See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | -------------------------------------------------------------------------------- /pipit/readers/core_reader.py: -------------------------------------------------------------------------------- 1 | from typing import List, Dict 2 | 3 | import pandas 4 | from pipit.trace import Trace 5 | 6 | 7 | class CoreTraceReader: 8 | """ 9 | Helper Object to read traces from different sources and convert them into a common 10 | format 11 | """ 12 | 13 | def __init__(self, start: int = 0, stride: int = 1): 14 | """ 15 | Should be called by each process to create an empty trace per process in the 16 | reader. Creates the following data structures to represent an empty trace: 17 | - events: Dict[int, Dict[int, List[Dict]]] 18 | - stacks: Dict[int, Dict[int, List[int]]] 19 | """ 20 | # keep stride for how much unique id should be incremented 21 | self.stride = stride 22 | 23 | # keep track of a unique id for each event 24 | self.unique_id = start - self.stride 25 | 26 | # events are indexed by process number, then thread number 27 | # stores a list of events 28 | self.events: Dict[int, Dict[int, List[Dict]]] = {} 29 | 30 | # stacks are indexed by process number, then thread number 31 | # stores indices of events in the event list 32 | self.stacks: Dict[int, Dict[int, List[int]]] = {} 33 | 34 | def add_event(self, event: Dict) -> None: 35 | """ 36 | Should be called to add each event to the trace. Will update the event lists and 37 | stacks accordingly. 
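For example, a minimal event dict (illustrative keys and values only, not a required schema) could look like {"Name": "MPI_Send", "Event Type": "Enter", "Timestamp (ns)": 100, "Process": 0, "Thread": 0}; "Process" and "Thread" default to 0 when absent.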
38 | """ 39 | # get process number -- if not present, set to 0 40 | if "Process" in event: 41 | process = event["Process"] 42 | else: 43 | process = 0 44 | 45 | # get thread number -- if not present, set to 0 46 | if "Thread" in event: 47 | thread = event["Thread"] 48 | else: 49 | thread = 0 50 | # event["Thread"] = 0 51 | 52 | # assign a unique id to the event 53 | event["unique_id"] = self.__get_unique_id() 54 | 55 | # get event list 56 | if process not in self.events: 57 | self.events[process] = {} 58 | if thread not in self.events[process]: 59 | self.events[process][thread] = [] 60 | event_list = self.events[process][thread] 61 | 62 | # get stack 63 | if process not in self.stacks: 64 | self.stacks[process] = {} 65 | if thread not in self.stacks[process]: 66 | self.stacks[process][thread] = [] 67 | stack: List[int] = self.stacks[process][thread] 68 | 69 | # if the event is an enter event, add the event to the stack and update the 70 | # parent-child relationships 71 | if event["Event Type"] == "Enter": 72 | self.__update_parent_child_relationships(event, stack, event_list, False) 73 | elif event["Event Type"] == "Instant": 74 | self.__update_parent_child_relationships(event, stack, event_list, True) 75 | # if the event is a leave event, update the matching event and pop from the 76 | # stack 77 | elif event["Event Type"] == "Leave": 78 | self.__update_match_event(event, stack, event_list) 79 | 80 | # Finally add the event to the event list 81 | event_list.append(event) 82 | 83 | def finalize(self): 84 | """ 85 | Converts the events data structure into a pandas dataframe and returns it 86 | """ 87 | all_events = [] 88 | for process in self.events: 89 | for thread in self.events[process]: 90 | all_events.extend(self.events[process][thread]) 91 | 92 | # create a dataframe 93 | trace_df = pandas.DataFrame(all_events) 94 | 95 | trace_df["_matching_event"].fillna(-1, inplace=True) 96 | trace_df["_parent"].fillna(-1, inplace=True) 97 | trace_df["_matching_timestamp"].fillna(-1, inplace=True) 98 | 99 | # categorical for memory savings 100 | trace_df = trace_df.astype( 101 | { 102 | "Name": "category", 103 | "Event Type": "category", 104 | "Process": "category", 105 | "_matching_event": "int32", 106 | "_parent": "int32", 107 | "_matching_timestamp": "int32", 108 | } 109 | ) 110 | return trace_df 111 | 112 | def __update_parent_child_relationships( 113 | self, event: Dict, stack: List[int], event_list: List[Dict], is_instant: bool 114 | ) -> None: 115 | """ 116 | This method can be thought of the update upon an "Enter" event. It adds to the 117 | stack and CCT 118 | """ 119 | if len(stack) == 0: 120 | # root event 121 | event["_parent"] = -1 122 | else: 123 | parent_event = event_list[stack[-1]] 124 | event["_parent"] = parent_event["unique_id"] 125 | 126 | # update stack 127 | if not is_instant: 128 | stack.append(len(event_list)) 129 | 130 | def __update_match_event( 131 | self, leave_event: Dict, stack: List[int], event_list: List[Dict] 132 | ) -> None: 133 | """ 134 | This method can be thought of the update upon a "Leave" event. It pops from the 135 | stack and updates the event list. 
We should look into using this function to add 136 | artificial "Leave" events for unmatched "Enter" events 137 | """ 138 | 139 | while len(stack) > 0: 140 | 141 | # pop events off the stack until the matching "Enter" event is found 142 | enter_event = event_list[stack.pop()] 143 | 144 | if enter_event["Name"] == leave_event["Name"]: 145 | # matching event found 146 | 147 | # update matching event ids 148 | leave_event["_matching_event"] = enter_event["unique_id"] 149 | enter_event["_matching_event"] = leave_event["unique_id"] 150 | 151 | # update matching timestamps 152 | leave_event["_matching_timestamp"] = enter_event["Timestamp (ns)"] 153 | enter_event["_matching_timestamp"] = leave_event["Timestamp (ns)"] 154 | 155 | break 156 | 157 | def __get_unique_id(self) -> int: 158 | self.unique_id += self.stride 159 | return self.unique_id 160 | 161 | 162 | def concat_trace_data(data_list): 163 | """ 164 | Concatenates the data from multiple trace readers into a single trace reader 165 | """ 166 | trace_data = pandas.concat(data_list, ignore_index=True) 167 | # set index to unique_id 168 | trace_data.set_index("unique_id", inplace=True) 169 | trace_data.sort_values( 170 | by="Timestamp (ns)", axis=0, ascending=True, inplace=True, ignore_index=True 171 | ) 172 | return Trace(None, trace_data, None) 173 | -------------------------------------------------------------------------------- /pipit/readers/nsight_reader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of 2 | # Maryland. See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | import pandas as pd 7 | import pipit.trace 8 | 9 | 10 | class NsightReader: 11 | """Reader for Nsight trace files""" 12 | 13 | def __init__(self, file_name, create_cct=False) -> None: 14 | self.file_name = file_name 15 | self.df = None 16 | self.create_cct = create_cct 17 | 18 | def read(self): 19 | """ 20 | This read function directly takes in a csv of the trace report and 21 | utilizes pandas to convert it from a csv into a dataframe. 22 | """ 23 | 24 | # Read in csv 25 | self.df = pd.read_csv(self.file_name) 26 | 27 | # Grab the set of unique PIDs from the PID column to see if 28 | # the trace is multiprocess 29 | pid = set(self.df["PID"]) 30 | 31 | # check if PID and TID are NOT the same, i.e. whether the trace is multithreaded 32 | if self.df["PID"].equals(self.df["TID"]) is False: 33 | # Group the pids together and give each process its own set of threads 34 | for i in pid: 35 | # See which rows match the current PID and grab them in a mask 36 | mask = self.df["PID"] == i 37 | # Creating a set from the matching PID rows dataframe of the TIDs 38 | tid = set(self.df[mask]["TID"]) 39 | # Getting the TID set, creating a dictionary, 40 | # and incrementing the values (0,1,2,...) 41 | tid_dict = dict(zip(tid, range(0, len(tid)))) 42 | # Grabbing the rows with mask and setting the thread column by 43 | # mapping the tids with the tid_dict 44 | self.df.loc[mask, "Thread"] = self.df["TID"].map(tid_dict) 45 | # Converting Thread from float to int 46 | self.df["Thread"] = self.df["Thread"].astype(int) 47 | 48 | # check if the PID set has more than one element, i.e. multiprocess or single process 49 | if len(pid) > 1: 50 | # Set Process column to PID 51 | self.df["Process"] = self.df["PID"] 52 | # Getting the PID set, creating a dictionary, 53 | # and incrementing the values (0,1,2,...)
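# (e.g., a PID set such as {1301, 1305} would be mapped to process ids {0, 1}; the PIDs here are hypothetical and the exact pairing depends on set iteration order)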
54 | pid_dict = dict(zip(pid, range(0, len(pid)))) 55 | # Using the dictionary to replace the Process values 56 | self.df["Process"].replace(pid_dict, inplace=True) 57 | 58 | # Copy self.df to create enter and leave rows 59 | df2 = self.df.copy() 60 | 61 | # Create new columns for self.df with start time to create enter rows 62 | self.df["Event Type"] = "Enter" 63 | self.df["Timestamp (ns)"] = self.df["Start (ns)"] 64 | 65 | # Create new columns for df2 with end time to create leave rows 66 | df2["Event Type"] = "Leave" 67 | df2["Timestamp (ns)"] = df2["End (ns)"] 68 | 69 | # Combine dataframes together 70 | self.df = pd.concat([self.df, df2]) 71 | 72 | # Tidy Dataframe 73 | self.df.drop(["Start (ns)", "End (ns)"], axis=1, inplace=True) 74 | 75 | self.df.sort_values(by="Timestamp (ns)", ascending=True, inplace=True) 76 | 77 | self.df.reset_index(drop=True, inplace=True) 78 | 79 | self.df = self.df.astype( 80 | { 81 | "Event Type": "category", 82 | "Name": "category", 83 | "PID": "category", 84 | "TID": "category", 85 | } 86 | ) 87 | 88 | # Grabbing the list of columns and rearranging them to put 89 | # Timestamp, Event Types, Name, Thread (potentially), 90 | # Process(potentially) in the front of the dataframe 91 | cols = list(self.df) 92 | cols.insert(0, cols.pop(cols.index("Timestamp (ns)"))) 93 | cols.insert(1, cols.pop(cols.index("Event Type"))) 94 | cols.insert(2, cols.pop(cols.index("Name"))) 95 | 96 | if "Process" in self.df.columns: 97 | cols.insert(3, cols.pop(cols.index("Process"))) 98 | if "Thread" in self.df.columns: 99 | cols.insert(3, cols.pop(cols.index("Thread"))) 100 | 101 | elif "Thread" in self.df.columns: 102 | cols.insert(3, cols.pop(cols.index("Thread"))) 103 | 104 | # Applying the column list to the dataframe to rearrange 105 | self.df = self.df.loc[:, cols] 106 | 107 | trace = pipit.trace.Trace(None, self.df) 108 | if self.create_cct: 109 | trace.create_cct() 110 | 111 | return trace 112 | -------------------------------------------------------------------------------- /pipit/readers/nsight_sqlite_reader.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import pipit.trace 4 | import sqlite3 5 | 6 | 7 | class NSightSQLiteReader: 8 | # Dictionary mapping trace type 9 | # (e.g. 
NVTX, CUDA API to SQL queries) 10 | _trace_queries = { 11 | "nvtx": [ 12 | """ 13 | SELECT 14 | start as Enter, 15 | end as Leave, 16 | 'annotation' as type, 17 | IFNULL(text, StringIds.value) as "Name", 18 | (ne.globalTid >> 24) & 0x00FFFFFF AS "Process", 19 | ne.globalTid & 0x00FFFFFF AS "Thread", 20 | jsonText as meta 21 | FROM 22 | NVTX_EVENTS as ne 23 | LEFT JOIN StringIds 24 | ON StringIds.id = ne.textId 25 | WHERE 26 | -- Filter to only include range start/end and push/pop events 27 | ne.eventType in (59, 60) 28 | """ 29 | ], 30 | "cuda_api": [ 31 | """ 32 | SELECT 33 | start as Enter, 34 | end as Leave, 35 | rname.value AS Name, 36 | (cuda_api.globalTid >> 24) & 0x00FFFFFF AS "Process", 37 | cuda_api.globalTid & 0x00FFFFFF AS "Thread", 38 | correlationId As id, 39 | null as meta 40 | FROM 41 | CUPTI_ACTIVITY_KIND_RUNTIME as cuda_api 42 | JOIN ThreadNames AS tname 43 | ON cuda_api.globalTid == tname.globalTid 44 | JOIN 45 | StringIds AS rname 46 | ON cuda_api.nameId = rname.id 47 | JOIN 48 | StringIds AS rname2 49 | ON tname.nameId = rname2.id 50 | """ 51 | ], 52 | "gpu_trace": [ 53 | """ 54 | SELECT 55 | cuda_gpu.start as Enter, 56 | cuda_gpu.end as Leave, 57 | cuda_gpu.deviceId as gpuId, 58 | value as Name, 59 | cuda_gpu.streamId, 60 | 'kernel' as type, 61 | null as bytes, 62 | cuda_gpu.correlationId as id, 63 | (cuda_api.globalTid >> 24) & 0x00FFFFFF AS "Process", 64 | null as meta 65 | FROM CUPTI_ACTIVITY_KIND_KERNEL as cuda_gpu 66 | JOIN StringIds 67 | ON cuda_gpu.shortName = StringIds.id 68 | JOIN CUPTI_ACTIVITY_KIND_RUNTIME as cuda_api 69 | ON cuda_gpu.correlationId = cuda_api.correlationId 70 | """, 71 | """ 72 | SELECT 73 | cuda_memcpy.start as Enter, 74 | cuda_memcpy.end as Leave, 75 | cuda_memcpy.deviceId as gpuId, 76 | memcpy_labels.name as Name, 77 | cuda_memcpy.streamId, 78 | 'cuda_memcpy' as type, 79 | bytes, 80 | cuda_memcpy.correlationId as id, 81 | (cuda_api.globalTid >> 24) & 0x00FFFFFF AS "Process", 82 | null as meta 83 | FROM CUPTI_ACTIVITY_KIND_MEMCPY as cuda_memcpy 84 | JOIN ENUM_CUDA_MEMCPY_OPER as memcpy_labels 85 | ON cuda_memcpy.copyKind = memcpy_labels.id 86 | JOIN CUPTI_ACTIVITY_KIND_RUNTIME as cuda_api 87 | ON cuda_memcpy.correlationId = cuda_api.correlationId 88 | """, 89 | """ 90 | SELECT 91 | cuda_memset.start as Enter, 92 | cuda_memset.end as Leave, 93 | cuda_memset.deviceId as gpuId, 94 | memset_labels.name as Name, 95 | streamId, 96 | 'cuda_memset' as type, 97 | bytes, 98 | cuda_memset.correlationId as id, 99 | (cuda_api.globalTid >> 24) & 0x00FFFFFF AS "Process", 100 | null as meta 101 | FROM CUPTI_ACTIVITY_KIND_MEMSET as cuda_memset 102 | JOIN ENUM_CUDA_MEM_KIND as memset_labels 103 | ON cuda_memset.memKind = memset_labels.id 104 | JOIN CUPTI_ACTIVITY_KIND_RUNTIME as cuda_api 105 | ON cuda_memset.correlationId = cuda_api.correlationId 106 | """, 107 | """ 108 | SELECT 109 | cuda_sync.start as Enter, 110 | cuda_sync.end as Leave, 111 | cuda_sync.deviceId as gpuId, 112 | sync_labels.name as Name, 113 | cuda_sync.streamId, 114 | 'cuda_sync' as type, 115 | null as bytes, 116 | cuda_sync.correlationId as id, 117 | (cuda_api.globalTid >> 24) & 0x00FFFFFF AS "Process", 118 | null as meta 119 | FROM CUPTI_ACTIVITY_KIND_SYNCHRONIZATION as cuda_sync 120 | JOIN ENUM_CUPTI_SYNC_TYPE as sync_labels 121 | ON cuda_sync.syncType = sync_labels.id 122 | JOIN CUPTI_ACTIVITY_KIND_RUNTIME as cuda_api 123 | ON cuda_sync.correlationId = cuda_api.correlationId 124 | """, 125 | """ 126 | SELECT 127 | cuda_graph.start as Enter, 128 | cuda_graph.end as Leave, 129 | 
cuda_graph.deviceId as gpuId, 130 | -- CUDA Graphs are not name-able, so we use their id 131 | -- instead 132 | 'CUDA Graph ' || cuda_graph.graphId as Name, 133 | cuda_graph.streamId, 134 | 'cuda_graph' as type, 135 | null as bytes, 136 | cuda_graph.correlationId as id, 137 | (cuda_api.globalTid >> 24) & 0x00FFFFFF AS "Process", 138 | null as meta 139 | FROM CUPTI_ACTIVITY_KIND_GRAPH_TRACE as cuda_graph 140 | JOIN CUPTI_ACTIVITY_KIND_RUNTIME as cuda_api 141 | ON cuda_graph.correlationId = cuda_api.correlationId 142 | """, 143 | ], 144 | # TODO: reading in all the gpu metrics takes up a lot of memory 145 | # We should figure out which ones we want exactly 146 | # "gpu_metrics": """ 147 | # SELECT GENERIC_EVENTS.rawTimestamp, typeId, data 148 | # FROM GPU_METRICS 149 | # LEFT JOIN GENERIC_EVENTS 150 | # ON GENERIC_EVENTS.typeId = GPU_METRICS.typeId 151 | # """ 152 | } 153 | 154 | def __init__(self, filepath, create_cct=False, trace_types="all") -> None: 155 | self.conn = sqlite3.connect(filepath) 156 | self.create_cct = create_cct 157 | # Get all the table names that exist 158 | # Sometimes, things like the GPU metrics and stuff might not 159 | # exist 160 | get_tables_query = """ 161 | SELECT name FROM sqlite_master WHERE type='table' 162 | """ 163 | self.table_names = set(pd.read_sql_query(get_tables_query, self.conn).squeeze()) 164 | self.trace_queries = NSightSQLiteReader._trace_queries.copy() 165 | if trace_types == "all": 166 | # Even nsight has separate analyses for CUDA API summary, etc. 167 | # We do need a way to compare multiple traces side by side, though 168 | 169 | # Some traces (their tables, e.g. NVTX_EVENTS) may not always be present 170 | # in the sqlite db 171 | # Make sure that all tables that we read in queries are accounted for here 172 | self.trace_types = [] 173 | if "NVTX_EVENTS" in self.table_names: 174 | self.trace_types.append("nvtx") 175 | if "CUPTI_ACTIVITY_KIND_RUNTIME" in self.table_names: 176 | self.trace_types.append("cuda_api") 177 | self.trace_types.append("gpu_trace") 178 | 179 | # GPU metrics are disabled, see comment above 180 | # if "GPU_METRICS" in self.table_names: 181 | # self.trace_types.append("gpu_metrics") 182 | else: 183 | self.trace_types = trace_types 184 | 185 | if "gpu_trace" in self.trace_types: 186 | # Check for existance of CUDA_ACTIVITY_KIND_MEMCPY/ 187 | # CUDA_ACTIVITY_KIND_MEMSET since those can sometimes not exist 188 | 189 | gpu_trace_qs = [] 190 | gpu_trace_needed_tbls = [ 191 | "CUPTI_ACTIVITY_KIND_RUNTIME", 192 | "CUPTI_ACTIVITY_KIND_MEMCPY", 193 | "CUPTI_ACTIVITY_KIND_MEMSET", 194 | "CUPTI_ACTIVITY_KIND_SYNCHRONIZATION", 195 | "CUPTI_ACTIVITY_KIND_GRAPH_TRACE", 196 | ] 197 | 198 | for req_tbl, q in zip( 199 | gpu_trace_needed_tbls, 200 | NSightSQLiteReader._trace_queries["gpu_trace"], 201 | strict=True, 202 | ): 203 | if req_tbl in self.table_names: 204 | gpu_trace_qs.append(q) 205 | self.trace_queries["gpu_trace"] = gpu_trace_qs 206 | 207 | def read(self) -> pipit.trace.Trace: 208 | traces = [] 209 | 210 | for typ in self.trace_types: 211 | dfs = [] 212 | for q in self.trace_queries[typ]: 213 | dfs.append(pd.read_sql_query(q, con=self.conn)) 214 | df = pd.concat(dfs, axis=0) 215 | df["Trace Type"] = typ 216 | traces.append(df) 217 | 218 | # concat traces together row wise 219 | trace_df = pd.concat(traces, axis=0) 220 | 221 | # Melt start/end columns into single event type column 222 | trace_df = pd.melt( 223 | trace_df, 224 | # These are the columns we don't want to melt 225 | # Columns not in here will be melted into a 
single column 226 | id_vars=[col for col in df.columns if col not in {"Enter", "Leave"}], 227 | value_vars=["Enter", "Leave"], 228 | var_name="Event Type", 229 | value_name="Timestamp (ns)", 230 | ) 231 | 232 | # Convert to the pandas nullable dtypes 233 | # This will help preserve e.g. streamId as an 234 | # integer column with nulls instead of casting to 235 | # float64 236 | trace_df = trace_df.convert_dtypes() 237 | 238 | # Cache mapping 239 | trace_df["_matching_event"] = np.concatenate( 240 | [ 241 | np.arange(len(trace_df) // 2, len(trace_df)), 242 | np.arange(0, len(trace_df) // 2), 243 | ] 244 | ) 245 | # Convert to numpy before assignment otherwise pandas 246 | # will try to align indices, which will mess up order 247 | trace_df["_matching_timestamp"] = trace_df["Timestamp (ns)"][ 248 | trace_df["_matching_event"] 249 | ].to_numpy() 250 | 251 | # Cannot use ignore_index = True since that breaks the 252 | # _matching_event col 253 | trace_df = trace_df.sort_values(by="Timestamp (ns)") 254 | 255 | if self.trace_types == ["gpu_trace"]: 256 | parallelism_levels = ["gpuId", "streamId"] 257 | elif self.trace_types == ["cuda_api"]: 258 | parallelism_levels = ["Process"] 259 | else: 260 | parallelism_levels = ["Process", "gpuId", "streamId"] 261 | 262 | trace = pipit.trace.Trace(None, trace_df, parallelism_levels=parallelism_levels) 263 | if self.create_cct: 264 | trace.create_cct() 265 | 266 | # Call match caller callee to recreate hierarchical 267 | # relationship between annotations 268 | trace._match_caller_callee() 269 | 270 | # Associate CUDA API calls with memory operations or 271 | # kernel launches 272 | # Note: looking at _match_caller_callee 273 | # _parent should point to the "Enter" event of the parent 274 | # _children also points to the "Enter" events of the children of 1 node 275 | 276 | enter_mask = trace_df["Event Type"] == "Enter" 277 | cuda_api_mask = trace_df["Trace Type"] == "cuda_api" 278 | calls_that_launch = ( 279 | trace_df.loc[cuda_api_mask & enter_mask] 280 | .reset_index() 281 | .merge( 282 | trace_df.loc[~cuda_api_mask & enter_mask].reset_index(), 283 | on="id", 284 | how="inner", 285 | ) 286 | ) 287 | # TODO: can get rid of the apply if we use an Arrow ListDtype for children 288 | # globally 289 | children = calls_that_launch["index_y"].apply(lambda x: [x]) 290 | # Convert to numpy otherwise the index messes stuff up 291 | trace_df.loc[calls_that_launch["index_x"].to_numpy(), "_children"] = ( 292 | children.to_numpy() 293 | ) 294 | trace_df.loc[calls_that_launch["index_y"].to_numpy(), "_parent"] = ( 295 | calls_that_launch["index_x"].to_numpy() 296 | ) 297 | 298 | return trace 299 | -------------------------------------------------------------------------------- /pipit/readers/otf2_reader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of 2 | # Maryland. See the top-level LICENSE file for details. 
3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | import otf2 7 | import numpy as np 8 | import pandas as pd 9 | import multiprocessing as mp 10 | import pipit.trace 11 | 12 | 13 | class OTF2Reader: 14 | """Reader for OTF2 trace files""" 15 | 16 | def __init__(self, dir_name, num_processes=None, create_cct=False): 17 | self.dir_name = dir_name # directory of otf2 file being read 18 | self.file_name = self.dir_name + "/traces.otf2" 19 | self.create_cct = create_cct 20 | 21 | num_cpus = mp.cpu_count() 22 | if num_processes is None or num_processes < 1 or num_processes > num_cpus: 23 | # uses all processes to parallelize reading by default 24 | self.num_processes = num_cpus 25 | else: 26 | self.num_processes = num_processes 27 | 28 | def field_to_val(self, field): 29 | """ 30 | Handles otf2 and _otf2 objects 31 | 32 | Arguments: 33 | field: an otf2 object, _otf2 object, or any other field 34 | that can have different data types such as strings, ints, etc 35 | 36 | Returns: 37 | if otf2 definition, a string representation of the definition and 38 | its ID such as "Region 19" that the user can use to refer back 39 | to the definitions dataframe 40 | else if other otf2 or _otf2 objects, a simple string representation of 41 | the object 42 | else don't make any changes 43 | 44 | This function also ensures that there is no pickling of otf2 or _otf2 45 | objects, which could cause errors 46 | """ 47 | 48 | """ 49 | Note: any occurrence of [25:-2] or something similar 50 | is some simple string manipulation to only extract the relevant 51 | part of the string and not information like the type such as 52 | otf2.definitions, etc 53 | """ 54 | 55 | field_type = str(type(field)) 56 | if "otf2.definitions" in field_type: 57 | """ 58 | Example: An event can have an attribute called region which corresponds 59 | to a definition. We strip the string and extract only the relevant 60 | information, which is the type of definition such as Region and also 61 | append its id (like Region 6) so that this definition can be accessed 62 | in the Definitions DataFrame 63 | """ 64 | return field_type[25:-2] + " " + str(getattr(field, "_ref")) 65 | elif "_otf2" in field_type or "otf2" in field_type: 66 | """ 67 | Example: A measurement event has an attribute called measurement mode 68 | which is either MeasurementMode.OFF or MeasurementMode.ON. These are not 69 | definitions, but they are an object in the lower level _otf2 library, 70 | and to ensure no pickling errors, I convert these objects to their 71 | string representation 72 | """ 73 | return str(field) 74 | else: 75 | "not an otf2 type, then just return normally" 76 | return field 77 | 78 | def handle_data(self, data): 79 | """ 80 | Handles different data structures 81 | 82 | Arguments: 83 | data: could be a list, tuple, set, dict, or any other python data type 84 | 85 | Returns: 86 | the same data structure as the passed argument but field_to_val is applied 87 | to all of the values it contains 88 | 89 | Note: all of the below cases handle the case where the data structure 90 | could be nested, which is always possibility depending on the trace's 91 | specific attributes 92 | """ 93 | 94 | if isinstance(data, list): 95 | return [self.handle_data(data_element) for data_element in data] 96 | elif isinstance(data, dict): 97 | """ 98 | Example: ProgramBegin events have an attribute that is a definition 99 | and quite ironically, also known as attribute. 
These are stored in 100 | a dictionary where the key is a definition like "Attribute 2" and 101 | the integer like 15968 102 | """ 103 | return { 104 | self.field_to_val(data_key): self.handle_data(data_value) 105 | for data_key, data_value in data.items() 106 | } 107 | elif isinstance(data, tuple): 108 | """ 109 | Example: There is a definition called CartTopology which has a 110 | field called dimensions that is a tuple of two other definitions 111 | called CartDimensions, showing why this nested structure is needed 112 | """ 113 | return tuple([self.handle_data(data_element) for data_element in data]) 114 | elif isinstance(data, set): 115 | """ 116 | Haven't encountered this type, but added just in case any situations like 117 | the above ones do arise for this data type 118 | """ 119 | return set([self.handle_data(data_element) for data_element in data]) 120 | else: 121 | "this represents the case for most fields/attributes" 122 | return self.field_to_val(data) 123 | 124 | def fields_to_dict(self, def_object): 125 | """ 126 | converts the fields in the attribute column of a definition 127 | object to a dictionary 128 | """ 129 | 130 | fields_dict = {} 131 | # iterates through the fields of the definition 132 | # (ex: region has fields like name, paradigm source file, etc) 133 | for field in def_object._fields: 134 | field_name = str(field.name) 135 | # use the handle_data function to process the field's data appropriately 136 | fields_dict[field_name] = self.handle_data(getattr(def_object, field_name)) 137 | 138 | if len(fields_dict) == 1: 139 | # collapse single dictionaries to a value 140 | return list(fields_dict.values())[0] 141 | else: 142 | return fields_dict 143 | 144 | def events_reader(self, rank_size): 145 | """ 146 | Serial events reader that reads a subset of the trace 147 | 148 | Arguments: 149 | rank_size: a tuple containing the rank of the process 150 | and the size/total number of processors that are being used 151 | 152 | Returns: 153 | a dictionary with a subset of the trace events that can be converted 154 | to a dataframe 155 | """ 156 | 157 | with otf2.reader.open(self.file_name) as trace: 158 | # extracts the rank and size 159 | # and gets all the locations 160 | # of the trace 161 | rank, size = rank_size[0], rank_size[1] 162 | locations = list(trace.definitions._locations) 163 | num_locations = len(locations) 164 | 165 | # base number of locations read by each process 166 | per_process = int(num_locations // size) 167 | 168 | # remainder number of locations to be split evenly 169 | remainder = int(num_locations % size) 170 | 171 | if rank < remainder: 172 | """ 173 | Example: 174 | For the reading of 30 locations split over 14 processes, 175 | first 2 processes will read 3 locations each since the remainder 176 | is 2. 177 | """ 178 | begin_int = rank * (per_process + 1) 179 | end_int = (rank + 1) * (per_process + 1) 180 | else: 181 | """ 182 | Example: 183 | For the reading of 30 locations split over 14 processes, 184 | last 12 processes will read 2 locations each. The starting index 185 | accounts for the fact that the first two will read 3 locations each. 
186 | """ 187 | begin_int = (rank * per_process) + remainder 188 | end_int = ((rank + 1) * per_process) + remainder 189 | 190 | # select the locations to read based on above calculations 191 | loc_events = list(trace.events(locations[begin_int:end_int]).__iter__()) 192 | 193 | # columns of the DataFrame 194 | timestamps, event_types, event_attributes, names = [], [], [], [] 195 | 196 | # note: the below lists are for storing logical ids 197 | process_ids, thread_ids = [], [] 198 | 199 | """ 200 | Relevant Documentation for Metrics: 201 | https://scorepci.pages.jsc.fz-juelich.de/otf2-pipelines/doc.r4707/python/basics.html#metrics 202 | """ 203 | 204 | # get members of metric class 205 | metric_members = ( 206 | self.definitions.loc[ 207 | self.definitions["Definition Type"] == "MetricClass" 208 | ]["Attributes"] 209 | .map(lambda attr: attr["members"]) 210 | .values 211 | ) 212 | metric_members = [] if len(metric_members) == 0 else metric_members[0] 213 | 214 | # ids of metric members 215 | metric_ids = list( 216 | map(lambda metric_member: int(metric_member[-1]), metric_members) 217 | ) 218 | 219 | # names of metrics 220 | metric_names = ( 221 | self.definitions.loc[ 222 | (self.definitions["Definition Type"] == "MetricMember") 223 | & (self.definitions["ID"].isin(metric_ids)) 224 | ]["Attributes"] 225 | .map(lambda attr: attr["name"]) 226 | .values 227 | ) 228 | 229 | # maps each metric to a list of its values 230 | metrics_dict = {metric_name: [] for metric_name in metric_names} 231 | 232 | # used to keep track of time that the 233 | # most recent metrics that were read at 234 | prev_metric_time = -1 235 | 236 | # iterates through the events and processes them 237 | for loc_event in loc_events: 238 | # extracts the location and event 239 | # location could be thread, process, etc 240 | loc, event = loc_event[0], loc_event[1] 241 | 242 | # To Do: 243 | # Support for GPU events has to be 244 | # added and unified across readers. 245 | if str(loc.type)[13:] == "CPU_THREAD": 246 | # don't add metric events as a separate row, 247 | # and add their values into columns instead 248 | if isinstance(event, otf2.events.Metric): 249 | # Since the location is a cpu thread, we know 250 | # that the metric event is of type MetricClass, 251 | # which has a list of MetricMembers. 252 | metrics = list( 253 | map(lambda metric: metric.name, event.metric.members) 254 | ) 255 | metric_values = event.values 256 | 257 | # append the values for the metrics 258 | # to their appropriate lists 259 | for i in range(len(metrics)): 260 | metrics_dict[metrics[i]].append(metric_values[i]) 261 | 262 | # store the metrics and their timestamp 263 | prev_metric_time = event.time 264 | else: 265 | # MetricClass metric events are synchronous 266 | # and coupled with an enter or leave event that 267 | # has the same timestamp 268 | if event.time != prev_metric_time: 269 | # if the event is not paired with any metric, then 270 | # add placeholders for all the metric lists 271 | for metric in metric_names: 272 | metrics_dict[metric].append(float("nan")) 273 | 274 | # reset this as a metric event was not read 275 | prev_metric_time = -1 276 | 277 | """ 278 | Below is code to read the primary information about the 279 | non-metric event, such as location, attributes, etc. 
280 | """ 281 | 282 | process_id = loc.group._ref 283 | process_ids.append(process_id) 284 | 285 | # subtract the minimum location number of a process 286 | # from the location number to get threads numbered 287 | # 0 to (num_threads per process - 1) for each process. 288 | thread_ids.append( 289 | loc._ref - self.process_threads_map[process_id] 290 | ) 291 | 292 | # type of event - enter, leave, or other types 293 | event_type = str(type(event))[20:-2] 294 | if event_type == "Enter" or event_type == "Leave": 295 | event_types.append(event_type) 296 | else: 297 | event_types.append("Instant") 298 | 299 | if event_type in ["Enter", "Leave"]: 300 | names.append(event.region.name) 301 | else: 302 | names.append(event_type) 303 | 304 | timestamps.append(event.time) 305 | 306 | # only add attributes for non-leave rows so that 307 | # there aren't duplicate attributes for a single event 308 | if event_type != "Leave": 309 | attributes_dict = {} 310 | 311 | # iterates through the event's attributes 312 | # (ex: region, bytes sent, etc) 313 | for key, value in vars(event).items(): 314 | # only adds non-empty attributes 315 | # and ignores time so there isn't a duplicate time 316 | if value is not None and key != "time": 317 | # uses field_to_val to convert all data types 318 | # and ensure that there are no pickling errors 319 | attributes_dict[self.field_to_val(key)] = ( 320 | self.handle_data(value) 321 | ) 322 | event_attributes.append(attributes_dict) 323 | else: 324 | # nan attributes for leave rows 325 | # attributes column is of object dtype 326 | event_attributes.append(None) 327 | 328 | trace.close() # close event files 329 | 330 | # returns dataframe with all events and their fields 331 | trace_df = pd.DataFrame( 332 | { 333 | "Timestamp (ns)": timestamps, 334 | "Event Type": event_types, 335 | "Name": names, 336 | "Thread": thread_ids, 337 | "Process": process_ids, 338 | "Attributes": event_attributes, 339 | } 340 | ) 341 | 342 | for metric, metric_values in metrics_dict.items(): 343 | # only add columns of metrics which are populated with 344 | # some values (sometimes a metric could be defined but not 345 | # appear in the trace itself) 346 | if not np.isnan(metric_values).all(): 347 | trace_df[metric] = metric_values 348 | 349 | return trace_df 350 | 351 | def read_definitions(self, trace): 352 | """ 353 | Reads the definitions from the trace and converts them to a Pandas 354 | DataFrame 355 | """ 356 | 357 | # OTF2 stores locations numbered from 0 to the (total number of threads - 1) 358 | # across all processes. This dict will help us convert those to be orderered 359 | # from 0 to (number of threads for each process - 1) per process instead. 
360 | self.process_threads_map = dict() 361 | 362 | # ids are the _ref attribute of an object 363 | # all objects stored in a reference registry 364 | # (such as regions) have such an id 365 | def_name, def_id, attributes = [], [], [] 366 | 367 | # iterating through definition registry attributes 368 | # such as regions, strings, locations, etc 369 | for key in vars(trace.definitions).keys(): 370 | # current attribute such as region, string, etc 371 | def_attribute = getattr(trace.definitions, str(key)) 372 | 373 | # only definition type that is not a registry 374 | if key == "clock_properties": 375 | # clock properties doesn't have an ID 376 | def_id.append(float("NaN")) 377 | def_name.append(str(type(def_attribute))[25:-2]) 378 | attributes.append(self.fields_to_dict(def_attribute)) 379 | 380 | # ignores otf2 wrapper properties (don't provide useful info) 381 | elif "otf2" not in key: 382 | """ 383 | iterate through registry elements 384 | (ex: iterating through all regions 385 | if region is the current definition) 386 | def_object is a single object of that definition 387 | type for example, if def_attribute is regions, 388 | then def_object is a single region being looked at 389 | """ 390 | for def_object in def_attribute.__iter__(): 391 | # add to process threads map dict if you encounter a new location 392 | if ( 393 | key == "_locations" 394 | and str(def_object.type) == "LocationType.CPU_THREAD" 395 | ): 396 | location_num, process_num = ( 397 | def_object._ref, 398 | def_object.group._ref, 399 | ) 400 | 401 | # each process (location group) will be mapped to its 402 | # minimum location number, which we will use to number threads 403 | # appropriately by subtracting that min from its location nums 404 | if process_num not in self.process_threads_map: 405 | self.process_threads_map[process_num] = location_num 406 | elif location_num < self.process_threads_map[process_num]: 407 | self.process_threads_map[process_num] = location_num 408 | 409 | if hasattr(def_object, "_ref"): 410 | # only add ids for those definitions that have it 411 | def_id.append(def_object._ref) 412 | else: 413 | # ID column is of float64 dtype 414 | def_id.append(float("NaN")) 415 | 416 | # name of the definition 417 | def_name.append(str(type(def_object))[25:-2]) 418 | 419 | # converts a definition object to a dictionary of its attributes 420 | # this contains information that a user would have to access the 421 | # definitions DataFrame for 422 | attributes.append(self.fields_to_dict(def_object)) 423 | 424 | # return the definitions as a DataFrame 425 | definitions_dataframe = pd.DataFrame( 426 | {"Definition Type": def_name, "ID": def_id, "Attributes": attributes} 427 | ) 428 | 429 | # Definition column is of categorical dtype 430 | definitions_dataframe = definitions_dataframe.astype( 431 | {"Definition Type": "category"} 432 | ) 433 | 434 | return definitions_dataframe 435 | 436 | def read_events(self): 437 | """ 438 | Writes the events to a Pandas DataFrame 439 | using the multiprocessing library and the events_reader 440 | function 441 | """ 442 | 443 | # parallelizes the reading of events 444 | # using the multiprocessing library 445 | pool_size, pool = self.num_processes, mp.Pool(self.num_processes) 446 | 447 | # list of dataframes returned by the processes pool 448 | events_dataframes = pool.map( 449 | self.events_reader, [(rank, pool_size) for rank in range(pool_size)] 450 | ) 451 | 452 | pool.close() 453 | 454 | # merges the dataframe into one events dataframe 455 | events_dataframe = 
pd.concat(events_dataframes) 456 | del events_dataframes 457 | 458 | # accessing the clock properties of the trace using the definitions 459 | clock_properties = self.definitions.loc[ 460 | self.definitions["Definition Type"] == "ClockProperties" 461 | ]["Attributes"].values[0] 462 | offset, resolution = ( 463 | clock_properties["global_offset"], 464 | clock_properties["timer_resolution"], 465 | ) 466 | 467 | # shifting the timestamps by the global offset 468 | # and dividing by the resolution to convert to nanoseconds 469 | # as per OTF2's website 470 | events_dataframe["Timestamp (ns)"] -= offset 471 | events_dataframe["Timestamp (ns)"] *= (10**9) / resolution 472 | 473 | # ensures the DataFrame is in order of increasing timestamp 474 | events_dataframe.sort_values( 475 | by="Timestamp (ns)", axis=0, ascending=True, inplace=True, ignore_index=True 476 | ) 477 | 478 | # convert these to ints 479 | # (sometimes they get converted to floats 480 | # while concatenating dataframes) 481 | events_dataframe = events_dataframe.astype( 482 | {"Thread": "int32", "Process": "int32"} 483 | ) 484 | 485 | # using categorical dtypes for memory optimization 486 | # (only efficient when used for categorical data) 487 | events_dataframe = events_dataframe.astype( 488 | { 489 | "Event Type": "category", 490 | "Name": "category", 491 | "Thread": "category", 492 | "Process": "category", 493 | } 494 | ) 495 | 496 | return events_dataframe 497 | 498 | def read(self): 499 | """ 500 | Returns a Trace object for the otf2 file 501 | that has one definitions DataFrame and another 502 | events DataFrame as its primary attributes 503 | """ 504 | 505 | with otf2.reader.open(self.file_name) as trace: # noqa: F821 506 | self.definitions = self.read_definitions(trace) # definitions 507 | 508 | # if a trace has n locations, we should only parallelize 509 | # the reading of events over a number of processes 510 | # equal to n at a maximum 511 | num_locations = len(trace.definitions._locations) 512 | if self.num_processes > num_locations: 513 | self.num_processes = num_locations 514 | 515 | # close the trace and open it later per process 516 | trace.close() 517 | 518 | self.events = self.read_events() # events 519 | 520 | trace = pipit.trace.Trace(self.definitions, self.events) 521 | if self.create_cct: 522 | trace.create_cct() 523 | 524 | return trace 525 | -------------------------------------------------------------------------------- /pipit/readers/projections_reader.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of 2 | # Maryland. See the top-level LICENSE file for details. 
3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | import os 7 | import gzip 8 | import pipit.trace 9 | import pandas as pd 10 | import multiprocessing as mp 11 | 12 | 13 | class ProjectionsConstants: 14 | """ 15 | Projection constants are copied over from projections -- used to 16 | determine type of line in log files 17 | """ 18 | 19 | # Message Creation po 20 | CREATION = 1 21 | 22 | BEGIN_PROCESSING = 2 23 | END_PROCESSING = 3 24 | ENQUEUE = 4 25 | DEQUEUE = 5 26 | BEGIN_COMPUTATION = 6 27 | END_COMPUTATION = 7 28 | 29 | BEGIN_INTERRUPT = 8 30 | END_INTERRUPT = 9 31 | MESSAGE_RECV = 10 32 | BEGIN_TRACE = 11 33 | END_TRACE = 12 34 | USER_EVENT = 13 35 | BEGIN_IDLE = 14 36 | END_IDLE = 15 37 | BEGIN_PACK = 16 38 | END_PACK = 17 39 | BEGIN_UNPACK = 18 40 | END_UNPACK = 19 41 | CREATION_BCAST = 20 42 | 43 | CREATION_MULTICAST = 21 44 | 45 | # A record for a user supplied integer value, likely a timestep 46 | USER_SUPPLIED = 26 47 | 48 | # A record for the memory usage 49 | MEMORY_USAGE = 27 50 | 51 | # A record for a user supplied string 52 | USER_SUPPLIED_NOTE = 28 53 | USER_SUPPLIED_BRACKETED_NOTE = 29 54 | 55 | BEGIN_USER_EVENT_PAIR = 98 56 | END_USER_EVENT_PAIR = 99 57 | USER_EVENT_PAIR = 100 58 | USER_STAT = 32 59 | # *** USER category *** 60 | NEW_CHARE_MSG = 0 61 | # NEW_CHARE_NO_BALANCE_MSG = 1; 62 | FOR_CHARE_MSG = 2 63 | BOC_INIT_MSG = 3 64 | # BOC_MSG = 4; 65 | # TERMINATE_TO_ZERO = 5; # never used ?? 66 | # TERMINATE_SYS = 6; # never used ?? 67 | # INIT_COUNT_MSG = 7; 68 | # READ_VAR_MSG = 8; 69 | # READ_MSG_MSG = 9; 70 | # BROADCAST_BOC_MSG = 10; 71 | # DYNAMIC_BOC_INIT_MSG = 11; 72 | 73 | # *** IMMEDIATE category *** 74 | LDB_MSG = 12 75 | # VID_SEND_OVER_MSG = 13; 76 | QD_BOC_MSG = 14 77 | QD_BROADCAST_BOC_MSG = 15 78 | # IMM_BOC_MSG = 16; 79 | # IMM_BROADCAST_BOC_MSG = 17; 80 | # INIT_BARRIER_PHASE_1 = 18; 81 | # INIT_BARRIER_PHASE_2 = 19; 82 | 83 | 84 | class STSReader: 85 | def __init__(self, file_location): 86 | self.sts_file = open(file_location, "r") # self.chares = {} 87 | 88 | # In 'self.entries', each entry stores (entry_name: str, chare_id: int) 89 | self.entries = {} 90 | 91 | # Stores user event names: {user_event_id: user event name} 92 | self.user_events = {} 93 | 94 | # Stores user stat names: {user_event_id: user stat name} 95 | self.user_stats = {} 96 | 97 | self.read_sts_file() 98 | 99 | # to get name of entry print > 100 | def get_entry_name(self, entry_id): 101 | # self.entries[entry_id][1] is the chare_id (index for self.chares) 102 | if entry_id not in self.entries: 103 | return "" 104 | entry_name, chare_id = self.entries[entry_id] 105 | ret_val = entry_name 106 | if chare_id in self.chares: 107 | return self.chares[chare_id][0] + "::" + ret_val 108 | else: 109 | return ret_val 110 | 111 | # To get the dimension of an entry 112 | def get_dimension(self, entry_id): 113 | return self.chares[self.entries[entry_id][1]][1] 114 | 115 | # Gets the user event name from the user_event_id 116 | def get_user_event(self, user_event_id): 117 | return self.user_events[user_event_id] 118 | 119 | # Gets the name of the user stat from the user_event_id 120 | def get_user_stat(self, user_event_id): 121 | return self.user_stats[user_event_id] 122 | 123 | # unsure what this is used for, but necessary to read PROCESSING 124 | def get_num_perf_counts(self): 125 | if hasattr(self, "papi_event_names"): 126 | return len(self.papi_event_names) 127 | else: 128 | return 0 129 | # self.entries[entry_id][1] is the chare_id (index for self.chares) 130 | 131 | # Gets event name from 
event_id 132 | def get_event_name(self, event_id): 133 | return self.user_events[event_id] 134 | 135 | def read_sts_file(self): 136 | for line in self.sts_file: 137 | line_arr = line.split() 138 | 139 | # Note: I'm disregarding TOTAL_STATS and TOTAL_EVENTS, because 140 | # projections reader disregards them 141 | 142 | # Note: currently not reading/storing VERSION, MACHINE, SMPMODE, 143 | # COMMANDLINE, CHARMVERSION, USERNAME, HOSTNAME 144 | 145 | # create chares array 146 | # In 'self.chares', each entry stores (chare_name: str, dimension: int) 147 | if line_arr[0] == "TOTAL_CHARES": 148 | total_chares = int(line_arr[1]) 149 | self.chares = [None] * total_chares 150 | 151 | elif line_arr[0] == "TOTAL_EPS": 152 | self.num_eps = int(line_arr[1]) 153 | 154 | # get num processors 155 | elif line_arr[0] == "PROCESSORS": 156 | self.num_pes = int(line_arr[1]) 157 | 158 | # create message array 159 | elif line_arr[0] == "TOTAL_MSGS": 160 | total_messages = int(line_arr[1]) 161 | self.message_table = [None] * total_messages 162 | elif line_arr[0] == "TIMESTAMP": 163 | self.timestamp_string = line_arr[1] 164 | 165 | # Add to self.chares 166 | elif line_arr[0] == "CHARE": 167 | id = int(line_arr[1]) 168 | name = " ".join(line_arr[2:-1])[1:-1] 169 | dimensions = int(line_arr[-1]) 170 | self.chares[id] = (name, dimensions) 171 | 172 | # add to self.entries 173 | elif line_arr[0] == "ENTRY": 174 | # Need to concat entry_name 175 | while not line_arr[3].endswith('"'): 176 | line_arr[3] = line_arr[3] + " " + line_arr[4] 177 | del line_arr[4] 178 | 179 | id = int(line_arr[2]) 180 | entry_name = line_arr[3][1 : len(line_arr[3]) - 1] 181 | chare_id = int(line_arr[4]) 182 | self.entries[id] = (entry_name, chare_id) 183 | 184 | # Add to message_table 185 | # Need clarification on this, as message_table is never referenced in 186 | # projections 187 | elif line_arr[0] == "MESSAGE": 188 | id = int(line_arr[1]) 189 | message_size = int(line_arr[2]) 190 | self.message_table[id] = message_size 191 | 192 | # Read/store event 193 | elif line_arr[0] == "EVENT": 194 | id = int(line_arr[1]) 195 | event_name = "" 196 | # rest of line is the event name 197 | for i in range(2, len(line_arr)): 198 | event_name = event_name + line_arr[i] + " " 199 | self.user_events[id] = event_name 200 | 201 | # Read/store user stat 202 | elif line_arr[0] == "STAT": 203 | id = int(line_arr[1]) 204 | event_name = "" 205 | # rest of line is the stat 206 | for i in range(2, len(line_arr)): 207 | event_name = event_name + line_arr[i] + " " 208 | self.user_stats[id] = event_name 209 | 210 | # create papi array 211 | elif line_arr[0] == "TOTAL_PAPI_EVENTS": 212 | num_papi_events = int(line_arr[1]) 213 | self.papi_event_names = [None] * num_papi_events 214 | 215 | # Unsure of what these are for 216 | elif line_arr[0] == "PAPI_EVENT": 217 | id = int(line_arr[1]) 218 | papi_event = line_arr[2] 219 | self.papi_event_names[id] = papi_event 220 | 221 | self.sts_file.close() 222 | 223 | 224 | class ProjectionsReader: 225 | def __init__( 226 | self, projections_directory: str, num_processes=None, create_cct=False 227 | ) -> None: 228 | if not os.path.isdir(projections_directory): 229 | raise ValueError("Not a valid directory.") 230 | 231 | # iterate through files in the directory to find sts file 232 | directory_contents = os.listdir(projections_directory) 233 | for file in directory_contents: 234 | if file.endswith(".sts"): 235 | if hasattr(self, "executable_location"): 236 | raise ValueError( 237 | "Invalid directory for projections - multiple sts 
files found." 238 | ) 239 | else: 240 | executable_name = file[0:-4] 241 | self.executable_location = os.path.join( 242 | projections_directory, executable_name 243 | ) 244 | 245 | if not hasattr(self, "executable_location"): 246 | raise ValueError("Invalid directory for projections - no sts files found.") 247 | 248 | self.num_pes = STSReader(self.executable_location + ".sts").num_pes 249 | 250 | # make sure all the log files exist 251 | for i in range(self.num_pes): 252 | log_file = executable_name + "." + str(i) + ".log.gz" 253 | if log_file not in directory_contents: 254 | raise ValueError( 255 | ( 256 | "Invalid directory for projections - the sts file states that" 257 | "there are " 258 | ) 259 | + str(i) 260 | + " PEs, but log file " 261 | + log_file 262 | + " is missing." 263 | ) 264 | 265 | num_cpus = mp.cpu_count() 266 | if num_processes is None or num_processes < 1 or num_processes > num_cpus: 267 | # uses all processes to parallelize reading by default 268 | self.num_processes = num_cpus 269 | else: 270 | self.num_processes = num_processes 271 | 272 | self.create_cct = create_cct 273 | 274 | # Returns an empty dict, used for reading log file into dataframe 275 | @staticmethod 276 | def _create_empty_dict() -> dict: 277 | return { 278 | "Name": [], 279 | "Event Type": [], 280 | "Timestamp (ns)": [], 281 | "Process": [], 282 | "Attributes": [], 283 | } 284 | 285 | def read(self): 286 | if self.num_pes < 1: 287 | return None 288 | 289 | if self.num_processes > self.num_pes: 290 | self.num_processes = self.num_pes 291 | 292 | pool_size, pool = self.num_processes, mp.Pool(self.num_processes) 293 | 294 | # Read each log file and store as list of dataframes 295 | dataframes_list = pool.map( 296 | self._read_log_file, [(rank, pool_size) for rank in range(pool_size)] 297 | ) 298 | 299 | pool.close() 300 | 301 | # Concatenate the dataframes list into dataframe containing entire trace 302 | trace_df = pd.concat(dataframes_list, ignore_index=True) 303 | trace_df.sort_values( 304 | by="Timestamp (ns)", axis=0, ascending=True, inplace=True, ignore_index=True 305 | ) 306 | 307 | # categorical for memory savings 308 | trace_df = trace_df.astype( 309 | { 310 | "Name": "category", 311 | "Event Type": "category", 312 | "Process": "category", 313 | } 314 | ) 315 | 316 | # re-order columns 317 | trace_df = trace_df[ 318 | ["Timestamp (ns)", "Event Type", "Name", "Process", "Attributes"] 319 | ] 320 | 321 | trace = pipit.trace.Trace(None, trace_df) 322 | if self.create_cct: 323 | trace.create_cct() 324 | 325 | return trace 326 | 327 | def _read_log_file(self, rank_size) -> pd.DataFrame: 328 | # has information needed in sts file 329 | sts_reader = STSReader(self.executable_location + ".sts") 330 | 331 | rank, size = rank_size[0], rank_size[1] 332 | per_process = int(self.num_pes // size) 333 | remainder = int(self.num_pes % size) 334 | 335 | if rank < remainder: 336 | begin_int = rank * (per_process + 1) 337 | end_int = (rank + 1) * (per_process + 1) 338 | else: 339 | begin_int = (rank * per_process) + remainder 340 | end_int = ((rank + 1) * per_process) + remainder 341 | 342 | dfs = [] 343 | for pe_num in range(begin_int, end_int, 1): 344 | # create an empty dict to append to 345 | data = self._create_empty_dict() 346 | 347 | # opening the log file we need to read 348 | log_file = gzip.open( 349 | self.executable_location + "." 
+ str(pe_num) + ".log.gz", "rt" 350 | ) 351 | 352 | # Basing read on projections log reader and log entry viewer 353 | # Iterated through every line in the file and adds to dict 354 | for line in log_file: 355 | line_arr = line.split() 356 | 357 | if not line_arr[0].isnumeric(): 358 | pass 359 | 360 | elif int(line_arr[0]) == ProjectionsConstants.BEGIN_IDLE: 361 | time = int(line_arr[1]) * 1000 362 | pe = int(line_arr[2]) 363 | 364 | details = {"From PE": pe} 365 | 366 | _add_to_trace_dict(data, "Idle", "Enter", time, pe_num, details) 367 | 368 | elif int(line_arr[0]) == ProjectionsConstants.END_IDLE: 369 | time = int(line_arr[1]) * 1000 370 | pe = int(line_arr[2]) 371 | 372 | details = {"From PE": pe} 373 | 374 | _add_to_trace_dict(data, "Idle", "Leave", time, pe_num, details) 375 | 376 | # Pack message to be sent 377 | elif int(line_arr[0]) == ProjectionsConstants.BEGIN_PACK: 378 | time = int(line_arr[1]) * 1000 379 | pe = int(line_arr[2]) 380 | 381 | details = {"From PE": pe} 382 | 383 | _add_to_trace_dict(data, "Pack", "Enter", time, pe_num, details) 384 | 385 | elif int(line_arr[0]) == ProjectionsConstants.END_PACK: 386 | time = int(line_arr[1]) * 1000 387 | pe = int(line_arr[2]) 388 | 389 | details = {"From PE": pe} 390 | 391 | _add_to_trace_dict(data, "Pack", "Leave", time, pe_num, details) 392 | 393 | # Unpacking a received message 394 | elif int(line_arr[0]) == ProjectionsConstants.BEGIN_UNPACK: 395 | time = int(line_arr[1]) * 1000 396 | pe = int(line_arr[2]) 397 | 398 | details = {"From PE": pe} 399 | 400 | _add_to_trace_dict(data, "Unpack", "Enter", time, pe_num, details) 401 | 402 | elif int(line_arr[0]) == ProjectionsConstants.END_UNPACK: 403 | time = int(line_arr[1]) * 1000 404 | pe = int(line_arr[2]) 405 | 406 | details = {"From PE": pe} 407 | 408 | _add_to_trace_dict(data, "Unpack", "Leave", time, pe_num, details) 409 | 410 | elif int(line_arr[0]) == ProjectionsConstants.USER_SUPPLIED: 411 | user_supplied = line_arr[1] 412 | details = {"User Supplied": user_supplied} 413 | 414 | _add_to_trace_dict( 415 | data, "User Supplied", "Instant", -1, pe_num, details 416 | ) 417 | 418 | elif int(line_arr[0]) == ProjectionsConstants.USER_SUPPLIED_NOTE: 419 | time = line_arr[1] * 1000 420 | note = "" 421 | for i in range(2, len(line_arr)): 422 | note = note + line_arr[i] + " " 423 | 424 | details = {"Note": note} 425 | 426 | _add_to_trace_dict( 427 | data, "User Supplied Note", "Instant", time, pe_num, details 428 | ) 429 | 430 | # Not sure if this should be instant or enter/leave 431 | elif ( 432 | int(line_arr[0]) 433 | == ProjectionsConstants.USER_SUPPLIED_BRACKETED_NOTE 434 | ): 435 | time = line_arr[1] * 1000 436 | end_time = line_arr[2] * 1000 437 | user_event_id = line_arr[3] 438 | note = "" 439 | for i in range(4, len(line_arr)): 440 | note = note + line_arr[i] + " " 441 | note = note + '"' 442 | 443 | details = { 444 | "Event ID": user_event_id, 445 | "Event Name": sts_reader.get_event_name(user_event_id), 446 | "Note": note, 447 | } 448 | 449 | _add_to_trace_dict( 450 | data, 451 | "User Supplied Bracketed Note", 452 | "Enter", 453 | time, 454 | pe_num, 455 | details, 456 | ) 457 | 458 | _add_to_trace_dict( 459 | data, 460 | "User Supplied Bracketed Note", 461 | "Leave", 462 | end_time, 463 | pe_num, 464 | details, 465 | ) 466 | 467 | # Memory Usage at timestamp 468 | elif int(line_arr[0]) == ProjectionsConstants.MEMORY_USAGE: 469 | memory_usage = int(line_arr[1]) 470 | time = int(line_arr[2]) * 1000 471 | 472 | details = {"Memory Usage": memory_usage} 473 | 474 | 
_add_to_trace_dict( 475 | data, "Memory Usage", "Instant", time, pe_num, details 476 | ) 477 | 478 | # New chare create message being sent 479 | elif int(line_arr[0]) == ProjectionsConstants.CREATION: 480 | mtype = int(line_arr[1]) 481 | entry = int(line_arr[2]) 482 | time = int(line_arr[3]) * 1000 483 | event = int(line_arr[4]) 484 | pe = int(line_arr[5]) 485 | msglen = int(line_arr[6]) 486 | send_time = int(line_arr[7]) * 1000 487 | 488 | details = { 489 | "From PE": pe, 490 | "MType": mtype, 491 | "Entry Type": "Create", 492 | "Message Length": msglen, 493 | "Event ID": event, 494 | "Send Time": send_time, 495 | } 496 | 497 | _add_to_trace_dict( 498 | data, 499 | sts_reader.get_entry_name(entry), 500 | "Instant", 501 | time, 502 | pe_num, 503 | details, 504 | ) 505 | 506 | elif int(line_arr[0]) == ProjectionsConstants.CREATION_MULTICAST: 507 | mtype = int(line_arr[1]) 508 | entry = int(line_arr[2]) 509 | time = int(line_arr[3]) * 1000 510 | event = int(line_arr[4]) 511 | pe = int(line_arr[5]) 512 | msglen = int(line_arr[6]) 513 | send_time = int(line_arr[7]) * 1000 514 | num_procs = int(line_arr[8]) 515 | dest_procs = [] 516 | for i in (0, num_procs): 517 | dest_procs.append(int(line_arr[9 + i])) 518 | 519 | details = { 520 | "From PE": pe, 521 | "Message Type": mtype, 522 | "Entry Type": "Multicast", 523 | "Message Length": msglen, 524 | "Event ID": event, 525 | "Send Time": send_time, 526 | "Destinatopn PEs": dest_procs, 527 | } 528 | 529 | _add_to_trace_dict( 530 | data, 531 | sts_reader.get_entry_name(entry), 532 | "Instant", 533 | time, 534 | pe_num, 535 | "To " + str(num_procs) + "processors", 536 | ) 537 | 538 | # Processing of chare (i.e. execution) ? 539 | elif int(line_arr[0]) == ProjectionsConstants.BEGIN_PROCESSING: 540 | mtype = int(line_arr[1]) 541 | entry = int(line_arr[2]) 542 | time = int(line_arr[3]) * 1000 543 | event = int(line_arr[4]) 544 | pe = int(line_arr[5]) 545 | msglen = int(line_arr[6]) 546 | recv_time = int(line_arr[7]) 547 | dimensions = sts_reader.get_dimension(entry) 548 | id = [] 549 | for i in range(8, 8 + dimensions): 550 | id.append(int(line_arr[i])) 551 | cpu_start_time = int(line_arr[8 + dimensions]) 552 | 553 | num_perf_counts = sts_reader.get_num_perf_counts() 554 | perf_counts = [] 555 | for i in range(9 + dimensions, 9 + dimensions + num_perf_counts): 556 | perf_counts.append(int(line_arr[i])) 557 | 558 | details = { 559 | "From PE": pe, 560 | "Message Type": mtype, 561 | "Entry Type": "Processing", 562 | "Event ID": event, 563 | "Message Length": msglen, 564 | "Receive Time": recv_time, 565 | "ID List": id, 566 | "CPU Start Time": cpu_start_time, 567 | "perf counts list": perf_counts, 568 | } 569 | 570 | _add_to_trace_dict( 571 | data, 572 | sts_reader.get_entry_name(entry), 573 | "Enter", 574 | time, 575 | pe_num, 576 | details, 577 | ) 578 | 579 | elif int(line_arr[0]) == ProjectionsConstants.END_PROCESSING: 580 | mtype = int(line_arr[1]) 581 | entry = int(line_arr[2]) 582 | time = int(line_arr[3]) * 1000 583 | event = int(line_arr[4]) 584 | pe = int(line_arr[5]) 585 | msglen = int(line_arr[6]) 586 | cpu_end_time = int(line_arr[7]) 587 | num_perf_counts = sts_reader.get_num_perf_counts() 588 | perf_counts = [] 589 | for i in range(num_perf_counts): 590 | perf_counts.append(int(line_arr[8 + i])) 591 | 592 | details = { 593 | "From PE": pe, 594 | "Message Type": mtype, 595 | "Entry Name": "Processing", 596 | "Event ID": event, 597 | "Message Length": msglen, 598 | "CPU End Time": cpu_end_time, 599 | "perf counts list": perf_counts, 600 | } 601 
| 602 | _add_to_trace_dict( 603 | data, 604 | sts_reader.get_entry_name(entry), 605 | "Leave", 606 | time, 607 | pe_num, 608 | None, 609 | ) 610 | 611 | # For selective tracing - when trace is called inside code 612 | elif int(line_arr[0]) == ProjectionsConstants.BEGIN_TRACE: 613 | time = int(line_arr[1]) * 1000 614 | 615 | _add_to_trace_dict(data, "Trace", "Enter", time, pe_num, None) 616 | 617 | elif int(line_arr[0]) == ProjectionsConstants.END_TRACE: 618 | time = int(line_arr[1]) * 1000 619 | 620 | _add_to_trace_dict(data, "Trace", "Leave", time, pe_num, None) 621 | 622 | # Message Receive ? 623 | elif int(line_arr[0]) == ProjectionsConstants.MESSAGE_RECV: 624 | mtype = int(line_arr[1]) 625 | time = int(line_arr[2]) * 1000 626 | event = int(line_arr[3]) 627 | pe = int(line_arr[4]) 628 | message_length = int(line_arr[5]) 629 | 630 | details = { 631 | "From PE": pe, 632 | "Message Type": mtype, 633 | "Event ID": event, 634 | "Message Length": message_length, 635 | } 636 | 637 | _add_to_trace_dict( 638 | data, "Message Receive", "Instant", time, pe_num, details 639 | ) 640 | 641 | # queueing creation ? 642 | elif int(line_arr[0]) == ProjectionsConstants.ENQUEUE: 643 | mtype = int(line_arr[1]) 644 | time = int(line_arr[2]) * 1000 645 | event = int(line_arr[3]) 646 | pe = int(line_arr[4]) 647 | 648 | details = {"From PE": pe, "Message Type": mtype, "Event ID": event} 649 | 650 | _add_to_trace_dict(data, "Enque", "Instant", time, pe_num, details) 651 | 652 | elif int(line_arr[0]) == ProjectionsConstants.DEQUEUE: 653 | mtype = int(line_arr[1]) 654 | time = int(line_arr[2]) * 1000 655 | event = int(line_arr[3]) 656 | pe = int(line_arr[4]) 657 | 658 | details = {"From PE": pe, "Message Type": mtype, "Event ID": event} 659 | 660 | _add_to_trace_dict(data, "Deque", "Instant", time, pe_num, details) 661 | 662 | # Interrupt from different chare ? 
663 | elif int(line_arr[0]) == ProjectionsConstants.BEGIN_INTERRUPT: 664 | time = int(line_arr[1]) * 1000 665 | event = int(line_arr[2]) 666 | pe = int(line_arr[3]) 667 | 668 | details = {"From PE": pe, "Event ID": event} 669 | 670 | _add_to_trace_dict( 671 | data, "Interrupt", "Enter", time, pe_num, details 672 | ) 673 | 674 | elif int(line_arr[0]) == ProjectionsConstants.END_INTERRUPT: 675 | time = int(line_arr[1]) * 1000 676 | event = int(line_arr[2]) 677 | pe = int(line_arr[3]) 678 | 679 | details = {"From PE": pe, "Event ID": event} 680 | 681 | _add_to_trace_dict( 682 | data, "Interrupt", "Leave", time, pe_num, details 683 | ) 684 | 685 | # Very start of the program - encapsulates every other event 686 | elif int(line_arr[0]) == ProjectionsConstants.BEGIN_COMPUTATION: 687 | time = int(line_arr[1]) * 1000 688 | 689 | _add_to_trace_dict(data, "Computation", "Enter", time, pe_num, None) 690 | 691 | elif int(line_arr[0]) == ProjectionsConstants.END_COMPUTATION: 692 | time = int(line_arr[1]) * 1000 693 | 694 | _add_to_trace_dict(data, "Computation", "Leave", time, pe_num, None) 695 | 696 | # User event (in code) 697 | elif int(line_arr[0]) == ProjectionsConstants.USER_EVENT: 698 | user_event_id = int(line_arr[1]) 699 | time = int(line_arr[2]) * 1000 700 | event = int(line_arr[3]) 701 | pe = int(line_arr[4]) 702 | 703 | user_event_name = sts_reader.get_user_event(user_event_id) 704 | 705 | details = { 706 | "From PE": pe, 707 | "Event ID": event, 708 | "Event Type": "User Event", 709 | } 710 | 711 | _add_to_trace_dict( 712 | data, user_event_name, "Instant", time, pe_num, details 713 | ) 714 | 715 | elif int(line_arr[0]) == ProjectionsConstants.USER_EVENT_PAIR: 716 | user_event_id = int(line_arr[1]) 717 | time = int(line_arr[2]) * 1000 718 | event = int(line_arr[3]) 719 | pe = int(line_arr[4]) 720 | nested_id = int(line_arr[5]) 721 | 722 | user_event_name = sts_reader.get_user_event(user_event_id) 723 | 724 | details = { 725 | "From PE": pe, 726 | "Event ID": event, 727 | "Nested ID": nested_id, 728 | "Event Type": "User Event Pair", 729 | } 730 | 731 | _add_to_trace_dict( 732 | data, user_event_name, "Instant", time, pe_num, details 733 | ) 734 | 735 | elif int(line_arr[0]) == ProjectionsConstants.BEGIN_USER_EVENT_PAIR: 736 | user_event_id = int(line_arr[1]) 737 | time = int(line_arr[2]) * 1000 738 | event = int(line_arr[3]) 739 | pe = int(line_arr[4]) 740 | nested_id = int(line_arr[5]) 741 | 742 | details = { 743 | "From PE": pe, 744 | "Event ID": event, 745 | "Nested ID": nested_id, 746 | "User Event Name": sts_reader.get_user_event(user_event_id), 747 | } 748 | 749 | _add_to_trace_dict( 750 | data, "User Event Pair", "Enter", time, pe_num, details 751 | ) 752 | 753 | elif int(line_arr[0]) == ProjectionsConstants.END_USER_EVENT_PAIR: 754 | user_event_id = int(line_arr[1]) 755 | time = int(line_arr[2]) * 1000 756 | event = int(line_arr[3]) 757 | pe = int(line_arr[4]) 758 | nested_id = int(line_arr[5]) 759 | 760 | details = { 761 | "From PE": pe, 762 | "Event ID": event, 763 | "Nested ID": nested_id, 764 | "User Event Name": sts_reader.get_user_event(user_event_id), 765 | } 766 | 767 | _add_to_trace_dict( 768 | "User Event Pair", "Leave", time, pe_num, details 769 | ) 770 | 771 | # User stat (in code) 772 | elif int(line_arr[0]) == ProjectionsConstants.USER_STAT: 773 | time = int(line_arr[1]) * 1000 774 | user_time = int(line_arr[2]) * 1000 775 | stat = float(line_arr[3]) 776 | pe = int(line_arr[4]) 777 | user_event_id = int(line_arr[5]) 778 | 779 | user_stat_name = 
sts_reader.get_user_stat(user_event_id) 780 | 781 | details = { 782 | "From PE": pe, 783 | "User Time": user_time, 784 | "Stat": stat, 785 | "Event Type": "User Stat", 786 | } 787 | 788 | _add_to_trace_dict( 789 | data, user_stat_name, "Instant", time, pe_num, details 790 | ) 791 | 792 | # Making sure that the log file ends with END_COMPUTATION 793 | if len(data["Name"]) > 0 and data["Name"][-1] != "Computation": 794 | time = data["Timestamp (ns)"][-1] * 1000 795 | _add_to_trace_dict(data, "Computation", "Leave", time, pe_num, None) 796 | 797 | log_file.close() 798 | dfs.append(pd.DataFrame(data)) 799 | 800 | return pd.concat(dfs) 801 | 802 | 803 | def _add_to_trace_dict(data, name, evt_type, time, process, attributes): 804 | data["Name"].append(name) 805 | data["Event Type"].append(evt_type) 806 | data["Timestamp (ns)"].append(time) 807 | data["Process"].append(process) 808 | data["Attributes"].append(attributes) 809 | -------------------------------------------------------------------------------- /pipit/tests/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Parallel Software and Systems Group, University of Maryland. 2 | # See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | 7 | import pipit as pp 8 | 9 | 10 | def test_get_option(): 11 | # assert that default values are returned 12 | assert pp.get_option("log_level") == "INFO" 13 | assert pp.get_option("notebook_url") == "http://localhost:8888" 14 | 15 | # assert that invalid key raises ValueError 16 | try: 17 | pp.get_option("invalid_key") 18 | except ValueError: 19 | pass 20 | else: 21 | assert False 22 | 23 | 24 | def test_set_option(): 25 | # assert that valid values are set 26 | pp.set_option("log_level", "DEBUG") 27 | assert pp.get_option("log_level") == "DEBUG" 28 | 29 | pp.set_option("notebook_url", "http://127.0.0.1:8080") 30 | assert pp.get_option("notebook_url") == "http://127.0.0.1:8080" 31 | 32 | # assert that invalid key raises ValueError 33 | try: 34 | pp.set_option("invalid_key", "invalid_value") 35 | except ValueError: 36 | pass 37 | else: 38 | assert False 39 | 40 | # assert that invalid value raises ValueError 41 | try: 42 | pp.set_option("log_level", "invalid_value") 43 | except ValueError: 44 | pass 45 | else: 46 | assert False 47 | 48 | try: 49 | pp.set_option("notebook_url", "invalid_value") 50 | except ValueError: 51 | pass 52 | else: 53 | assert False 54 | -------------------------------------------------------------------------------- /pipit/tests/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of 2 | # Maryland. See the top-level LICENSE file for details. 
3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | import os 7 | import shutil 8 | from glob import glob 9 | 10 | import pytest 11 | 12 | 13 | @pytest.fixture 14 | def data_dir(): 15 | """Return path to the top-level data directory for tests.""" 16 | parent = os.path.dirname(__file__) 17 | return os.path.join(parent, "data") 18 | 19 | 20 | @pytest.fixture 21 | def ping_pong_hpct_trace(data_dir, tmpdir): 22 | """Builds a temporary directory containing the ping-pong traces.""" 23 | hpct_db_dir = os.path.join(data_dir, "ping-pong-hpctoolkit") 24 | 25 | for f in glob(os.path.join(str(hpct_db_dir), "*.db")): 26 | shutil.copy(f, str(tmpdir)) 27 | 28 | return tmpdir 29 | 30 | 31 | @pytest.fixture 32 | def ping_pong_projections_trace(data_dir, tmpdir): 33 | """Builds a temporary directory containing the ping-pong traces.""" 34 | projections_dir = os.path.join(data_dir, "ping-pong-projections") 35 | 36 | shutil.copy(os.path.join(projections_dir, "pingpong.prj.sts"), str(tmpdir)) 37 | shutil.copy(os.path.join(projections_dir, "pingpong.prj.0.log.gz"), str(tmpdir)) 38 | shutil.copy(os.path.join(projections_dir, "pingpong.prj.1.log.gz"), str(tmpdir)) 39 | 40 | return tmpdir 41 | 42 | 43 | @pytest.fixture 44 | def ping_pong_otf2_trace(data_dir, tmpdir): 45 | """Builds a temporary directory containing the ping-pong traces.""" 46 | otf2_dir = os.path.join(data_dir, "ping-pong-otf2") 47 | 48 | shutil.copytree(os.path.join(str(otf2_dir), "traces"), str(tmpdir) + "/traces") 49 | shutil.copy(os.path.join(str(otf2_dir), "scorep.cfg"), str(tmpdir)) 50 | shutil.copy(os.path.join(str(otf2_dir), "traces.def"), str(tmpdir)) 51 | shutil.copy(os.path.join(str(otf2_dir), "traces.otf2"), str(tmpdir)) 52 | 53 | return tmpdir 54 | 55 | 56 | @pytest.fixture 57 | def ping_pong_otf2_papi_trace(data_dir, tmpdir): 58 | """Builds a temporary directory containing the ping-pong traces.""" 59 | otf2_dir = os.path.join(data_dir, "ping-pong-otf2-papi") 60 | 61 | shutil.copytree(os.path.join(str(otf2_dir), "traces"), str(tmpdir) + "/traces") 62 | shutil.copy(os.path.join(str(otf2_dir), "scorep.cfg"), str(tmpdir)) 63 | shutil.copy(os.path.join(str(otf2_dir), "traces.def"), str(tmpdir)) 64 | shutil.copy(os.path.join(str(otf2_dir), "traces.otf2"), str(tmpdir)) 65 | 66 | return tmpdir 67 | -------------------------------------------------------------------------------- /pipit/tests/data/foo-bar.csv: -------------------------------------------------------------------------------- 1 | Timestamp (s), Event Type, Name, Process 2 | 0, Enter, main(), 0 3 | 1, Enter, foo(), 0 4 | 3, Enter, MPI_Send, 0 5 | 5, Leave, MPI_Send, 0 6 | 8, Enter, baz(), 0 7 | 18, Leave, baz(), 0 8 | 25, Leave, foo(), 0 9 | 100, Leave, main(), 0 10 | 0, Enter, main(), 1 11 | 1, Enter, bar(), 1 12 | 2, Enter, Idle, 1 13 | 10, Leave, Idle, 1 14 | 10, Enter, MPI_Recv, 1 15 | 14, Leave, MPI_Recv, 1 16 | 39, Leave, bar(), 1 17 | 39, Enter, Idle, 1 18 | 57, Leave, Idle, 1 19 | 57, Enter, grault(), 1 20 | 77, Leave, grault(), 1 21 | 100, Leave, main(), 1 22 | -------------------------------------------------------------------------------- /pipit/tests/data/ping-pong-hpctoolkit/cct.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcgroup/pipit/97beb979a126819de6fee1bd221647f4b9e2e6c7/pipit/tests/data/ping-pong-hpctoolkit/cct.db -------------------------------------------------------------------------------- /pipit/tests/data/ping-pong-hpctoolkit/meta.db: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcgroup/pipit/97beb979a126819de6fee1bd221647f4b9e2e6c7/pipit/tests/data/ping-pong-hpctoolkit/meta.db -------------------------------------------------------------------------------- /pipit/tests/data/ping-pong-hpctoolkit/metrics/METRICS.yaml.ex: -------------------------------------------------------------------------------- 1 | %YAML 1.2 2 | --- 3 | # Specification and example document for metric taxonomies. 4 | 5 | # Each HPCToolkit database provides post-processed performance data for every 6 | # calling context, application thread and performance metric. Performance 7 | # metrics are generally very specific and the impact on the application 8 | # performance is not always clear (eg. is 98% of the GPU L2 misses on a single 9 | # line a problem?). 10 | 11 | # Files of this format provide a full "taxonomy" of metrics, structured to aid 12 | # manual performance analysis. Very general metrics (eg. time) are presented 13 | # first to give a sense for *where* significant performance issues are, which 14 | # can be expanded to present increasingly specific metrics to determine the 15 | # *why* and *how*. In other words, the majority of an HPCToolkit database 16 | # (see FORMATS.md) provides raw performance metrics, while METRICS.yaml files 17 | # provide the interpretation. 18 | 19 | # This format is primarily intended to be read by the GUI application of 20 | # HPCToolkit, HPCViewer. A number of keys in this file only make sense in this 21 | # context, for instance options on how to present the final metric values. 22 | 23 | # NOTE: !!-type specifiers when used below indicate the type(s) allowed for 24 | # the various keys. They are not required and match up with the default type 25 | # as interpreted by most general YAML parsers. 26 | 27 | # Version of the METRICS.yaml format required by this file. Can be used by 28 | # readers to error gracefully without reading the entire file. If omitted 29 | # version checks are disabled. 30 | version: !!int 0 31 | 32 | # Set of all performance metrics used by this taxonomy. These correspond to 33 | # performance metrics listed in the meta.db file. 34 | # Anchors are used to refer to these metrics later in the file. 35 | inputs: !!seq 36 | - &in-cycles-E 37 | # Canonical name for the performance metric. 38 | # See Performance Metric Specification in FORMATS.md for details. 39 | metric: !!str perf::cycles 40 | # Name of the propagation scope for the value referenced. 41 | # See Performance Metric Specification in FORMATS.md for details. 42 | scope: !!str function 43 | # Unary function used to generate summary statistic values, see Performance 44 | # Metric Specification in FORMATS.md for details. 45 | # This is a formula in the same format as the variants:formula:* keys in 46 | # in the metric description below, with the following differences: 47 | # - The formula must consist of a single !!str, not a !!seq or other 48 | # formula structure ("$$" is used as the variable), and 49 | # - The formula is canonicalized: whitespace and extraneous paratheticals 50 | # should be removed to achieve a match. 51 | # Defaults to '$$'. 52 | formula: !!str $$ 53 | # Combination function use to generate summary statistic values, see 54 | # Performance Metric Specification in FORMATS.md for details. 55 | # One of 'sum', 'min' or 'max'. Defaults to 'sum'. 
56 | combine: !!str sum 57 | # Merge keys can be used to lower the repetition of common fields: 58 | - &in-cycles-I 59 | <<: *in-cycles-E 60 | scope: execution 61 | - &in-cycles-E-cnt 62 | <<: *in-cycles-E 63 | formula: 1 64 | - &in-cycles-I-cnt 65 | <<: *in-cycles-I 66 | formula: 1 67 | 68 | - &in-l1-miss-E 69 | metric: perf::l1-cache-miss 70 | scope: function 71 | - &in-l1-miss-I 72 | <<: *in-l1-miss-E 73 | scope: execution 74 | - &in-l1-miss-E-cnt 75 | <<: *in-l1-miss-E 76 | formula: 1 77 | - &in-l1-miss-I-cnt 78 | <<: *in-l1-miss-I 79 | formula: 1 80 | 81 | - &in-l2-miss-E 82 | metric: perf::l2-cache-miss 83 | scope: function 84 | - &in-l2-miss-I 85 | <<: *in-l2-miss-E 86 | scope: execution 87 | - &in-l2-miss-E-cnt 88 | <<: *in-l2-miss-E 89 | formula: 1 90 | - &in-l2-miss-I-cnt 91 | <<: *in-l2-miss-I 92 | formula: 1 93 | 94 | - &in-l3-miss-E 95 | metric: perf::l3-cache-miss 96 | scope: function 97 | - &in-l3-miss-I 98 | <<: *in-l3-miss-E 99 | scope: execution 100 | - &in-l3-miss-E-cnt 101 | <<: *in-l3-miss-E 102 | formula: 1 103 | - &in-l3-miss-I-cnt 104 | <<: *in-l3-miss-I 105 | formula: 1 106 | 107 | # Sequence of root metrics provided in this taxonomy. Every metric listed in the 108 | # taxonomy is a descendant of one of these. 109 | roots: 110 | - # Name for the metric. 111 | name: !!str CPU Cycles 112 | # Longer description of the metric, written in Markdown. 113 | # Defaults to the `short description:` if given. 114 | description: > 115 | Cycles spent: 116 | - In the CPU doing actual work (FLOPs), or 117 | - Waiting for outside operations to complete (memory stalls). 118 | # Short description of the metric, used for cases where a long description 119 | # would not be suitable. 120 | # Defaults to `description:` up to the first period or newline. 121 | short description: !!str Cycles spent in the CPU. 122 | 123 | # Whether this metric should be visible in the Viewer by default, default 124 | # true. If false, the Viewer may require that the metric be enabled in the 125 | # metric list before it will be presented. 126 | visible by default: true 127 | 128 | # How the values in the metrics rooted here will be presented in the Viewer 129 | # by default. One of: 130 | # - 'column': Columns of data that can be expanded to show inner metrics. 131 | # Defaults to 'column'. Only allowed on root metrics. 132 | presentation: !!str column 133 | 134 | # Sequence of child metrics, format is the same as a root metric. 135 | # If omitted there are no child metrics. 136 | children: !!seq 137 | - name: L2 Bound 138 | description: Rough cycles spent accessing the L2 cache 139 | 140 | # List of formula variations for this taxonomic metric. Metric values are 141 | # always attributed to an application thread, however for large executions 142 | # this gives too much data to present clearly. Instead, the Viewer 143 | # presents on "summary" values by applying statistics across threads. 144 | # The `inputs:` key above lists the "partial" results required for 145 | # calculating statistics, this key lists the final formulas to generate 146 | # presentable values. 147 | # 148 | # Keys in this map are the human-readable names of the variants. 149 | variants: !!map 150 | !!str Sum: 151 | # How the final value(s) for this metric variant should be rendered. 152 | # Orderless set of elements to be rendered in the metric cell, the 153 | # following options are available: 154 | # - 'number': Numerical rendering (see `format:`). 155 | # - 'percentage': Percentage of the global inclusive value. 
Only 156 | # allowed if `formula:inclusive:` is given. 157 | # - 'hidden': Mark as hiding (some) inner values (`*`). 158 | # - 'colorbar': Color bar visually indicating the relative sizes of 159 | # values in child metrics. An additional "grey" color is added to 160 | # the bar to indicate the difference between sum-of-children and 161 | # this metric variant's value. (Note that this difference will be 162 | # exactly 0 if `formula:` is 'sum'.) 163 | # The Viewer will order the elements reasonably, and may elide 164 | # elements if screen real estate is tight. 165 | render: !!seq [number, percent] # eg: 1.23e+04 56.7% 166 | # Can also be given as a !!str for a single element: 167 | render: !!str 'number' # eg: 1.23e+04 168 | 169 | # Printf-like format to use when rendering the metric value(s) as a 170 | # number (`render: number`). The input to "printf" is a single double 171 | # value. Defaults to '%.2e'. 172 | # 173 | # In more detail, this string must be of the form: 174 | # [prefix]%(#0- +')*[field width][.precision](eEfFgGaA)[suffix] 175 | # Where "prefix" and "suffix" use %% to generate a literal %. 176 | format: !!str '%.2e' 177 | 178 | # Which variant child metric values are gotten from. Also used as the 179 | # default variant when first expanding this metric variant. Explicitly 180 | # lists the variant to use for each child metric in order. 181 | child variant: !!seq 182 | - Sum # Use Sum value(s) from first child 183 | - Mean # Use Mean value(s) from second child 184 | # Or can also be given as a !!str if the variant is the same. 185 | child variant: !!str Sum # Use Sum value(s) from all children 186 | # Defaults to the name of this variant. 187 | 188 | # Formula(s) for calculating the final value(s) for this metric 189 | # variant. Ignored unless `render:` contains a numerical element 190 | # (ie. everything except 'hidden'). Can be one of: 191 | # - 'first': Value(s) for this variant are copied from the value(s) 192 | # of the first child. Invalid if `render:` contains 'colorbar'. 193 | # - 'sum': Value(s) are generated by summing child value(s). 194 | # In all cases value(s) are generated vector-wise (ie. inclusive 195 | # values come from inclusive child values, exclusive from exclusive, 196 | # etc.), and null child values generate null values in the parent 197 | # (ie. they aren't replaced with 0). 198 | formula: !!str first 199 | # Can also be written as a !!map listing the vector of formulas. 200 | formula: !!map 201 | # The following keys define the formulas used to generate metrics. 202 | # Formulas are roughly written as a C-like math expression, except: 203 | # - "Variables" are references to other nodes, which can be other 204 | # formulas (sub-expressions) or an entry in the global `inputs:`. 205 | # Eg: `*in-cycles-E` is an input metric value. 206 | # - Parentheses are represented with a YAML !!seq ([...]), breaks 207 | # between elements (,) are considered whitespace. 208 | # Eg: `2 * (3 + 4)` -> `[2 *,[3,+,4]]` 209 | # - Number constants and infix operators can be represented by 210 | # !!int, !!float and !!str YAML elements (as appropriate), and 211 | # need not be separated by an element break (whitespace suffices). 212 | # Eg: `[2 *,[3,+,4]]` == `[2,*,[3+4]]` 213 | # The following operators are available in increasing precedence: 214 | # + - # Addition and subtraction 215 | # * / # Multiplication and (true) division 216 | # ^ # Exponentiation 217 | # - Function calls are represented by a YAML !!map with a single 218 | # pair. 
The key is the function name and the value is a !!seq 219 | # listing the arguments. 220 | # Eg: `foo(1, 2, 3)` -> `[foo:[1,2,3]]`, 221 | # and `foo(1+x)` -> `[foo:[ [1+,*x] ]]` 222 | # The following functions are available: 223 | # sum:[...] # Sum of arguments 224 | # prod:[...] # Product of arguments 225 | # pow:[a, b] # a raised to the b 226 | # sqrt:[a] # Square root of a (pow(a, .5)) 227 | # log:[a, b] # Logarithm of a base-b 228 | # log:[a] # Natural logarithm of a 229 | # min:[...] # Smallest of arguments 230 | # max:[...] # Largest of arguments 231 | # floor:[a] # Largest integer less than or equal to a 232 | # strict floor:[a] # Largest integer less than a 233 | # ceil:[a] # Smallest integer greater than or equal to a 234 | # strict ceil:[a] # Smallest integer greater than a 235 | 236 | # Formulas to generate "inclusive" cost values. Defaults to null. 237 | inclusive: 238 | # Custom formula used when no special properties are required of 239 | # the formulation. Defaults to the value of `standard:`. 240 | custom: [4*,[*in-l1-miss-I,-,*in-l2-miss-I]] 241 | 242 | # Version of the formula based completely on well-defined metric 243 | # inputs, which refer only to non-custom propagation scopes. Used 244 | # in the bottom-up and flat views, where this property is required 245 | # for accurate analysis. Defaults to null. 246 | # See the meta.db Performance Metrics section for details. 247 | standard: [4*,[*in-l1-miss-I,-,*in-l2-miss-I]] 248 | 249 | # Formulas to generate "exclusive" cost values. Defaults to null. 250 | exclusive: 251 | standard: [4*,[*in-l1-miss-E,-,*in-l2-miss-E]] 252 | 253 | # Another example variant for "L2 Bound" 254 | Mean: 255 | render: [number, percent] 256 | formula: 257 | inclusive: [4*,[*in-l1-miss-I,/,*in-l1-miss-I-cnt, -,*in-l2-miss-I,/,*in-l2-miss-I-cnt]] 258 | exclusive: [4*,[*in-l1-miss-E,/,*in-l1-miss-E-cnt, -,*in-l2-miss-E,/,*in-l2-miss-E-cnt]] 259 | 260 | # Sibling metric, still under "CPU Cycles" 261 | - name: L3 Bound 262 | description: Rough cycles spent accessing L3 cache 263 | variants: 264 | Sum: 265 | render: number 266 | formula: 267 | inclusive: [64*,[*in-l2-miss-I, -,*in-l3-miss-I]] 268 | exclusive: [64*,[*in-l2-miss-E, -,*in-l3-miss-E]] 269 | Mean: 270 | render: [number, percent] 271 | formula: 272 | inclusive: [64*,[*in-l2-miss-I,/,*in-l2-miss-I-cnt, -,*in-l3-miss-I,/,*in-l3-miss-I-cnt]] 273 | exclusive: [64*,[*in-l2-miss-E,/,*in-l2-miss-E-cnt, -,*in-l3-miss-E,/,*in-l3-miss-E-cnt]] 274 | 275 | # Parameters for the root "CPU Cycles" metric 276 | variants: 277 | Sum: 278 | render: number 279 | formula: 280 | inclusive: *in-cycles-I 281 | exclusive: *in-cycles-E 282 | Mean: 283 | render: [number, colorbar] 284 | formula: 285 | inclusive: [*in-cycles-I,/,*in-cycles-I-cnt] 286 | exclusive: [*in-cycles-E,/,*in-cycles-E-cnt] 287 | -------------------------------------------------------------------------------- /pipit/tests/data/ping-pong-hpctoolkit/metrics/default.yaml: -------------------------------------------------------------------------------- 1 | version: 0 2 | inputs: 3 | - &CPUTIMEx20_x28_secx29_-sum-x5b_0x0x5d_-execution 4 | metric: CPUTIME (sec) 5 | scope: execution 6 | formula: $$ 7 | combine: sum 8 | - &CPUTIMEx20_x28_secx29_-sum-x5b_0x0x5d_-lex_aware 9 | metric: CPUTIME (sec) 10 | scope: lex_aware 11 | formula: $$ 12 | combine: sum 13 | - &CPUTIMEx20_x28_secx29_-sum-x5b_0x0x5d_-function 14 | metric: CPUTIME (sec) 15 | scope: function 16 | formula: $$ 17 | combine: sum 18 | roots: 19 | - name: CPUTIME (sec) 20 | description: CPUTIME 
(sec) 21 | variants: 22 | Sum: 23 | render: [number, percent] 24 | formula: 25 | inclusive: 26 | standard: *CPUTIMEx20_x28_secx29_-sum-x5b_0x0x5d_-execution 27 | exclusive: 28 | custom: *CPUTIMEx20_x28_secx29_-sum-x5b_0x0x5d_-lex_aware 29 | standard: *CPUTIMEx20_x28_secx29_-sum-x5b_0x0x5d_-function -------------------------------------------------------------------------------- /pipit/tests/data/ping-pong-hpctoolkit/profile.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hpcgroup/pipit/97beb979a126819de6fee1bd221647f4b9e2e6c7/pipit/tests/data/ping-pong-hpctoolkit/profile.db -------------------------------------------------------------------------------- /pipit/tests/data/ping-pong-hpctoolkit/src/ping-pong.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char *argv[]) 6 | { 7 | /* ------------------------------------------------------------------------------------------- 8 | MPI Initialization 9 | --------------------------------------------------------------------------------------------*/ 10 | MPI_Init(&argc, &argv); 11 | 12 | int size; 13 | MPI_Comm_size(MPI_COMM_WORLD, &size); 14 | 15 | int rank; 16 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 17 | 18 | MPI_Status stat; 19 | 20 | if(size != 2){ 21 | if(rank == 0){ 22 | printf("This program requires exactly 2 MPI ranks, but you are attempting to use %d! Exiting...\n", size); 23 | } 24 | MPI_Finalize(); 25 | exit(0); 26 | } 27 | 28 | /* ------------------------------------------------------------------------------------------- 29 | Loop from 8 B to 1 GB 30 | --------------------------------------------------------------------------------------------*/ 31 | 32 | for(int i=11; i<=18; i++){ 33 | 34 | long int N = 1 << i; 35 | 36 | // Allocate memory for A on CPU 37 | double *A = (double*)malloc(N*sizeof(double)); 38 | 39 | // Initialize all elements of A to 0.0 40 | for(int i=0; i 0x24680 [libpsm2.so.2.2]", 58 | "MPID_Finalize [libmpi.so.12.1.1]", 59 | "MPID_Recv [libmpi.so.12.1.1]", 60 | "MPI_Finalize", 61 | "PMPI_Finalize [libmpi.so.12.1.1]", 62 | "PMPI_Recv [libmpi.so.12.1.1]", 63 | "PMPI_Send [libmpi.so.12.1.1]", 64 | "__GI___munmap [libc-2.17.so]", 65 | "__GI___unlink [libc-2.17.so]", 66 | "__GI_process_vm_readv [libc-2.17.so]", 67 | "loop", 68 | "main", 69 | "main thread", 70 | "psm2_ep_close [libpsm2.so.2.2]", 71 | "psm2_mq_ipeek2 [libpsm2.so.2.2]", 72 | "psm2_mq_irecv2 [libpsm2.so.2.2]", 73 | "psm_dofinalize [libmpi.so.12.1.1]", 74 | "psm_progress_wait [libmpi.so.12.1.1]", 75 | "psm_recv [libmpi.so.12.1.1]", 76 | "psm_try_complete [libmpi.so.12.1.1]", 77 | "shm_unlink [librt-2.17.so]", 78 | "targ5030 [libpsm2.so.2.2]", 79 | } 80 | 81 | # Test correct number of MPI Send/Recv events 82 | mpi_send_df = events_df.loc[events_df["Name"].str.contains("PMPI_Send")].loc[ 83 | events_df["Event Type"] == "Enter" 84 | ] 85 | mpi_recv_df = events_df.loc[events_df["Name"].str.contains("PMPI_Recv")].loc[ 86 | events_df["Event Type"] == "Enter" 87 | ] 88 | 89 | # Process 0 has 6 MPI Sends and 5 MPI Recvs 90 | assert len(mpi_send_df.loc[events_df["Process"] == 0]) == 7 91 | assert len(mpi_recv_df.loc[events_df["Process"] == 0]) == 7 92 | 93 | # Process 1 has 5 MPI Sends and 5 MPI Recvs 94 | assert len(mpi_send_df.loc[events_df["Process"] == 1]) == 7 95 | assert len(mpi_recv_df.loc[events_df["Process"] == 1]) == 7 96 | 97 | # Timestamps should be sorted in increasing order 98 | assert 
(np.diff(events_df["Timestamp (ns)"]) >= 0).all() 99 | -------------------------------------------------------------------------------- /pipit/tests/otf2-tests.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of 2 | # Maryland. See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | import numpy as np 7 | from pipit import Trace 8 | 9 | 10 | def test_events(data_dir, ping_pong_otf2_trace): 11 | trace = Trace.from_otf2(str(ping_pong_otf2_trace)) 12 | events_df = trace.events 13 | 14 | # 120 total events in ping pong trace 15 | assert len(events_df) == 120 16 | 17 | # event types for trace (instant events are program begin/end and mpi send/recv) 18 | assert set(events_df["Event Type"]) == set(["Enter", "Instant", "Leave"]) 19 | 20 | # all event names in the trace 21 | assert set(events_df["Name"]) == set( 22 | [ 23 | "ProgramBegin", 24 | "ProgramEnd", 25 | "MPI_Send", 26 | "MPI_Recv", 27 | "MpiSend", 28 | "MpiRecv", 29 | "MPI_Init", 30 | "MPI_Finalize", 31 | "MPI_Comm_rank", 32 | "MPI_Comm_size", 33 | "int main(int, char**)", 34 | ] 35 | ) 36 | 37 | # 8 sends per rank, so 16 sends total -> 32 including both enter and leave rows 38 | assert len(events_df.loc[events_df["Name"] == "MPI_Send"]) == 32 39 | 40 | assert len(set(events_df["Process"])) == 2 # 2 ranks for ping pong trace 41 | 42 | assert len(set(events_df["Thread"])) == 1 # 1 thread per rank 43 | 44 | assert len(events_df.loc[events_df["Process"] == 0]) == 60 # 60 events per rank 45 | 46 | assert ( 47 | len(events_df.loc[events_df["Thread"] == 0]) == 120 48 | ) # all 120 events associated with the main thread 49 | 50 | # timestamps should be sorted in increasing order 51 | assert (np.diff(events_df["Timestamp (ns)"]) > 0).all() 52 | 53 | 54 | def test_definitions(data_dir, ping_pong_otf2_trace): 55 | trace = Trace.from_otf2(str(ping_pong_otf2_trace)) 56 | definitions_df = trace.definitions 57 | 58 | assert len(definitions_df) == 533 59 | 60 | # 17 unique definition types in trace 61 | assert len(set(definitions_df["Definition Type"])) == 17 62 | 63 | # 2 ranks, so 2 location definitions in the trace 64 | assert len(definitions_df.loc[definitions_df["Definition Type"] == "Location"]) == 2 65 | 66 | # communicator should evidently be present in the ping pong trace definitions 67 | assert "Comm" in set(definitions_df["Definition Type"]) 68 | -------------------------------------------------------------------------------- /pipit/tests/projections-tests.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of 2 | # Maryland. See the top-level LICENSE file for details. 
3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | from pipit import Trace 7 | 8 | 9 | def test_events(data_dir, ping_pong_projections_trace): 10 | trace = Trace.from_projections(str(ping_pong_projections_trace)) 11 | events_df = trace.events 12 | 13 | # The projections trace has 2 PEs 14 | assert set(events_df["Process"]) == {0, 1} 15 | 16 | # event types for trace 17 | assert set(events_df["Event Type"]) == {"Enter", "Instant", "Leave"} 18 | 19 | # all event names in the trace 20 | assert set(events_df["Name"]) == { 21 | "Computation", 22 | "Idle", 23 | "Pack", 24 | "Ping1()", 25 | "Ping2()", 26 | "Ping3()", 27 | "PingC()", 28 | "PingC(IdMsg* impl_msg)", 29 | "PingF()", 30 | "PingMarshall()", 31 | "Unpack", 32 | "dummy_thread_ep", 33 | "exchange(IdMsg* impl_msg)", 34 | "maindone()", 35 | "recv(PingMsg* impl_msg)", 36 | "recvHandle(const CkNcpyBuffer &destInfo)", 37 | "recv_zerocopy(CkNcpyBuffer ncpyBuffer_msg, int size)", 38 | "remoteDoneInserting()", 39 | "start(const bool &reportTime)", 40 | "traceProjectionsParallelShutdown(int impl_noname_8)", 41 | "trecv(PingMsg* impl_msg)", 42 | "updateLocation(const CkArrayIndex &idx, const CkLocEntry &e)", 43 | } 44 | 45 | # PE 1 has 68 create events (which are the only instant events) 46 | assert ( 47 | len( 48 | events_df.loc[events_df["Process"] == 1].loc[ 49 | events_df["Event Type"] == "Instant" 50 | ] 51 | ) 52 | == 68 53 | ) 54 | # PE 0 has 77 create events (which are the only instant events) 55 | assert ( 56 | len( 57 | events_df.loc[events_df["Process"] == 0].loc[ 58 | events_df["Event Type"] == "Instant" 59 | ] 60 | ) 61 | == 77 62 | ) 63 | 64 | # PE0 has 161 Begin Processing Events 65 | len( 66 | events_df.loc[events_df["Process"] == 0] 67 | .loc[events_df["Event Type"] == "Enter"] 68 | .loc[events_df["Name"] == "Processing"] 69 | ) == 161 70 | # PE1 has 146 Begin Processing Events 71 | len( 72 | events_df.loc[events_df["Process"] == 1] 73 | .loc[events_df["Event Type"] == "Enter"] 74 | .loc[events_df["Name"] == "Processing"] 75 | ) == 146 76 | 77 | # Each log file starts/ends with a Computation Event 78 | assert events_df.loc[events_df["Process"] == 1].iloc[0]["Name"] == "Computation" 79 | assert events_df.loc[events_df["Process"] == 1].iloc[-1]["Name"] == "Computation" 80 | 81 | assert events_df.loc[events_df["Process"] == 0].iloc[0]["Name"] == "Computation" 82 | assert events_df.loc[events_df["Process"] == 0].iloc[-1]["Name"] == "Computation" 83 | -------------------------------------------------------------------------------- /pipit/tests/trace.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of 2 | # Maryland. See the top-level LICENSE file for details.
3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | import numpy as np 7 | from pipit import Trace 8 | 9 | from numpy.testing import assert_allclose 10 | 11 | 12 | def test_comm_matrix(data_dir, ping_pong_otf2_trace): 13 | # bytes sent between pairs of processes 14 | size_comm_matrix = Trace.from_otf2(str(ping_pong_otf2_trace)).comm_matrix() 15 | 16 | # number of messages sent between pairs of processes 17 | count_comm_matrix = Trace.from_otf2(str(ping_pong_otf2_trace)).comm_matrix("count") 18 | 19 | # 2 ranks in ping pong trace, so comm matrix should have shape 2 x 2 20 | assert size_comm_matrix.shape == count_comm_matrix.shape == (2, 2) 21 | 22 | # no messages from ranks to themselves 23 | # note: comm matrix elements accessed using matrix[sender_rank][receiver_rank] 24 | assert ( 25 | size_comm_matrix[0][0] 26 | == size_comm_matrix[1][1] 27 | == count_comm_matrix[0][0] 28 | == count_comm_matrix[1][1] 29 | == 0 30 | ) 31 | 32 | # 8 sends from each process (total of 4177920 bytes ~ 3.984 mebibytes) 33 | assert size_comm_matrix[0][1] == size_comm_matrix[1][0] == 4177920 34 | assert count_comm_matrix[0][1] == count_comm_matrix[1][0] == 8 35 | 36 | 37 | def test_comm_over_time(data_dir, ping_pong_otf2_trace): 38 | ping_pong = Trace.from_otf2(str(ping_pong_otf2_trace)) 39 | 40 | hist, edges = ping_pong.comm_over_time(output="size", message_type="send", bins=5) 41 | 42 | assert len(edges) == 6 43 | assert all(hist[0:3] == 0) 44 | assert hist[4] == 4177920 * 2 45 | 46 | hist, edges = ping_pong.comm_over_time( 47 | output="count", message_type="receive", bins=5 48 | ) 49 | 50 | assert len(edges) == 6 51 | assert all(hist[0:3] == 0) 52 | assert hist[4] == 8 * 2 53 | 54 | 55 | def test_comm_by_process(data_dir, ping_pong_otf2_trace): 56 | ping_pong = Trace.from_otf2(str(ping_pong_otf2_trace)) 57 | 58 | sizes = ping_pong.comm_by_process() 59 | 60 | assert sizes.loc[0]["Sent"] == 4177920 61 | assert sizes.loc[0]["Received"] == 4177920 62 | assert sizes.loc[1]["Sent"] == 4177920 63 | assert sizes.loc[1]["Received"] == 4177920 64 | 65 | counts = ping_pong.comm_by_process(output="count") 66 | 67 | assert counts.loc[0]["Sent"] == 8 68 | assert counts.loc[0]["Received"] == 8 69 | assert counts.loc[1]["Sent"] == 8 70 | assert counts.loc[1]["Received"] == 8 71 | 72 | 73 | def test_match_events(data_dir, ping_pong_otf2_trace): 74 | trace = Trace.from_otf2(str(ping_pong_otf2_trace)) 75 | trace._match_events() 76 | 77 | df = trace.events 78 | 79 | # test both ranks 80 | rank_0_df = df.loc[(df["Process"] == 0) & (df["Event Type"] != "Instant")] 81 | rank_1_df = df.loc[(df["Process"] == 1) & (df["Event Type"] != "Instant")] 82 | 83 | # Make lists of normal and matching columns for both indices and 84 | # timestamps. Compare the values of these lists to ensure the pairing 85 | # functions produced correct results. 86 | rank_0_indices = rank_0_df.index.to_list() 87 | rank_0_matching_indices = rank_0_df["_matching_event"].to_list() 88 | rank_0_timestamps = rank_0_df["Timestamp (ns)"].to_list() 89 | rank_0_matching_timestamps = rank_0_df["_matching_timestamp"].to_list() 90 | 91 | # All events in ping pong trace except main are leaves in the call tree, 92 | # so the leave row occurs immediately after the enter. The below assertions 93 | # test this.
94 | for i in range(len(rank_0_df)): 95 | if ( 96 | rank_0_df["Event Type"].iloc[i] == "Enter" 97 | and rank_0_df["Name"].iloc[i] != "int main(int, char**)" 98 | ): 99 | # the matching event and timestamp for enter rows 100 | # should occur right after (ex: (Enter: 45, Leave: 46)) 101 | assert rank_0_matching_indices[i] == rank_0_indices[i + 1] 102 | assert rank_0_matching_timestamps[i] == rank_0_timestamps[i + 1] 103 | elif rank_0_df["Name"].iloc[i] != "int main(int, char**)": 104 | # the matching event and timestamp for leave rows 105 | # should occur right before (ex: (Enter: 45, Leave: 46)) 106 | assert rank_0_matching_indices[i] == rank_0_indices[i - 1] 107 | assert rank_0_matching_timestamps[i] == rank_0_timestamps[i - 1] 108 | 109 | # tests all the same as mentioned above, except for rank 1 as well 110 | rank_1_indices = rank_1_df.index.to_list() 111 | rank_1_matching_indices = rank_1_df["_matching_event"].to_list() 112 | rank_1_timestamps = rank_1_df["Timestamp (ns)"].to_list() 113 | rank_1_matching_timestamps = rank_1_df["_matching_timestamp"].to_list() 114 | 115 | for i in range(len(rank_1_df)): 116 | if ( 117 | rank_1_df["Event Type"].iloc[i] == "Enter" 118 | and rank_1_df["Name"].iloc[i] != "int main(int, char**)" 119 | ): 120 | assert rank_1_matching_indices[i] == rank_1_indices[i + 1] 121 | assert rank_1_matching_timestamps[i] == rank_1_timestamps[i + 1] 122 | elif rank_1_df["Name"].iloc[i] != "int main(int, char**)": 123 | assert rank_1_matching_indices[i] == rank_1_indices[i - 1] 124 | assert rank_1_matching_timestamps[i] == rank_1_timestamps[i - 1] 125 | 126 | # Checks that the Matching Indices and Timestamps for the Enter rows are 127 | # greater than their values 128 | assert ( 129 | np.array(df.loc[df["Event Type"] == "Enter"]["_matching_event"]) 130 | > np.array(df.loc[df["Event Type"] == "Enter"].index) 131 | ).all() 132 | assert ( 133 | np.array(df.loc[df["Event Type"] == "Enter"]["_matching_timestamp"]) 134 | > np.array(df.loc[df["Event Type"] == "Enter"]["Timestamp (ns)"]) 135 | ).all() 136 | 137 | 138 | def test_match_caller_callee(data_dir, ping_pong_otf2_trace): 139 | trace = Trace.from_otf2(str(ping_pong_otf2_trace)) 140 | trace._match_caller_callee() 141 | 142 | df = trace.events 143 | 144 | # nodes with a parent = 40 145 | assert len(df.loc[df["_parent"].notnull()]) == 40 146 | 147 | # nodes with children = 2 148 | assert len(df.loc[df["_children"].notnull()]) == 2 149 | 150 | 151 | def test_time_profile(data_dir, ping_pong_otf2_trace): 152 | trace = Trace.from_otf2(str(ping_pong_otf2_trace)) 153 | trace.calc_exc_metrics(["Timestamp (ns)"]) 154 | 155 | time_profile = trace.time_profile(num_bins=62) 156 | 157 | # check length 158 | assert len(time_profile) == 62 159 | 160 | # check bin sizes 161 | exp_duration = trace.events["Timestamp (ns)"].max() 162 | exp_bin_size = exp_duration / 62 163 | bin_sizes = time_profile["bin_end"] - time_profile["bin_start"] 164 | 165 | assert_allclose(bin_sizes, exp_bin_size) 166 | 167 | # check that sum of function contributions per bin equals bin duration 168 | exp_bin_total_duration = exp_bin_size * 2 169 | time_profile.drop(columns=["bin_start", "bin_end"], inplace=True) 170 | 171 | assert_allclose(time_profile.sum(axis=1), exp_bin_total_duration) 172 | 173 | # check for each function that sum of exc time per bin equals total exc time 174 | total_exc_times = trace.events.groupby("Name")["time.exc"].sum() 175 | 176 | for column in time_profile: 177 | if column == "idle_time": 178 | continue 179 | 180 | 
assert_allclose(time_profile[column].sum(), total_exc_times[column]) 181 | 182 | # check normalization 183 | norm = trace.time_profile(num_bins=62, normalized=True) 184 | norm.drop(columns=["bin_start", "bin_end"], inplace=True) 185 | 186 | assert (time_profile / exp_bin_total_duration).equals(norm) 187 | 188 | # check against ground truth 189 | # generated using Vampir's Function Summary chart (step size=16) 190 | assert_allclose(norm.loc[0]["int main(int, char**)"], 0.00299437, rtol=1e-05) 191 | assert_allclose(norm.loc[0]["MPI_Init"], 0.93999815) 192 | assert_allclose(norm.loc[0]["MPI_Comm_size"], 0.0) 193 | assert_allclose(norm.loc[0]["MPI_Comm_rank"], 0.0) 194 | assert_allclose(norm.loc[0]["MPI_Send"], 0.0) 195 | assert_allclose(norm.loc[0]["MPI_Recv"], 0.0) 196 | assert_allclose(norm.loc[0]["MPI_Finalize"], 0.0) 197 | 198 | assert_allclose(norm.loc[1:59]["int main(int, char**)"], 0.0) 199 | assert_allclose(norm.loc[1:59]["MPI_Init"], 1.0) 200 | assert_allclose(norm.loc[1:59]["MPI_Comm_size"], 0.0) 201 | assert_allclose(norm.loc[1:59]["MPI_Comm_rank"], 0.0) 202 | assert_allclose(norm.loc[1:59]["MPI_Send"], 0.0) 203 | assert_allclose(norm.loc[1:59]["MPI_Recv"], 0.0) 204 | assert_allclose(norm.loc[1:59]["MPI_Finalize"], 0.0) 205 | 206 | assert_allclose(norm.loc[60]["int main(int, char**)"], 0.39464799) 207 | assert_allclose(norm.loc[60]["MPI_Init"], 0.14843661) 208 | assert_allclose(norm.loc[60]["MPI_Send"], 0.24594134) 209 | assert_allclose(norm.loc[60]["MPI_Recv"], 0.21017099) 210 | assert_allclose(norm.loc[60]["MPI_Comm_size"], 0.00046047, rtol=1e-05) 211 | assert_allclose(norm.loc[60]["MPI_Comm_rank"], 0.00034261, rtol=1e-05) 212 | assert_allclose(norm.loc[60]["MPI_Finalize"], 0.0) 213 | 214 | assert_allclose(norm.loc[61]["int main(int, char**)"], 0.43560727) 215 | assert_allclose(norm.loc[61]["MPI_Init"], 0.0) 216 | assert_allclose(norm.loc[61]["MPI_Send"], 0.29640222) 217 | assert_allclose(norm.loc[61]["MPI_Recv"], 0.24300865) 218 | assert_allclose(norm.loc[61]["MPI_Comm_size"], 0.0) 219 | assert_allclose(norm.loc[61]["MPI_Comm_rank"], 0.0) 220 | assert_allclose(norm.loc[61]["MPI_Finalize"], 0.01614835, rtol=1e-05) 221 | -------------------------------------------------------------------------------- /pipit/trace.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of 2 | # Maryland. See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | import numpy as np 7 | import pandas as pd 8 | from pipit.util.cct import create_cct 9 | 10 | 11 | class Trace: 12 | """ 13 | A trace dataset is read into an object of this type, which 14 | includes one or more dataframes and a calling context tree. 
15 | """ 16 | 17 | def __init__(self, definitions, events, cct=None, parallelism_levels=None): 18 | """Create a new Trace object.""" 19 | self.definitions = definitions 20 | self.events = events 21 | self.cct = cct 22 | if parallelism_levels is None: 23 | self.parallelism_levels = ["Process"] 24 | else: 25 | assert isinstance(parallelism_levels, list) 26 | self.parallelism_levels = parallelism_levels 27 | 28 | # list of numeric columns which we can calculate inc/exc metrics with 29 | self.numeric_cols = list( 30 | self.events.select_dtypes(include=[np.number]).columns.values 31 | ) 32 | 33 | # will store columns names for inc/exc metrics 34 | self.inc_metrics = [] 35 | self.exc_metrics = [] 36 | 37 | def create_cct(self): 38 | # adds a column of cct nodes to the events dataframe 39 | # and stores the graph object in self.cct 40 | self.cct = create_cct(self.events) 41 | 42 | @staticmethod 43 | def from_otf2(dirname, num_processes=None, create_cct=False): 44 | """Read an OTF2 trace into a new Trace object.""" 45 | # import this lazily to avoid circular dependencies 46 | from .readers.otf2_reader import OTF2Reader 47 | 48 | return OTF2Reader(dirname, num_processes, create_cct).read() 49 | 50 | @staticmethod 51 | def from_hpctoolkit(dirname): 52 | """Read an HPCToolkit trace into a new Trace object.""" 53 | # import this lazily to avoid circular dependencies 54 | from .readers.hpctoolkit_reader import HPCToolkitReader 55 | 56 | return HPCToolkitReader(dirname).read() 57 | 58 | @staticmethod 59 | def from_projections(dirname, num_processes=None, create_cct=False): 60 | """Read a Projections trace into a new Trace object.""" 61 | # import this lazily to avoid circular dependencies 62 | from .readers.projections_reader import ProjectionsReader 63 | 64 | return ProjectionsReader(dirname, num_processes, create_cct).read() 65 | 66 | @staticmethod 67 | def from_nsight(filename, create_cct=False): 68 | """Read an Nsight trace into a new Trace object.""" 69 | # import this lazily to avoid circular dependencies 70 | from .readers.nsight_reader import NsightReader 71 | 72 | return NsightReader(filename, create_cct).read() 73 | 74 | @staticmethod 75 | def from_nsight_sqlite(filename, create_cct=False, trace_types="all"): 76 | """Read an Nsight trace into a new Trace object.""" 77 | # import this lazily to avoid circular dependencies 78 | from .readers.nsight_sqlite_reader import NSightSQLiteReader 79 | 80 | return NSightSQLiteReader(filename, create_cct, trace_types).read() 81 | 82 | @staticmethod 83 | def from_csv(filename): 84 | events_dataframe = pd.read_csv(filename, skipinitialspace=True) 85 | 86 | # if timestamps are in seconds, convert them to nanoseconds 87 | if "Timestamp (s)" in events_dataframe.columns: 88 | events_dataframe["Timestamp (s)"] *= 10**9 89 | events_dataframe.rename( 90 | columns={"Timestamp (s)": "Timestamp (ns)"}, inplace=True 91 | ) 92 | 93 | # ensure that ranks are ints 94 | events_dataframe = events_dataframe.astype({"Process": "int32"}) 95 | 96 | # make certain columns categorical 97 | events_dataframe = events_dataframe.astype( 98 | { 99 | "Event Type": "category", 100 | "Name": "category", 101 | "Process": "category", 102 | } 103 | ) 104 | 105 | # sort the dataframe by Timestamp 106 | events_dataframe.sort_values( 107 | by="Timestamp (ns)", axis=0, ascending=True, inplace=True, ignore_index=True 108 | ) 109 | 110 | return Trace(None, events_dataframe) 111 | 112 | def to_chrome(self, filename=None): 113 | """Export as Chrome Tracing JSON, which can be opened 114 | in 
Perfetto.""" 115 | from .writers.chrome_writer import ChromeWriter 116 | 117 | return ChromeWriter(self, filename).write() 118 | 119 | def _match_events(self): 120 | """Matches corresponding enter/leave events and adds two columns to the 121 | dataframe: _matching_event and _matching_timestamp 122 | """ 123 | 124 | if "_matching_event" not in self.events.columns: 125 | matching_events = [float("nan")] * len(self.events) 126 | matching_times = [float("nan")] * len(self.events) 127 | 128 | # only pairing enter and leave rows 129 | enter_leave_df = self.events.loc[ 130 | self.events["Event Type"].isin(["Enter", "Leave"]) 131 | ] 132 | 133 | # list of processes and/or threads to iterate over 134 | if "Thread" in self.events.columns: 135 | exec_locations = set(zip(self.events["Process"], self.events["Thread"])) 136 | has_thread = True 137 | else: 138 | exec_locations = set(self.events["Process"]) 139 | has_thread = False 140 | 141 | for curr_loc in exec_locations: 142 | # only filter by thread if the trace has a thread column 143 | if has_thread: 144 | curr_process, curr_thread = curr_loc 145 | filtered_df = enter_leave_df.loc[ 146 | (enter_leave_df["Process"] == curr_process) 147 | & (enter_leave_df["Thread"] == curr_thread) 148 | ] 149 | else: 150 | filtered_df = enter_leave_df.loc[ 151 | (enter_leave_df["Process"] == curr_loc) 152 | ] 153 | 154 | stack = [] 155 | 156 | # Note: The reason that we are creating lists that are 157 | # copies of the dataframe columns below and iterating over 158 | # those instead of using pandas iterrows is due to an 159 | # observed improvement in performance when using lists. 160 | 161 | event_types = list(filtered_df["Event Type"]) 162 | df_indices, timestamps, names = ( 163 | list(filtered_df.index), 164 | list(filtered_df["Timestamp (ns)"]), 165 | list(filtered_df.Name), 166 | ) 167 | 168 | # Iterate through all events of filtered DataFrame 169 | for i in range(len(filtered_df)): 170 | curr_df_index, curr_timestamp, evt_type, curr_name = ( 171 | df_indices[i], 172 | timestamps[i], 173 | event_types[i], 174 | names[i], 175 | ) 176 | 177 | if evt_type == "Enter": 178 | # Add current dataframe index and timestamp to stack 179 | stack.append((curr_df_index, curr_timestamp, curr_name)) 180 | else: 181 | # we want to iterate through the stack in reverse order 182 | # until we find the corresponding "Enter" Event 183 | enter_name, i = None, len(stack) - 1 184 | while enter_name != curr_name and i > -1: 185 | enter_df_index, enter_timestamp, enter_name = stack[i] 186 | i -= 1 187 | 188 | if enter_name == curr_name: 189 | # remove matched event from the stack 190 | del stack[i + 1] 191 | 192 | # Fill in the lists with the matching values if event found 193 | matching_events[enter_df_index] = curr_df_index 194 | matching_events[curr_df_index] = enter_df_index 195 | 196 | matching_times[enter_df_index] = curr_timestamp 197 | matching_times[curr_df_index] = enter_timestamp 198 | else: 199 | continue 200 | 201 | self.events["_matching_event"] = matching_events 202 | self.events["_matching_timestamp"] = matching_times 203 | 204 | self.events = self.events.astype({"_matching_event": "Int32"}) 205 | 206 | def _match_caller_callee(self): 207 | """Matches callers (parents) to callees (children) and adds three 208 | columns to the dataframe: 209 | _depth, _parent, and _children 210 | _depth is the depth of the event in the call tree (starting from 0 for root) 211 | _parent is the dataframe index of a row's parent event. 
212 | _children is a list of dataframe indices of a row's children events. 213 | """ 214 | 215 | if "_children" not in self.events.columns: 216 | # match events so we can 217 | # ignore unmatched ones 218 | self._match_events() 219 | 220 | def _match_caller_callee_by_level(filtered_df): 221 | # Matches caller/callee for each parallelism level 222 | children = np.array([None] * len(filtered_df)) 223 | depth, parent = [float("nan")] * len(filtered_df), [float("nan")] * len( 224 | filtered_df 225 | ) 226 | 227 | # Depth is the level in the 228 | # Call Tree starting from 0 229 | curr_depth = 0 230 | 231 | stack = [] 232 | event_types = list(filtered_df["Event Type"]) 233 | 234 | # loop through the events of the filtered dataframe 235 | for i in range(len(filtered_df)): 236 | evt_type = event_types[i] 237 | 238 | if evt_type == "Enter": 239 | if curr_depth > 0: # if event is a child of some other event 240 | parent_df_index = stack[-1] 241 | 242 | if children[parent_df_index] is None: 243 | # create a new list of children for the 244 | # parent if the current event is the first 245 | # child being added 246 | children[parent_df_index] = [filtered_df.index[i]] 247 | else: 248 | children[parent_df_index].append(filtered_df.index[i]) 249 | 250 | parent[i] = filtered_df.index[parent_df_index] 251 | 252 | depth[i] = curr_depth 253 | curr_depth += 1 254 | 255 | # add enter dataframe index to stack 256 | stack.append(i) 257 | else: 258 | # pop event off stack once matching leave found 259 | # Note: parent, and children for a leave row 260 | # can be found using the matching index that 261 | # corresponds to the enter row 262 | stack.pop() 263 | 264 | curr_depth -= 1 265 | 266 | new_df = filtered_df.copy() # don't mutate in transform! 267 | new_df["_depth"] = depth 268 | new_df["_parent"] = parent 269 | new_df["_children"] = children 270 | return new_df 271 | 272 | # only use enter and leave rows 273 | # to determine calling relationships 274 | enter_leave_mask = self.events["Event Type"].isin(["Enter", "Leave"]) & ( 275 | self.events["_matching_event"].notnull() 276 | ) 277 | enter_leave_df = self.events.loc[enter_leave_mask] 278 | 279 | # add dummy values for depth/parent/children 280 | # (otherwise loc won't insert the values) 281 | self.events["_depth"] = 0 282 | self.events["_parent"] = None 283 | self.events["_children"] = None 284 | self.events.loc[enter_leave_mask] = enter_leave_df.groupby( 285 | self.parallelism_levels, group_keys=False, dropna=False 286 | ).apply(_match_caller_callee_by_level) 287 | 288 | self.events = self.events.astype({"_depth": "Int32", "_parent": "Int32"}) 289 | self.events = self.events.astype({"_depth": "category", "_parent": "category"}) 290 | 291 | def calc_inc_metrics(self, columns=None): 292 | # if no columns are specified by the user, then we calculate 293 | # inclusive metrics for all the numeric columns in the trace 294 | columns = self.numeric_cols if columns is None else columns 295 | 296 | # pair enter and leave rows 297 | if "_matching_event" not in self.events.columns: 298 | self._match_events() 299 | 300 | # only filter to enters that have a matching event 301 | enter_df = self.events.loc[ 302 | (self.events["Event Type"] == "Enter") 303 | & (self.events["_matching_event"].notnull()) 304 | ] 305 | 306 | # calculate inclusive metric for each column specified 307 | for col in columns: 308 | # name of column for this inclusive metric 309 | metric_col_name = ("time" if col == "Timestamp (ns)" else col) + ".inc" 310 | 311 | if metric_col_name not in 
self.events.columns: 312 | # calculate the inclusive metric by subtracting 313 | # the values at the enter rows from the values 314 | # at the corresponding leave rows 315 | self.events.loc[ 316 | (self.events["_matching_event"].notnull()) 317 | & (self.events["Event Type"] == "Enter"), 318 | metric_col_name, 319 | ] = ( 320 | self.events[col][enter_df["_matching_event"]].values 321 | - enter_df[col].values 322 | ) 323 | 324 | self.inc_metrics.append(metric_col_name) 325 | 326 | def calc_exc_metrics(self, columns=None): 327 | # calculate exc metrics for all numeric columns if not specified 328 | columns = self.numeric_cols if columns is None else columns 329 | 330 | # match caller and callee rows 331 | self._match_caller_callee() 332 | 333 | # exclusive metrics only change for rows that have children 334 | filtered_df = self.events.loc[self.events["_children"].notnull()] 335 | parent_df_indices, children = ( 336 | list(filtered_df.index), 337 | filtered_df["_children"].to_list(), 338 | ) 339 | 340 | for col in columns: 341 | # get the corresponding inclusive column name for this metric 342 | inc_col_name = ("time" if col == "Timestamp (ns)" else col) + ".inc" 343 | if inc_col_name not in self.events.columns: 344 | self.calc_inc_metrics([col]) 345 | 346 | # name of column for this exclusive metric 347 | metric_col_name = ("time" if col == "Timestamp (ns)" else col) + ".exc" 348 | 349 | if metric_col_name not in self.events.columns: 350 | # exc metric starts out as a copy of the inc metric values 351 | exc_values = self.events[inc_col_name].copy() 352 | inc_values = self.events[inc_col_name] 353 | 354 | for i in range(len(filtered_df)): 355 | curr_parent_idx, curr_children = parent_df_indices[i], children[i] 356 | for child_idx in curr_children: 357 | # subtract each child's inclusive metric from the total 358 | # to calculate the exclusive metric for the parent 359 | 360 | # if the exclusive metric is time, we only want to subtract 361 | # the overlapping portion of time between the parent and child 362 | # this is important for e.g. GPUs where execution happens async 363 | # relative to e.g. a kernel launch 364 | inc_metric = inc_values[child_idx] 365 | if col == "Timestamp (ns)": 366 | # calculate overlap between 367 | # start of child event and end of parent event 368 | end_time = min( 369 | self.events.loc[curr_parent_idx, "_matching_timestamp"], 370 | self.events.loc[child_idx, "_matching_timestamp"], 371 | ) 372 | inc_metric = max( 373 | end_time - self.events.loc[child_idx, "Timestamp (ns)"], 374 | 0, 375 | ) 376 | exc_values[curr_parent_idx] -= inc_metric 377 | 378 | self.events[metric_col_name] = exc_values 379 | self.exc_metrics.append(metric_col_name) 380 | 381 | def comm_matrix(self, output="size"): 382 | """ 383 | Communication Matrix for Peer-to-Peer (P2P) MPI messages 384 | 385 | Arguments: 386 | 1) output - 387 | string to choose whether the communication volume should be measured 388 | by bytes transferred between two processes or the number of messages 389 | sent (two choices - "size" or "count") 390 | 391 | Returns: 392 | Creates three lists - sender ranks, receiver ranks, and message volume. 393 | All of these lists are the length of the number of messages sent in the trace. 394 | It then loops through these lists containing individual message pairs 395 | and volume for those messages and updates the comm matrix. 396 | 397 | Finally, a 2D Numpy Array that represents the communication matrix for all P2P 398 | messages of the given trace is returned. 
399 | 400 | Note: 401 | The first dimension of the returned 2d array 402 | is senders and the second dimension is receivers 403 | ex) comm_matrix[sender_rank][receiver_rank] 404 | """ 405 | 406 | # get the list of ranks/processes 407 | # (mpi messages are sent between processes) 408 | ranks = set(self.events["Process"]) 409 | 410 | # create a 2d numpy array that will be returned 411 | # at the end of the function 412 | communication_matrix = np.zeros(shape=(len(ranks), len(ranks))) 413 | 414 | # filter the dataframe by MPI Send and Isend events 415 | sender_dataframe = self.events.loc[ 416 | self.events["Name"].isin(["MpiSend", "MpiIsend"]), 417 | ["Process", "Attributes"], 418 | ] 419 | 420 | # get the mpi ranks of all the sender processes 421 | # the length of the list is the total number of messages sent 422 | sender_ranks = sender_dataframe["Process"].to_list() 423 | 424 | # get the corresponding mpi ranks of the receivers 425 | # the length of the list is the total number of messages sent 426 | receiver_ranks = ( 427 | sender_dataframe["Attributes"] 428 | .apply(lambda attrDict: attrDict["receiver"]) 429 | .to_list() 430 | ) 431 | 432 | # the length of the message_volume list created below 433 | # is the total number of messages sent 434 | 435 | # number of bytes communicated for each message sent 436 | if output == "size": 437 | # (1 communication is a single row in the sender dataframe) 438 | message_volume = ( 439 | sender_dataframe["Attributes"] 440 | .apply(lambda attrDict: attrDict["msg_length"]) 441 | .to_list() 442 | ) 443 | elif output == "count": 444 | # 1 message between the pairs of processes 445 | # for each row in the sender dataframe 446 | message_volume = np.full(len(sender_dataframe), 1) 447 | 448 | for i in range(len(sender_ranks)): 449 | """ 450 | loops through all the communication events and adds the 451 | message volume to the corresponding entry of the 2d array 452 | using the sender and receiver ranks 453 | """ 454 | communication_matrix[sender_ranks[i], receiver_ranks[i]] += message_volume[ 455 | i 456 | ] 457 | 458 | return communication_matrix 459 | 460 | def message_histogram(self, bins=20, **kwargs): 461 | """Generates histogram of message frequency by size.""" 462 | 463 | # Filter by send events 464 | # TODO: replace with str.match 465 | messages = self.events[self.events["Name"].isin(["MpiSend", "MpiIsend"])] 466 | 467 | # Get message sizes 468 | sizes = messages["Attributes"].map(lambda x: x["msg_length"]) 469 | 470 | return np.histogram(sizes, bins=bins, **kwargs) 471 | 472 | def comm_over_time(self, output="size", message_type="send", bins=50, **kwargs): 473 | """Returns histogram of communication volume over time. 474 | 475 | Args: 476 | output (str, optional). Whether to calculate communication by "count" or 477 | "size". Defaults to "size". 478 | 479 | message_type (str, optional): Whether to compute for sends or 480 | receives. Defaults to "send". 481 | 482 | bins (int, optional): Number of bins in the histogram. Defaults to 483 | 50. 
484 | 485 | Returns: 486 | hist: Volume in size or number of messages in each time interval 487 | edges: Edges of time intervals 488 | """ 489 | # Filter by send or receive events 490 | events = self.events[ 491 | self.events["Name"].isin( 492 | ["MpiSend", "MpiIsend"] 493 | if message_type == "send" 494 | else ["MpiRecv", "MpiIrecv"] 495 | ) 496 | ] 497 | 498 | # Get timestamps and sizes 499 | timestamps = events["Timestamp (ns)"] 500 | sizes = events["Attributes"].apply(lambda x: x["msg_length"]) 501 | 502 | return np.histogram( 503 | timestamps, 504 | bins=bins, 505 | weights=sizes.tolist() if output == "size" else None, 506 | range=[ 507 | self.events["Timestamp (ns)"].min(), 508 | self.events["Timestamp (ns)"].max(), 509 | ], 510 | **kwargs 511 | ) 512 | 513 | def comm_by_process(self, output="size"): 514 | """Returns total communication volume in size or number of messages per 515 | process. 516 | 517 | Returns: 518 | pd.DataFrame: DataFrame containing total communication volume or 519 | number of messages sent and received by each process. 520 | """ 521 | comm_matrix = self.comm_matrix(output=output) 522 | 523 | # Get total sent and received for each process 524 | sent = comm_matrix.sum(axis=1) 525 | received = comm_matrix.sum(axis=0) 526 | 527 | return pd.DataFrame({"Sent": sent, "Received": received}).rename_axis("Process") 528 | 529 | def flat_profile( 530 | self, metrics="time.exc", groupby_column="Name", per_process=False 531 | ): 532 | """ 533 | Arguments: 534 | metrics - a string or list of strings containing the metrics to be aggregated 535 | groupby_column - a string or list containing the columns to be grouped by 536 | 537 | Returns: 538 | A Pandas DataFrame containing the aggregated metrics for each group, 539 | averaged across processes unless per_process is True. 540 | """ 541 | 542 | metrics = [metrics] if not isinstance(metrics, list) else metrics 543 | 544 | # calculate inclusive time if needed 545 | if "time.inc" in metrics: 546 | self.calc_inc_metrics(["Timestamp (ns)"]) 547 | 548 | # calculate exclusive time if needed 549 | if "time.exc" in metrics: 550 | self.calc_exc_metrics(["Timestamp (ns)"]) 551 | 552 | # First group by both the parallelism levels (e.g. process) and the 553 | # specified groupby column (like Name), and sum the metrics for each 554 | # combination. If per_process is False, average the sums across processes.
555 | if per_process: 556 | return ( 557 | self.events.loc[self.events["Event Type"] == "Enter"] 558 | .groupby([groupby_column] + self.parallelism_levels, observed=True)[ 559 | metrics 560 | ] 561 | .sum() 562 | ) 563 | else: 564 | return ( 565 | self.events.loc[self.events["Event Type"] == "Enter"] 566 | .groupby([groupby_column] + self.parallelism_levels, observed=True)[ 567 | metrics 568 | ] 569 | .sum() 570 | .groupby(groupby_column) 571 | .mean() 572 | ) 573 | 574 | def load_imbalance(self, metric="time.exc", num_processes=1): 575 | """ 576 | Arguments: 577 | metric - a string denoting the metric to calculate load imbalance for 578 | num_processes - the number of ranks with the highest metric values to 579 | list for each function 580 | 581 | Returns: 582 | A Pandas DataFrame indexed by function name with three columns: the mean 583 | of the metric across all ranks, the imbalance (max / mean) of the metric, 584 | and a list of the num_processes ranks with the highest values of the 585 | metric 586 | """ 587 | 588 | num_ranks = len(set(self.events["Process"])) 589 | num_display = num_ranks if num_processes > num_ranks else num_processes 590 | 591 | flat_profile = self.flat_profile(metrics=metric, per_process=True) 592 | 593 | imbalance_dict = dict() 594 | 595 | imb_metric = metric + ".imbalance" 596 | imb_ranks = "Top processes" 597 | mean_metric = metric + ".mean" 598 | 599 | imbalance_dict[imb_metric] = [] 600 | imbalance_dict[imb_ranks] = [] 601 | imbalance_dict[mean_metric] = [] 602 | 603 | functions = set(self.events.loc[self.events["Event Type"] == "Enter"]["Name"]) 604 | for function in functions: 605 | curr_series = flat_profile.loc[function] 606 | 607 | top_n = curr_series.sort_values(ascending=False).iloc[0:num_display] 608 | 609 | imbalance_dict[mean_metric].append(curr_series.mean()) 610 | imbalance_dict[imb_metric].append(top_n.values[0] / curr_series.mean()) 611 | imbalance_dict[imb_ranks].append(list(top_n.index)) 612 | 613 | imbalance_df = pd.DataFrame(imbalance_dict) 614 | imbalance_df.index = functions 615 | imbalance_df.sort_values(by=mean_metric, axis=0, inplace=True, ascending=False) 616 | 617 | return imbalance_df 618 | 619 | def idle_time(self, idle_functions=["Idle"], mpi_events=False): 620 | # calculate inclusive metrics 621 | if "time.inc" not in self.events.columns: 622 | self.calc_inc_metrics() 623 | 624 | if mpi_events: 625 | idle_functions = idle_functions + ["MPI_Wait", "MPI_Waitall", "MPI_Recv"] 626 | 627 | def calc_idle_time(events): 628 | # assumes events is sorted by time 629 | 630 | # Calculate idle time due to gaps in between events 631 | # This is the total time minus exclusive time spent in functions 632 | total_time = events["Timestamp (ns)"].max() - events["Timestamp (ns)"].min() 633 | 634 | idle_time = total_time - events["time.exc"].sum() 635 | 636 | # Calculate idle time due to idle_functions 637 | idle_time += events[events["Name"].isin(idle_functions)]["time.inc"].sum() 638 | return idle_time 639 | 640 | return ( 641 | self.events.groupby(self.parallelism_levels, dropna=False) 642 | .apply( 643 | calc_idle_time, 644 | ) 645 | .rename("idle_time") 646 | ) 647 | 648 | def _calculate_idle_time_for_process( 649 | self, process, idle_functions=["Idle"], mpi_events=False 650 | ): 651 | # calculate inclusive metrics 652 | if "time.inc" not in self.events.columns: 653 | self.calc_inc_metrics() 654 | 655 | if mpi_events: 656 | idle_functions = idle_functions + ["MPI_Wait", "MPI_Waitall", "MPI_Recv"] 657 | # filter the dataframe to include only 'Enter' events within the
specified 658 | # process with the specified function names 659 | df = self.events 660 | filtered_df = ( 661 | df.loc[df["Event Type"] == "Enter"] 662 | .loc[df["Process"] == process] 663 | .loc[df["Name"].isin(idle_functions)] 664 | ) 665 | # get the sum of the inclusive times of these events 666 | return filtered_df["time.inc"].sum() 667 | 668 | def time_profile(self, num_bins=50, normalized=False): 669 | """Computes time contributed by each function per time interval. 670 | 671 | Args: 672 | num_bins (int, optional): Number of evenly-sized time intervals to compute 673 | time profile for. Defaults to 50. 674 | normalized (bool, optional): Whether to return time contribution as 675 | percentage of time interval. Defaults to False. 676 | 677 | Returns: 678 | pd.DataFrame: Time profile of each function, where each column 679 | represents a function, and each row represents a time interval. 680 | """ 681 | # Generate metrics 682 | self._match_caller_callee() 683 | self.calc_inc_metrics(["Timestamp (ns)"]) 684 | 685 | # Filter by Enter rows 686 | events = self.events[self.events["Event Type"] == "Enter"].copy(deep=False) 687 | names = events["Name"].unique().tolist() 688 | 689 | # Create equal-sized bins 690 | edges = np.linspace( 691 | self.events["Timestamp (ns)"].min(), 692 | self.events["Timestamp (ns)"].max(), 693 | num_bins + 1, 694 | ) 695 | bin_size = edges[1] - edges[0] 696 | 697 | total_bin_duration = bin_size * len(events["Process"].unique()) 698 | 699 | profile = [] 700 | 701 | def calc_exc_time_in_bin(events): 702 | # TODO: check if the numpy equivalent of the below code is faster 703 | dfx_to_idx = { 704 | dfx: idx 705 | for (dfx, idx) in zip(events.index, [i for i in range(len(events))]) 706 | } 707 | 708 | # start out with exc times being a copy of inc times 709 | exc_times = list(events["inc_time_in_bin"].copy(deep=False)) 710 | 711 | # filter to events that have children 712 | filtered_df = events.loc[events["_children"].notnull()] 713 | 714 | parent_df_indices, children = ( 715 | list(filtered_df.index), 716 | filtered_df["_children"].to_list(), 717 | ) 718 | 719 | # Iterate through the events that are parents 720 | for i in range(len(filtered_df)): 721 | curr_parent_idx, curr_children = ( 722 | dfx_to_idx[parent_df_indices[i]], 723 | children[i], 724 | ) 725 | 726 | # Only consider inc times of children in current bin 727 | for child_df_idx in curr_children: 728 | if child_df_idx in dfx_to_idx: 729 | exc_times[curr_parent_idx] -= exc_times[ 730 | dfx_to_idx[child_df_idx] 731 | ] 732 | 733 | events["exc_time_in_bin"] = exc_times 734 | 735 | # For each bin, determine each function's time contribution 736 | for i in range(num_bins): 737 | start = edges[i] 738 | end = edges[i + 1] 739 | 740 | # Find functions that belong in this bin 741 | in_bin = events[ 742 | (events["_matching_timestamp"] > start) 743 | & (events["Timestamp (ns)"] < end) 744 | ].copy(deep=False) 745 | 746 | # Calculate inc_time_in_bin for each function 747 | # Case 1 - Function starts in bin 748 | in_bin.loc[in_bin["Timestamp (ns)"] >= start, "inc_time_in_bin"] = ( 749 | end - in_bin["Timestamp (ns)"] 750 | ) 751 | 752 | # Case 2 - Function ends in bin 753 | in_bin.loc[in_bin["_matching_timestamp"] <= end, "inc_time_in_bin"] = ( 754 | in_bin["_matching_timestamp"] - start 755 | ) 756 | 757 | # Case 3 - Function spans bin 758 | in_bin.loc[ 759 | (in_bin["Timestamp (ns)"] < start) 760 | & (in_bin["_matching_timestamp"] > end), 761 | "inc_time_in_bin", 762 | ] = ( 763 | end - start 764 | ) 765 | 766 | # Case 
4 - Function contained in bin 767 | in_bin.loc[ 768 | (in_bin["Timestamp (ns)"] >= start) 769 | & (in_bin["_matching_timestamp"] <= end), 770 | "inc_time_in_bin", 771 | ] = ( 772 | in_bin["_matching_timestamp"] - in_bin["Timestamp (ns)"] 773 | ) 774 | 775 | # Calculate exc_time_in_bin by subtracting inc_time_in_bin for all children 776 | calc_exc_time_in_bin(in_bin) 777 | 778 | # Sum across all processes 779 | agg = in_bin.groupby("Name")["exc_time_in_bin"].sum() 780 | profile.append(agg.to_dict()) 781 | 782 | # Convert to DataFrame 783 | df = pd.DataFrame(profile, columns=names) 784 | 785 | # Add idle_time column 786 | df.insert(0, "idle_time", total_bin_duration - df.sum(axis=1)) 787 | 788 | # Threshold for zero 789 | df.mask(df < 0.01, 0, inplace=True) 790 | 791 | # Normalize 792 | if normalized: 793 | df /= total_bin_duration 794 | 795 | # Add bin_start and bin_end 796 | df.insert(0, "bin_start", edges[:-1]) 797 | df.insert(0, "bin_end", edges[1:]) 798 | 799 | return df 800 | 801 | @staticmethod 802 | def multirun_analysis( 803 | traces, metric_column="Timestamp (ns)", groupby_column="Name" 804 | ): 805 | """ 806 | Arguments: 807 | traces - list of pipit traces 808 | metric_column - the column of the metric to be aggregated over 809 | groupby_column - the column that will be grouped by before aggregation 810 | 811 | Returns: 812 | A Pandas DataFrame indexed by the number of processes in the traces, the 813 | columns are the groups of the groupby_column, and the entries of the DataFrame 814 | are the aggregated metrics corresponding to the respective trace and group 815 | """ 816 | 817 | # for each trace, collect a flat profile 818 | flat_profiles = [] 819 | for trace in traces: 820 | trace.calc_exc_metrics([metric_column]) 821 | metric_col = ( 822 | "time.exc" 823 | if metric_column == "Timestamp (ns)" 824 | else metric_column + ".exc" 825 | ) 826 | flat_profiles.append( 827 | trace.flat_profile(metrics=[metric_col], groupby_column=groupby_column) 828 | ) 829 | 830 | # combine these flat profiles and index them by number of processes 831 | combined_df = pd.concat([fp[metric_col] for fp in flat_profiles], axis=1).T 832 | combined_df.index = [len(set(trace.events["Process"])) for trace in traces] 833 | combined_df.index.rename("Number of Processes", inplace=True) 834 | 835 | # sort the columns/groups in descending order of the aggregated metric values 836 | function_sums = combined_df.sum() 837 | combined_df = combined_df[function_sums.sort_values(ascending=False).index] 838 | 839 | return combined_df 840 | 841 | def detect_pattern( 842 | self, 843 | start_event, 844 | iterations=None, 845 | window_size=None, 846 | process=0, 847 | metric="time.exc", 848 | ): 849 | import stumpy 850 | 851 | enter_events = self.events[ 852 | (self.events["Name"] == start_event) 853 | & (self.events["Event Type"] == "Enter") 854 | & (self.events["Process"] == process) 855 | ] 856 | 857 | leave_events = self.events[ 858 | (self.events["Name"] == start_event) 859 | & (self.events["Event Type"] == "Leave") 860 | & (self.events["Process"] == process) 861 | ] 862 | 863 | # count the number of enter events to 864 | # determine the number of iterations if it's not 865 | # given by the user. 866 | if iterations is None: 867 | iterations = len(enter_events) 868 | 869 | # get the first enter and last leave of 870 | # the given event. we will only investigate 871 | # this portion of the data. 
872 | first_loop_enter = enter_events.index[0] 873 | last_loop_leave = leave_events.index[-1] 874 | 875 | df = self.events.iloc[first_loop_enter + 1 : last_loop_leave] 876 | filtered_df = df.loc[(df[metric].notnull()) & (df["Process"] == process)] 877 | y = filtered_df[metric].values[:] 878 | 879 | if window_size is None: 880 | window_size = int(len(y) / iterations) 881 | 882 | matrix_profile = stumpy.stump(y, window_size) 883 | dists, indices = stumpy.motifs(y, matrix_profile[:, 0], max_matches=iterations) 884 | 885 | # Gets the corresponding portion from the original 886 | # dataframe for each pattern. 887 | patterns = [] 888 | for idx in indices[0]: 889 | end_idx = idx + window_size 890 | 891 | match_original = self.events.loc[ 892 | self.events["Timestamp (ns)"].isin( 893 | filtered_df.iloc[idx:end_idx]["Timestamp (ns)"].values 894 | ) 895 | ] 896 | patterns.append(match_original) 897 | 898 | return patterns 899 | -------------------------------------------------------------------------------- /pipit/util/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Parallel Software and Systems Group, University of Maryland. 2 | # See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | -------------------------------------------------------------------------------- /pipit/util/cct.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Parallel Software and Systems Group, University of Maryland. 2 | # See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | from pipit.graph import Graph, Node 7 | 8 | 9 | def create_cct(events): 10 | """ 11 | Generic function to iterate through the events dataframe and create a CCT. 12 | Uses pipit's graph data structure for this. Returns a CCT 13 | and creates a new column in the Events DataFrame that stores 14 | a reference to each row's corresponding node in the CCT. 
15 | """ 16 | 17 | # CCT and list of nodes in DataFrame 18 | graph = Graph() 19 | graph_nodes = [None for i in range(len(events))] 20 | 21 | # determines whether a node exists or not 22 | callpath_to_node = dict() 23 | 24 | node_id = 0 # each node has a unique id 25 | 26 | # Filter the DataFrame to only Enter/Leave 27 | enter_leave_df = events.loc[events["Event Type"].isin(["Enter", "Leave"])] 28 | 29 | # list of processes and/or threads to iterate over 30 | if "Thread" in events.columns: 31 | exec_locations = set(zip(events["Process"], events["Thread"])) 32 | has_thread = True 33 | else: 34 | exec_locations = set(events["Process"]) 35 | has_thread = False 36 | 37 | for curr_loc in exec_locations: 38 | # only filter by thread if the trace has a thread column 39 | if has_thread: 40 | curr_process, curr_thread = curr_loc 41 | filtered_df = enter_leave_df.loc[ 42 | (enter_leave_df["Process"] == curr_process) 43 | & (enter_leave_df["Thread"] == curr_thread) 44 | ] 45 | else: 46 | filtered_df = enter_leave_df.loc[(enter_leave_df["Process"] == curr_loc)] 47 | 48 | curr_depth, callpath = 0, "" 49 | 50 | """ 51 | Iterating over lists instead of 52 | DataFrame columns is more efficient 53 | """ 54 | df_indices = filtered_df.index.to_list() 55 | function_names = filtered_df["Name"].to_list() 56 | event_types = filtered_df["Event Type"].to_list() 57 | 58 | # stacks used to iterate through the trace and add nodes to the cct 59 | functions_stack, nodes_stack = [], [] 60 | 61 | # iterating over the events of the current thread's trace 62 | for i in range(len(filtered_df)): 63 | curr_df_index, evt_type, function_name = ( 64 | df_indices[i], 65 | event_types[i], 66 | function_names[i], 67 | ) 68 | 69 | # encounter a new function through its entry point. 70 | if evt_type == "Enter": 71 | # add the function to the stack and get the call path 72 | functions_stack.append(function_name) 73 | callpath = "->".join(functions_stack) 74 | 75 | # get the parent node of the function if it exists 76 | parent_node = None if curr_depth == 0 else nodes_stack[-1] 77 | 78 | if callpath in callpath_to_node: 79 | # don't create new node if callpath is in map 80 | curr_node = callpath_to_node[callpath] 81 | else: 82 | # create new node if callpath isn't in map 83 | curr_node = Node(node_id, parent_node, curr_depth) 84 | callpath_to_node[callpath] = curr_node 85 | node_id += 1 86 | 87 | # add node as root or child of its 88 | # parent depending on current depth 89 | ( 90 | graph.add_root(curr_node) 91 | if curr_depth == 0 92 | else parent_node.add_child(curr_node) 93 | ) 94 | 95 | # Update nodes stack, column, and current depth 96 | nodes_stack.append(curr_node) 97 | graph_nodes[curr_df_index] = curr_node 98 | curr_depth += 1 99 | else: 100 | # we want to iterate through the stack in reverse order 101 | # until we find the corresponding "Enter" Event 102 | enter_name, j = None, len(functions_stack) - 1 103 | while enter_name != function_name and j > -1: 104 | enter_name = functions_stack[j] 105 | j -= 1 106 | 107 | if enter_name == function_name: 108 | # update stacks and current depth 109 | del functions_stack[j + 1] 110 | del nodes_stack[j + 1] 111 | curr_depth -= 1 112 | else: 113 | continue 114 | 115 | # Update the Trace with the generated cct 116 | events["Graph_Node"] = graph_nodes 117 | 118 | return graph 119 | -------------------------------------------------------------------------------- /pipit/util/config.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 
Parallel Software and Systems Group, University of Maryland. 2 | # See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | 7 | # Validator to check if the value entered is of type bool 8 | def bool_validator(key, value): 9 | if type(value) is not bool: 10 | raise TypeError( 11 | ( 12 | 'Error loading configuration: The Value "{}" for Configuration "{}"' 13 | + " must be of type bool" 14 | ).format(value, key) 15 | ) 16 | else: 17 | return True 18 | 19 | 20 | # Validator to check if the value entered is of type string 21 | def str_validator(key, value): 22 | if type(value) is not str: 23 | raise TypeError( 24 | ( 25 | 'Error loading configuration: The Value "{}" for Configuration "{}"' 26 | + " must be of type string" 27 | ).format(value, key) 28 | ) 29 | else: 30 | return True 31 | 32 | 33 | # Validator to check if the value entered is of type int 34 | def int_validator(key, value): 35 | if type(value) is not int: 36 | raise TypeError( 37 | ( 38 | 'Error loading configuration: The Value "{}" for Configuration "{}"' 39 | + " must be of type int" 40 | ).format(value, key) 41 | ) 42 | if key == "depth" and value < 1: 43 | raise ValueError("Depth must be at least 1") 44 | return True 45 | 46 | 47 | # Validator to check if the value entered is of type float 48 | def float_validator(key, value): 49 | if type(value) is not float: 50 | raise TypeError( 51 | ( 52 | 'Error loading configuration: The Value "{}" for Configuration "{}"' 53 | + " must be of type float" 54 | ).format(value, key) 55 | ) 56 | else: 57 | return True 58 | 59 | 60 | # Validator to check if the value entered is a valid log level 61 | def log_level_validator(key, value): 62 | if value not in ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]: 63 | raise ValueError( 64 | ( 65 | 'Error loading configuration: The Value "{}" for Configuration "{}"' 66 | + " must be a valid log level" 67 | ).format(value, key) 68 | ) 69 | else: 70 | return True 71 | 72 | 73 | # Validator to check if the value entered is a valid URL 74 | def url_validator(key, value): 75 | if value.startswith("http://") or value.startswith("https://"): 76 | return True 77 | else: 78 | raise ValueError( 79 | ( 80 | 'Error loading configuration: The Value "{}" for Configuration "{}"' 81 | + " must be a valid URL" 82 | ).format(value, key) 83 | ) 84 | 85 | 86 | registered_options = { 87 | "log_level": { 88 | "default": "INFO", 89 | "validator": log_level_validator, 90 | }, 91 | "notebook_url": { 92 | "default": "http://localhost:8888", 93 | "validator": url_validator, 94 | }, 95 | } 96 | 97 | global_config = {key: registered_options[key]["default"] for key in registered_options} 98 | 99 | 100 | # Returns the current value of the specified config key 101 | def get_option(key): 102 | if not key or key not in registered_options: 103 | raise ValueError("No such key(s)") 104 | else: 105 | return global_config[key] 106 | 107 | 108 | # Updates the value of the specified key 109 | def set_option(key, val): 110 | if not key or key not in registered_options: 111 | raise ValueError("No such key(s)") 112 | 113 | validator = registered_options[key]["validator"] 114 | 115 | if validator(key, val): 116 | global_config[key] = val 117 | 118 | 119 | # Resets the value of the specified key 120 | # If "all" is passed in, resets values of all keys 121 | def reset_option(key): 122 | if not key: 123 | raise ValueError("No such key(s)") 124 | 125 | if key in registered_options: 126 | global_config[key] = registered_options[key]["default"] 127 | elif key ==
"all": 128 | for k in registered_options: 129 | global_config[k] = registered_options[k]["default"] 130 | else: 131 | raise ValueError( 132 | "You must specify a valid key. Or, use the special keyword " 133 | '"all" to reset all the options to their default value' 134 | ) 135 | -------------------------------------------------------------------------------- /pipit/writers/chrome_writer.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | class ChromeWriter: 5 | """Exports traces to the Chrome Tracing JSON format which can be opened with Chrome 6 | Trace Viewer and Perfetto for analysis using these tools. 7 | 8 | This exports to the older Chrome Tracing JSON format which is still supported by 9 | Perfetto, and not the newer Perfetto binary format. 10 | 11 | See https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview # noqa 12 | """ 13 | 14 | def __init__(self, trace, filename="trace.json"): 15 | self.trace = trace 16 | self.filename = filename 17 | 18 | def write(self): 19 | events = self.trace.events 20 | 21 | # Assign the fields as expected by the Chrome Tracing JSON format 22 | # Let's create a new dataframe to avoid modifying the original 23 | df = pd.DataFrame() 24 | 25 | # "name" represents the event name 26 | df["name"] = events["Name"] 27 | 28 | # "ph" represents event type -- also called "phase" 29 | # Rename Enter events to "B" (begin), Leave events to "E" (end), 30 | # and Instant events to "i" 31 | df["ph"] = events["Event Type"].replace( 32 | ["Enter", "Leave", "Instant"], ["B", "E", "i"] 33 | ) 34 | 35 | # "ts" represents the timestamp (in microseconds) of the event 36 | df["ts"] = (events["Timestamp (ns)"] / 1e3).astype(int) 37 | 38 | # "pid" represents the process ID for the process that the event occurs in 39 | df["pid"] = events["Process"] 40 | 41 | # "tid" represents the thread ID for the thread that the event occurs in 42 | if "Thread" in events.columns: 43 | df["tid"] = events["Thread"] 44 | 45 | # Put all of the additional event attributes into the "args" field 46 | if "Attributes" in events.columns: 47 | df["args"] = events["Attributes"] 48 | 49 | # Write the dataframe to a JSON file 50 | return df.to_json(path_or_buf=self.filename, orient="records") 51 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of 2 | # Maryland. See the top-level LICENSE file for details. 3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | [pytest] 7 | addopts = --durations=20 -ra 8 | testpaths = pipit/tests 9 | python_files = *.py 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | otf2 3 | pandas 4 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022-2023 Parallel Software and Systems Group, University of 2 | # Maryland. See the top-level LICENSE file for details.
3 | # 4 | # SPDX-License-Identifier: MIT 5 | 6 | from setuptools import setup 7 | 8 | setup( 9 | name="pipit", 10 | version="0.1.0", 11 | description="A Python library for analyzing parallel execution traces", 12 | url="https://github.com/hpcgroup/pipit", 13 | author="Abhinav Bhatele", 14 | author_email="bhatele@cs.umd.edu", 15 | license="MIT", 16 | classifiers=[ 17 | "Development Status :: 4 - Beta", 18 | "License :: OSI Approved :: MIT License", 19 | ], 20 | keywords="distributed computing, parallel computing, GPU traces", 21 | packages=["pipit", "pipit.readers", "pipit.tests", "pipit.util", "pipit.writers"], 22 | install_requires=[ 23 | "numpy", 24 | "otf2", 25 | "pandas", 26 | ], 27 | ) 28 | --------------------------------------------------------------------------------