├── .gitmodules ├── proto.lock ├── requirements-dev.txt ├── doc └── timeline_visualizer.png ├── tests ├── data │ ├── json_trace.tar.gz │ ├── 1.0.2-chakra.0.0.4.tgz │ └── feeder_tests_trace.tar.gz ├── trace_link │ ├── test_chakra_host_trace_loader.py │ ├── test_chakra_device_trace_loader.py │ ├── test_unique_id_assigner.py │ └── test_kineto_operator.py ├── jsonizer │ └── test_jsonizer.py ├── converter │ ├── test_pytorch_tensor.py │ ├── test_pytorch_node.py │ └── test_pytorch_converter.py ├── visualizer │ └── test_visualizer.py └── feeder │ ├── tests.cpp │ └── wrapper_tests.cpp ├── setup.cfg ├── .gitignore ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── general_question.md │ ├── feature_request.md │ └── bug_report.md ├── workflows │ ├── cpp_lint.yml │ ├── python_lint.yml │ ├── python_tests.yml │ ├── end_to_end_tests.yml │ ├── feeder_tests.yml │ ├── cla.yml │ └── codeql.yml └── PULL_REQUEST_TEMPLATE.md ├── setup.py ├── CONTRIBUTING.md ├── src ├── jsonizer │ └── jsonizer.py ├── feeder │ ├── et_feeder.h │ ├── wrapper_node.h │ ├── et_feeder_node.h │ ├── json_node.h │ ├── json_node.cpp │ ├── et_feeder.cpp │ └── et_feeder_node.cpp ├── trace_link │ ├── chakra_host_trace_loader.py │ ├── trace_link.py │ ├── unique_id_assigner.py │ ├── kineto_operator.py │ └── chakra_device_trace_loader.py ├── visualizer │ └── visualizer.py ├── converter │ ├── pytorch_tensor.py │ ├── converter.py │ └── pytorch_node.py ├── timeline_visualizer │ └── timeline_visualizer.py ├── third_party │ └── utils │ │ ├── protoio.hh │ │ ├── protolib.py │ │ └── protoio.cc └── generator │ └── generator.py ├── README.md ├── .clang-format ├── pyproject.toml ├── schema └── protobuf │ └── et_def.proto ├── USER_GUIDE.md └── LICENSE.md /.gitmodules: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /proto.lock: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pyright==1.1.359 2 | pytest==8.1.1 3 | ruff==0.3.7 4 | vulture==2.11 5 | -------------------------------------------------------------------------------- /doc/timeline_visualizer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcommons/chakra/HEAD/doc/timeline_visualizer.png -------------------------------------------------------------------------------- /tests/data/json_trace.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcommons/chakra/HEAD/tests/data/json_trace.tar.gz -------------------------------------------------------------------------------- /tests/data/1.0.2-chakra.0.0.4.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcommons/chakra/HEAD/tests/data/1.0.2-chakra.0.0.4.tgz -------------------------------------------------------------------------------- /tests/data/feeder_tests_trace.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcommons/chakra/HEAD/tests/data/feeder_tests_trace.tar.gz -------------------------------------------------------------------------------- /setup.cfg: 
-------------------------------------------------------------------------------- 1 | [build_grpc] 2 | proto_files = et_def.proto 3 | grpc_files = et_def.proto 4 | proto_path = schema/protobuf/ 5 | output_path = schema/protobuf/ 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info/ 2 | build/ 3 | *_pb2*.py* 4 | *.pyc 5 | __pycache__/ 6 | *.egg 7 | *.et 8 | *.dot 9 | .pyre 10 | *et_def.pb.cc 11 | *et_def.pb.h -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # These owners will be the default owners for everything in the repo. 2 | # Unless a later match takes precedence, they will be requested for review when someone opens a pull request. 3 | * @mlcommons/wg-chakra 4 | 5 | /CODEOWNERS @mlcommons/staff 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/general_question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: General question 3 | about: Ask a question or seek clarification about the project 4 | title: '' 5 | labels: 'question' 6 | assignees: '' 7 | --- 8 | 9 | > Please provide a detailed description of your question or the information you seek. 10 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | --- 8 | 9 | ## Problem Related to the Feature 10 | > A clear and concise description of what the problem is. 11 | 12 | ## Proposed Solution 13 | > A clear and concise description of what you want to happen. 14 | -------------------------------------------------------------------------------- /.github/workflows/cpp_lint.yml: -------------------------------------------------------------------------------- 1 | name: C++ Lint 2 | 3 | on: pull_request 4 | 5 | jobs: 6 | cpp-lint: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - name: Checkout Code 11 | uses: actions/checkout@v2 12 | 13 | - name: Format and Lint C++ Code 14 | uses: DoozyX/clang-format-lint-action@v0.18.1 15 | with: 16 | source: '.' 17 | extensions: 'cc,cpp,h,hh' 18 | clangFormatVersion: 16 19 | style: file 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | --- 8 | 9 | ## Describe the Bug 10 | > A clear and concise description of what the bug is. 11 | 12 | ## Steps to Reproduce 13 | > Steps to reproduce the behavior. 14 | > Please include the version information where the bug was observed. 15 | 16 | ## Expected Behavior 17 | > A clear and concise description of what you expected to happen. 18 | 19 | ## Screenshots 20 | > If applicable, add screenshots to help explain your problem.
21 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools.command.build import build 3 | 4 | class build_grpc(build): 5 | """ 6 | Custom build class to include gRPC build commands. 7 | 8 | This class modifies the default build process to include additional sub-commands 9 | necessary for building gRPC components. 10 | 11 | Attributes 12 | sub_commands (list): List of sub-commands to be executed during the build process. 13 | """ 14 | 15 | sub_commands = [("build_grpc", None)] + build.sub_commands 16 | 17 | 18 | setup(cmdclass={"build": build_grpc}) 19 | -------------------------------------------------------------------------------- /.github/workflows/python_lint.yml: -------------------------------------------------------------------------------- 1 | name: Python Lint 2 | 3 | on: pull_request 4 | 5 | jobs: 6 | python-lint: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - name: Checkout Code 11 | uses: actions/checkout@v2 12 | 13 | - name: Setup Python Environment 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: '3.10.14' 17 | 18 | - name: Install Dependencies 19 | run: | 20 | pip install -r requirements-dev.txt 21 | 22 | - name: Lint Python Code 23 | run: | 24 | ruff format . 25 | ruff check . 26 | 27 | - name: Run Pyright 28 | run: | 29 | pyright 30 | 31 | - name: Run vulture check 32 | run: vulture src/ tests/ 33 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing 2 | 3 | The best way to contribute to MLCommons is to get involved with one of our many project communities. You can find more information about getting involved with MLCommons [here](https://mlcommons.org/en/get-involved/#getting-started). 4 | 5 | Generally, we encourage people to become an MLCommons member if they wish to contribute to MLCommons projects, but outside pull requests are very welcome too. 6 | 7 | Regardless of whether you are a member, your organization needs to sign the MLCommons CLA. Please fill out this [CLA sign-up form](https://forms.gle/Ew1KkBVpyeJDuRw67) to get started. 8 | 9 | MLCommons project work is tracked with issue trackers and pull requests. Modify the project in your own fork and issue a pull request once you want other developers to take a look at what you have done and discuss the proposed changes. Ensure that cla-bot and other checks pass for your pull requests. 10 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Summary 2 | Provide a concise summary of the changes introduced by this pull request. Detail the purpose and scope of the changes, referencing any relevant issues or discussions. Explain how these changes address the problem or improve the project. 3 | 4 | ## Test Plan 5 | In this section, describe the testing you have performed to verify the changes. Include: 6 | - A clear description of the testing environment. 7 | - The steps you followed to test the new features or bug fixes. 8 | - Any specific commands used during testing, along with their outputs. 9 | - A description of the results and observations from your testing.
10 | This information is crucial for reviewers to understand how the changes have been validated. 11 | 12 | ## Additional Notes 13 | Include any other notes or comments about the pull request here. This can include challenges faced, future considerations, or context that reviewers might find helpful. 14 | -------------------------------------------------------------------------------- /.github/workflows/python_tests.yml: -------------------------------------------------------------------------------- 1 | name: Python Unit Tests 2 | 3 | on: pull_request 4 | 5 | jobs: 6 | python-tests: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - name: Checkout Code 11 | uses: actions/checkout@v2 12 | 13 | - name: Setup Python Environment 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: '3.10.14' 17 | 18 | - name: Install Chakra 19 | run: | 20 | pip install . 21 | 22 | - name: Install PARAM 23 | run: | 24 | git clone https://github.com/facebookresearch/param.git 25 | cd param/et_replay 26 | git checkout 7b19f586dd8b267333114992833a0d7e0d601630 27 | pip install . 28 | 29 | - name: Install HTA 30 | run: | 31 | git clone https://github.com/facebookresearch/HolisticTraceAnalysis.git 32 | cd HolisticTraceAnalysis 33 | git checkout d731cc2e2249976c97129d409a83bd53d93051f6 34 | git submodule update --init 35 | pip install -r requirements.txt 36 | pip install -e . 37 | 38 | - name: Install Dependencies 39 | run: | 40 | pip install -r requirements-dev.txt 41 | 42 | - name: Run Unit Tests 43 | run: | 44 | python -m pytest -vv tests 45 | -------------------------------------------------------------------------------- /src/jsonizer/jsonizer.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from google.protobuf.json_format import MessageToJson 4 | 5 | from ...schema.protobuf.et_def_pb2 import ( 6 | GlobalMetadata, 7 | ) 8 | from ...schema.protobuf.et_def_pb2 import ( 9 | Node as ChakraNode, 10 | ) 11 | from ..third_party.utils.protolib import decodeMessage as decode_message 12 | from ..third_party.utils.protolib import openFileRd as open_file_rd 13 | 14 | 15 | def main() -> None: 16 | parser = argparse.ArgumentParser(description="Converts Chakra execution trace to JSON format.") 17 | parser.add_argument( 18 | "--input_filename", type=str, required=True, help="Specifies the input filename of the Chakra execution trace." 19 | ) 20 | parser.add_argument( 21 | "--output_filename", type=str, required=True, help="Specifies the output filename for the JSON data." 22 | ) 23 | args = parser.parse_args() 24 | 25 | execution_trace = open_file_rd(args.input_filename) 26 | node = ChakraNode() 27 | with open(args.output_filename, "w") as file: 28 | global_metadata = GlobalMetadata() 29 | decode_message(execution_trace, global_metadata) 30 | file.write(MessageToJson(global_metadata)) 31 | while decode_message(execution_trace, node): 32 | file.write(MessageToJson(node)) 33 | execution_trace.close() 34 | 35 | 36 | if __name__ == "__main__": 37 | main() 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Chakra 2 | 3 | Chakra is an open and interoperable graph-based representation of AI/ML workloads focused on enabling and accelerating AI SW/HW co-design. Chakra execution traces represent key operations (such as compute, memory, and communication), data and control dependencies, timing, and resource constraints.
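For illustration, a node in such a trace can be built directly against the generated protobuf bindings. The sketch below is hypothetical (the IDs and the operator name are made up), but the `Node` fields it uses (`id`, `name`, `data_deps`) are the ones defined in `schema/protobuf/et_def.proto`:

```python
from chakra.schema.protobuf.et_def_pb2 import Node

# A single trace node: operator 2 consumes the output of operator 1.
node = Node(id=2, name="matmul")
node.data_deps.append(1)  # data dependency on the node with id 1
```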
4 | 5 | This is a repository of the Chakra schema and a complementary set of tools and capabilities to enable the collection, analysis, generation, and adoption of Chakra execution traces by a broad range of simulators, emulators, and replay tools. 6 | 7 | Chakra is under active development as an [MLCommons](https://mlcommons.org/en)® research project. Please see the [MLCommons Chakra Working Group](https://mlcommons.org/en/groups/research-chakratracebench/) for more details on participating in this effort. 8 | 9 | A detailed description of the original motivation and guiding principles can be found [here](https://arxiv.org/abs/2305.14516). The paper was published prior to Chakra becoming an MLCommons project. Please cite this repository to refer to the latest Chakra schema and tools. 10 | 11 | ## Installation 12 | 13 | Check out [`USER_GUIDE`](USER_GUIDE.md) for details. 14 | 15 | ## License 16 | 17 | Chakra is released under the MIT license. Please see the [`LICENSE.md`](LICENSE.md) file for more information. 18 | 19 | ## Contributing 20 | 21 | We actively welcome your pull requests! Please see [`CONTRIBUTING.md`](CONTRIBUTING.md) for more info. 22 | -------------------------------------------------------------------------------- /tests/trace_link/test_chakra_host_trace_loader.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock 2 | 3 | import pytest 4 | from chakra.src.trace_link.chakra_host_trace_loader import ChakraHostTraceLoader 5 | from et_replay.execution_trace import Node as PyTorchOperator 6 | 7 | 8 | @pytest.fixture 9 | def mock_trace(): 10 | """Fixture to create a mock trace with a specific structure.""" 11 | # Create a mock trace node structure 12 | root_node = MagicMock(spec=PyTorchOperator) 13 | child_node1 = MagicMock(spec=PyTorchOperator) 14 | child_node2 = MagicMock(spec=PyTorchOperator) 15 | 16 | # Setup mock hierarchy 17 | root_node.children = [child_node1, child_node2] 18 | root_node.id = 1 19 | child_node1.children = [] 20 | child_node1.id = 2 21 | child_node2.children = [] 22 | child_node2.id = 3 23 | 24 | mock_trace = MagicMock() 25 | mock_trace.get_nodes.return_value = [None, root_node] 26 | 27 | return mock_trace 28 | 29 | 30 | @pytest.fixture 31 | def loader(): 32 | """Fixture to create a ChakraHostTraceLoader instance.""" 33 | return ChakraHostTraceLoader() 34 | 35 | 36 | def test_extract_chakra_host_ops(loader, mock_trace): 37 | """Test the extract_chakra_host_ops method.""" 38 | root_node = mock_trace.get_nodes()[1] 39 | 40 | result = loader.extract_chakra_host_ops(root_node) 41 | 42 | assert len(result) == 3 43 | assert result[0].id == 1 44 | assert result[1].id == 2 45 | assert result[2].id == 3 46 | -------------------------------------------------------------------------------- /tests/jsonizer/test_jsonizer.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tempfile 3 | from unittest.mock import mock_open, patch 4 | 5 | from chakra.schema.protobuf.et_def_pb2 import GlobalMetadata 6 | from chakra.schema.protobuf.et_def_pb2 import Node as ChakraNode 7 | from chakra.src.jsonizer.jsonizer import main 8 | from google.protobuf.json_format import MessageToJson 9 | 10 | 11 | @patch("chakra.src.jsonizer.jsonizer.open_file_rd") 12 | @patch("chakra.src.jsonizer.jsonizer.decode_message") 13 | @patch("builtins.open", new_callable=mock_open) 14 | def test_main(mock_file_open, mock_decode_message, mock_open_file_rd) -> None: 15 | """ 16 | Tests
the main function for converting Chakra execution trace to JSON format. 17 | """ 18 | with tempfile.NamedTemporaryFile(suffix=".json") as temp_output: 19 | args = argparse.Namespace(input_filename="input_file", output_filename=temp_output.name) 20 | mock_node = ChakraNode() 21 | mock_global_metadata = GlobalMetadata() 22 | 23 | mock_decode_message.side_effect = [mock_global_metadata, mock_node, False] 24 | 25 | with patch("argparse.ArgumentParser.parse_args", return_value=args): 26 | main() 27 | 28 | mock_open_file_rd.assert_called_with("input_file") 29 | mock_decode_message.assert_called() 30 | mock_file_open.assert_called_with(temp_output.name, "w") 31 | mock_file_open().write.assert_any_call(MessageToJson(mock_global_metadata)) 32 | mock_file_open().write.assert_any_call(MessageToJson(mock_node)) 33 | -------------------------------------------------------------------------------- /.github/workflows/end_to_end_tests.yml: -------------------------------------------------------------------------------- 1 | name: End-to-End Tests 2 | 3 | on: pull_request 4 | 5 | jobs: 6 | end-to-end-tests: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - name: Checkout Code 11 | uses: actions/checkout@v2 12 | 13 | - name: Setup Python Environment 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: '3.10.14' 17 | 18 | - name: Install Chakra 19 | run: | 20 | pip install . 21 | 22 | - name: Install PARAM 23 | run: | 24 | git clone https://github.com/facebookresearch/param.git 25 | cd param/et_replay 26 | git checkout 7b19f586dd8b267333114992833a0d7e0d601630 27 | pip install . 28 | 29 | - name: Install HTA 30 | run: | 31 | git clone https://github.com/facebookresearch/HolisticTraceAnalysis.git 32 | cd HolisticTraceAnalysis 33 | git checkout d731cc2e2249976c97129d409a83bd53d93051f6 34 | git submodule update --init 35 | pip install -r requirements.txt 36 | pip install -e . 37 | 38 | - name: Test chakra_trace_link Without Arguments 39 | run: | 40 | chakra_trace_link || [ $? -eq 2 ] 41 | 42 | - name: Test chakra_converter Without Arguments 43 | run: | 44 | chakra_converter || [ $? -eq 2 ] 45 | 46 | - name: Test chakra_visualizer Without Arguments 47 | run: | 48 | chakra_visualizer || [ $? -eq 2 ] 49 | 50 | - name: Test chakra_jsonizer Without Arguments 51 | run: | 52 | chakra_jsonizer || [ $? -eq 2 ] 53 | 54 | - name: Test chakra_timeline_visualizer Without Arguments 55 | run: | 56 | chakra_timeline_visualizer || [ $? 
-eq 2 ] 57 | -------------------------------------------------------------------------------- /src/feeder/et_feeder.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <memory> 4 | #include <queue> 5 | #include <string> 6 | #include <unordered_map> 7 | #include <unordered_set> 8 | 9 | #include "et_feeder_node.h" 10 | #include "protoio.hh" 11 | 12 | namespace Chakra { 13 | struct CompareNodes : public std::binary_function< 14 | std::shared_ptr<ETFeederNode>, 15 | std::shared_ptr<ETFeederNode>, 16 | bool> { 17 | bool operator()( 18 | const std::shared_ptr<ETFeederNode> lhs, 19 | const std::shared_ptr<ETFeederNode> rhs) const { 20 | return lhs->getChakraNode()->id() > rhs->getChakraNode()->id(); 21 | } 22 | }; 23 | 24 | class ETFeeder { 25 | public: 26 | ETFeeder(std::string filename); 27 | ~ETFeeder(); 28 | 29 | void addNode(std::shared_ptr<ETFeederNode> node); 30 | void removeNode(uint64_t node_id); 31 | bool hasNodesToIssue(); 32 | std::shared_ptr<ETFeederNode> getNextIssuableNode(); 33 | void pushBackIssuableNode(uint64_t node_id); 34 | std::shared_ptr<ETFeederNode> lookupNode(uint64_t node_id); 35 | void freeChildrenNodes(uint64_t node_id); 36 | void readGlobalMetadata(); 37 | std::shared_ptr<ETFeederNode> readNode(); 38 | void readNextWindow(); 39 | void resolveDep(); 40 | 41 | private: 42 | ProtoInputStream trace_; 43 | const uint32_t window_size_; 44 | bool et_complete_; 45 | 46 | std::unordered_map<uint64_t, std::shared_ptr<ETFeederNode>> dep_graph_{}; 47 | std::unordered_set<uint64_t> dep_free_node_id_set_{}; 48 | std::priority_queue< 49 | std::shared_ptr<ETFeederNode>, 50 | std::vector<std::shared_ptr<ETFeederNode>>, 51 | CompareNodes> 52 | dep_free_node_queue_{}; 53 | std::unordered_set<std::shared_ptr<ETFeederNode>> dep_unresolved_node_set_{}; 54 | }; 55 | 56 | } // namespace Chakra -------------------------------------------------------------------------------- /.github/workflows/feeder_tests.yml: -------------------------------------------------------------------------------- 1 | name: Feeder tests 2 | 3 | on: [ push, pull_request ] 4 | 5 | jobs: 6 | feeder-tests: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - name: Checkout Code 11 | uses: actions/checkout@v4 12 | - name: Install protoc 13 | run: | 14 | sudo apt update 15 | sudo apt install protobuf-compiler libprotobuf-dev 16 | - name: Install Google Test Framework 17 | run: | 18 | sudo apt update 19 | sudo apt install libgtest-dev 20 | - name: Extract trace for feeder tests 21 | run: tar -xvf tests/data/feeder_tests_trace.tar.gz 22 | - name: Build 23 | run: | 24 | SCRIPT_DIR=.
25 | BUILD_DIR="${SCRIPT_DIR:?}"/build 26 | CHAKRA_ET_DIR="${SCRIPT_DIR:?}"/schema/protobuf 27 | protoc et_def.proto \ 28 | --proto_path="${CHAKRA_ET_DIR:?}" \ 29 | --cpp_out="${CHAKRA_ET_DIR:?}" 30 | g++ -Wall -I src/third_party/utils -I schema/protobuf -I src/feeder -c schema/protobuf/et_def.pb.cc -o schema/protobuf/et_def.pb.o 31 | g++ -Wall -I src/third_party/utils -I schema/protobuf -I src/feeder -c src/feeder/et_feeder.cpp -o src/feeder/et_feeder.o 32 | g++ -Wall -I src/third_party/utils -I schema/protobuf -I src/feeder -c src/feeder/et_feeder_node.cpp -o src/feeder/et_feeder_node.o 33 | g++ -Wall -I src/third_party/utils -I schema/protobuf -I src/feeder -c src/third_party/utils/protoio.cc -o src/third_party/utils/protoio.o 34 | g++ -Wall -I src/third_party/utils -I schema/protobuf -I src/feeder -c tests/feeder/tests.cpp -o tests/feeder/tests.o 35 | g++ -Wall -I src/third_party/utils -I schema/protobuf -I src/feeder -o feeder_tests schema/protobuf/et_def.pb.o src/feeder/et_feeder.o src/feeder/et_feeder_node.o src/third_party/utils/protoio.o tests/feeder/tests.o -lgtest -lgtest_main -lprotobuf -lpthread 36 | - name: Run tests 37 | run: ./feeder_tests -------------------------------------------------------------------------------- /tests/trace_link/test_chakra_device_trace_loader.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from chakra.src.trace_link.chakra_device_trace_loader import ChakraDeviceTraceLoader 3 | from chakra.src.trace_link.kineto_operator import KinetoOperator 4 | 5 | 6 | @pytest.fixture 7 | def trace_loader(): 8 | return ChakraDeviceTraceLoader() 9 | 10 | 11 | @pytest.mark.parametrize( 12 | "kineto_ops, expected_exclusive_durs", 13 | [ 14 | ( 15 | [ 16 | {"ts": 100, "dur": 10, "inclusive_dur": 10}, 17 | {"ts": 105, "dur": 3, "inclusive_dur": 3}, 18 | {"ts": 108, "dur": 1, "inclusive_dur": 1}, 19 | ], 20 | [6, 3, 1], # Expected exclusive durations 21 | ), 22 | ( 23 | [ 24 | {"ts": 100, "dur": 20, "inclusive_dur": 20}, 25 | {"ts": 105, "dur": 5, "inclusive_dur": 5}, 26 | {"ts": 110, "dur": 5, "inclusive_dur": 5}, 27 | ], 28 | [10, 5, 5], # Expected exclusive durations 29 | ), 30 | ], 31 | ) 32 | def test_calculate_exclusive_dur(trace_loader, kineto_ops, expected_exclusive_durs): 33 | kineto_tid_cpu_ops_map = {1: [KinetoOperator(op) for op in kineto_ops]} 34 | trace_loader.calculate_exclusive_dur(kineto_tid_cpu_ops_map) 35 | 36 | for i, op in enumerate(kineto_tid_cpu_ops_map[1]): 37 | assert op.exclusive_dur == expected_exclusive_durs[i] 38 | 39 | 40 | @pytest.mark.parametrize( 41 | "intervals, expected_result", 42 | [ 43 | ([(1, 3), (2, 6), (8, 10), (15, 18)], [(1, 6), (8, 10), (15, 18)]), 44 | ([(1, 4), (4, 5)], [(1, 5)]), 45 | ([], []), 46 | ([(1, 2), (2, 3), (3, 4)], [(1, 4)]), 47 | ([(1, 5), (2, 6), (6, 8), (7, 9)], [(1, 9)]), 48 | ], 49 | ) 50 | def test_merge_overlapping_intervals(intervals, expected_result): 51 | result = ChakraDeviceTraceLoader.merge_overlapping_intervals(intervals) 52 | assert result == expected_result 53 | -------------------------------------------------------------------------------- /.github/workflows/cla.yml: -------------------------------------------------------------------------------- 1 | name: "cla-bot" 2 | on: 3 | issue_comment: 4 | types: [created] 5 | pull_request_target: 6 | types: [opened,closed,synchronize] 7 | 8 | jobs: 9 | cla-check: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: "MLCommons CLA bot check" 13 | if: (github.event.comment.body == 'recheck') || 
github.event_name == 'pull_request_target' 14 | # Alpha Release 15 | uses: mlcommons/cla-bot@master 16 | env: 17 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 18 | # the below token should have repo scope and must be manually added by you in the repository's secret 19 | PERSONAL_ACCESS_TOKEN : ${{ secrets.MLCOMMONS_BOT_CLA_TOKEN }} 20 | with: 21 | path-to-signatures: 'cla-bot/v1/cla.json' 22 | # branch should not be protected 23 | branch: 'main' 24 | allowlist: user1,bot* 25 | remote-organization-name: mlcommons 26 | remote-repository-name: systems 27 | 28 | #below are the optional inputs - If the optional inputs are not given, then default values will be taken 29 | #remote-organization-name: enter the remote organization name where the signatures should be stored (Default is storing the signatures in the same repository) 30 | #remote-repository-name: enter the remote repository name where the signatures should be stored (Default is storing the signatures in the same repository) 31 | #create-file-commit-message: 'For example: Creating file for storing CLA Signatures' 32 | #signed-commit-message: 'For example: $contributorName has signed the CLA in #$pullRequestNo' 33 | #custom-notsigned-prcomment: 'pull request comment with Introductory message to ask new contributors to sign' 34 | #custom-pr-sign-comment: 'The signature to be committed in order to sign the CLA' 35 | #custom-allsigned-prcomment: 'pull request comment when all contributors has signed, defaults to **CLA Assistant Lite bot** All Contributors have signed the CLA.' 36 | -------------------------------------------------------------------------------- /tests/trace_link/test_unique_id_assigner.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from src.trace_link.unique_id_assigner import UniqueIdAssigner 4 | 5 | 6 | @pytest.fixture 7 | def assigner(): 8 | """Fixture to create a new UniqueIdAssigner instance for each test.""" 9 | return UniqueIdAssigner() 10 | 11 | 12 | def test_assign_or_retrieve_id_new(assigner): 13 | """ 14 | Test that a new unique ID is correctly assigned to a new original ID. 15 | """ 16 | first_id = assigner.assign_or_retrieve_id(10) 17 | assert first_id == 0 # Expect the first assigned ID to be 0 18 | 19 | 20 | def test_assign_or_retrieve_id_existing(assigner): 21 | """ 22 | Test that the same original ID retrieves the same unique ID upon subsequent calls. 23 | """ 24 | first_id = assigner.assign_or_retrieve_id(10) 25 | second_id = assigner.assign_or_retrieve_id(10) 26 | assert second_id == first_id # Ensure it retrieves the same ID 27 | 28 | 29 | def test_assign_or_retrieve_id_distinct(assigner): 30 | """ 31 | Test that different original IDs receive different unique IDs. 32 | """ 33 | first_id = assigner.assign_or_retrieve_id(10) 34 | second_id = assigner.assign_or_retrieve_id(20) 35 | assert second_id != first_id 36 | assert second_id == 1 # This should be the next unique ID 37 | 38 | 39 | def test_generate_new_id_sequence(assigner): 40 | """ 41 | Test that generate_new_id consistently returns incrementing IDs. 42 | """ 43 | ids = [assigner.generate_new_id() for _ in range(5)] 44 | expected_ids = list(range(5)) 45 | assert ids == expected_ids 46 | 47 | 48 | def test_lookup_new_id_assigned(assigner): 49 | """ 50 | Test lookup of new IDs, ensuring assigned IDs return the correct new ID. 
51 | """ 52 | original_id = 30 53 | new_id = assigner.assign_or_retrieve_id(original_id) 54 | assert assigner.lookup_new_id(original_id) == new_id 55 | 56 | 57 | def test_lookup_new_id_unassigned(assigner): 58 | """ 59 | Test lookup for an unassigned ID returns the original ID. 60 | """ 61 | unassigned_id = 40 62 | assert assigner.lookup_new_id(unassigned_id) == unassigned_id 63 | -------------------------------------------------------------------------------- /tests/converter/test_pytorch_tensor.py: -------------------------------------------------------------------------------- 1 | from src.converter.pytorch_tensor import PyTorchTensor, list_to_pytorch_tensor 2 | 3 | 4 | def test_pytorch_tensor_initialization(): 5 | """Test initialization of PyTorchTensor object.""" 6 | tensor_data = [1, 2, 3, 4, 5, 6] 7 | tensor = PyTorchTensor(tensor_data) 8 | assert tensor.tensor_data == tensor_data 9 | 10 | 11 | def test_pytorch_tensor_is_valid(): 12 | """Test the is_valid method of PyTorchTensor.""" 13 | valid_data = [1, 2, 3, 4, 5, 6] 14 | invalid_data_1 = [1, 2, 3, 4, 5] # Less than 6 elements 15 | invalid_data_2 = [1, 2, 3, 4, 5, 6, 7] # More than 6 elements 16 | invalid_data_3 = [1, 2, 3, 4, 5, "a"] # Non-integer element 17 | 18 | valid_tensor = PyTorchTensor(valid_data) 19 | invalid_tensor_1 = PyTorchTensor(invalid_data_1) 20 | invalid_tensor_2 = PyTorchTensor(invalid_data_2) 21 | invalid_tensor_3 = PyTorchTensor(invalid_data_3) 22 | 23 | assert valid_tensor.is_valid() is True 24 | assert invalid_tensor_1.is_valid() is False 25 | assert invalid_tensor_2.is_valid() is False 26 | assert invalid_tensor_3.is_valid() is False 27 | 28 | 29 | def test_pytorch_tensor_properties(): 30 | """Test property methods of PyTorchTensor.""" 31 | tensor_data = [1, 2, 3, 4, 5, 6] 32 | tensor = PyTorchTensor(tensor_data) 33 | 34 | assert tensor.tensor_id == 1 35 | assert tensor.storage_id == 2 36 | assert tensor.offset == 3 37 | assert tensor.num_elem == 4 38 | assert tensor.elem_bytes == 5 39 | 40 | 41 | def test_pytorch_tensor_has_valid_storage_id(): 42 | """Test has_valid_storage_id method of PyTorchTensor.""" 43 | valid_storage_id_data = [1, 2, 3, 4, 5, 6] 44 | invalid_storage_id_data = [1, 0, 3, 4, 5, 6] # storage_id = 0 45 | 46 | valid_tensor = PyTorchTensor(valid_storage_id_data) 47 | invalid_tensor = PyTorchTensor(invalid_storage_id_data) 48 | 49 | assert valid_tensor.has_valid_storage_id() is True 50 | assert invalid_tensor.has_valid_storage_id() is False 51 | 52 | 53 | def test_list_to_pytorch_tensor(): 54 | """Test list_to_pytorch_tensor function.""" 55 | tensor_data = [1, 2, 3, 4, 5, 6] 56 | tensor = list_to_pytorch_tensor(tensor_data) 57 | 58 | assert isinstance(tensor, PyTorchTensor) 59 | assert tensor.tensor_data == tensor_data 60 | -------------------------------------------------------------------------------- /src/trace_link/chakra_host_trace_loader.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from typing import List 4 | 5 | from et_replay.execution_trace import Node as PyTorchOperator 6 | from et_replay.utils import load_execution_trace_file 7 | 8 | # Increase the recursion limit for deep Chakra host execution traces. 9 | sys.setrecursionlimit(10**6) 10 | 11 | 12 | class ChakraHostTraceLoader: 13 | """Loads Chakra host traces.""" 14 | 15 | def load(self, chakra_host_trace_file: str) -> List[PyTorchOperator]: 16 | """ 17 | Load and process the Chakra Host Execution Trace. 
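Example (an illustrative sketch; the trace file path is hypothetical): `host_ops = ChakraHostTraceLoader().load("chakra_host_et_rank0.json")` returns the host-side operators sorted by ID.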
18 | 19 | Args: 20 | chakra_host_trace_file (str): Path to the PyTorch execution trace file. 21 | 22 | Returns: 23 | List[PyTorchOperator]: List of PyTorch operators. 24 | """ 25 | logging.debug(f"Starting to load Chakra host execution trace from file: {chakra_host_trace_file}.") 26 | chakra_host_trace = load_execution_trace_file(chakra_host_trace_file) 27 | 28 | root_node = chakra_host_trace.get_nodes()[1] # Root node is usually 1-based 29 | chakra_host_ops = self.extract_chakra_host_ops(root_node) 30 | logging.debug(f"Extracted {len(chakra_host_ops)} operators from Chakra host execution trace.") 31 | logging.debug("Chakra host execution trace has been loaded and processed successfully.") 32 | 33 | return chakra_host_ops 34 | 35 | def extract_chakra_host_ops(self, node: PyTorchOperator) -> List[PyTorchOperator]: 36 | """ 37 | Extract and sort nodes from the PyTorch execution trace recursively. 38 | 39 | This method traverses the execution trace starting from the provided node, extracting all the operator nodes 40 | recursively, and then returns them sorted by their identifiers. 41 | 42 | Args: 43 | node (PyTorchOperator): Starting node for extraction. 44 | 45 | Returns: 46 | List[PyTorchOperator]: Sorted list of extracted PyTorchOperator nodes. 47 | """ 48 | nodes = [] 49 | 50 | def traverse(node: PyTorchOperator): 51 | nodes.append(node) 52 | for child in node.children: 53 | traverse(child) 54 | 55 | traverse(node) 56 | logging.debug(f"Traversed {len(nodes)} nodes from root node ID: {node.id}") 57 | return sorted(nodes, key=lambda x: x.id) 58 | -------------------------------------------------------------------------------- /src/trace_link/trace_link.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | 4 | from .trace_linker import TraceLinker 5 | 6 | 7 | def main() -> None: 8 | parser = argparse.ArgumentParser( 9 | description=( 10 | "This tool links Chakra host execution traces with Chakra device traces. Chakra host execution " 11 | "traces include host-side (CPU) operators only, missing GPU operators. While these traces show " 12 | "dependencies between operators, they lack operator duration. Chakra device traces include " 13 | "device-side (GPU) operators in an unstructured timeline without explicit dependencies. This tool " 14 | "adds duration information to CPU operators in Chakra host traces and encodes GPU operators into the " 15 | "final Chakra host + device trace in JSON format. The trace linker also identifies key dependencies, " 16 | "such as inter-thread and synchronization dependencies. 
For more information, see the guide at https://" 17 | "github.com/mlcommons/chakra/wiki/Chakra-Execution-Trace-Collection-%E2%80%90-A-Comprehensive-Guide-on-" 18 | "Merging-PyTorch-and-Kineto-Traces" 19 | ) 20 | ) 21 | parser.add_argument("--rank", type=int, required=True, help="Rank for the input traces") 22 | parser.add_argument( 23 | "--chakra-host-trace", 24 | type=str, 25 | required=True, 26 | help="Path to the Chakra host execution trace (formerly called PyTorch execution traces)", 27 | ) 28 | parser.add_argument( 29 | "--chakra-device-trace", 30 | type=str, 31 | required=True, 32 | help="Path to the Chakra device execution trace (also known as Kineto traces)", 33 | ) 34 | parser.add_argument( 35 | "--output-file", 36 | type=str, 37 | required=True, 38 | help="Path for the output Chakra host + device trace in the JSON format", 39 | ) 40 | parser.add_argument("--log-level", default="INFO", type=str, help="Log output verbosity level") 41 | 42 | args = parser.parse_args() 43 | 44 | logging.basicConfig(level=args.log_level.upper()) 45 | 46 | linker = TraceLinker() 47 | linker.link(args.rank, args.chakra_host_trace, args.chakra_device_trace, args.output_file) 48 | 49 | logging.info(f"Linking process successful. Output file is available at {args.output_file}.") 50 | logging.info("Please run the chakra_converter for further postprocessing.") 51 | 52 | 53 | if __name__ == "__main__": 54 | main() 55 | -------------------------------------------------------------------------------- /tests/visualizer/test_visualizer.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tempfile 3 | from unittest.mock import patch 4 | 5 | from chakra.src.visualizer.visualizer import escape_label, main 6 | 7 | 8 | def test_escape_label() -> None: 9 | """ 10 | Tests the escape_label function. 11 | """ 12 | assert escape_label("a{b}c") == "a\\{b\\}c" 13 | assert escape_label("a(b)c") == "a\\(b\\)c" 14 | assert escape_label("a<b>c") == "a\\<b\\>c" 15 | assert escape_label("a[b]c") == "a\\[b\\]c" 16 | assert escape_label("a|b&c-d") == "a\\|b\\&c\\-d" 17 | 18 | 19 | @patch("chakra.src.visualizer.visualizer.open_file_rd") 20 | @patch("chakra.src.visualizer.visualizer.decode_message") 21 | @patch("chakra.src.visualizer.visualizer.graphviz.Digraph") 22 | def test_main_pdf(mock_graphviz_digraph, mock_decode_message, mock_open_file_rd) -> None: 23 | """ 24 | Tests the main function for PDF output. 25 | """ 26 | with tempfile.NamedTemporaryFile(suffix=".pdf") as temp_output: 27 | args = argparse.Namespace(input_filename="input_file", output_filename=temp_output.name) 28 | mock_node = mock_open_file_rd.return_value 29 | mock_global_metadata = mock_open_file_rd.return_value 30 | 31 | mock_decode_message.side_effect = [mock_global_metadata, mock_node, False] 32 | 33 | with patch("argparse.ArgumentParser.parse_args", return_value=args): 34 | main() 35 | 36 | mock_open_file_rd.assert_called_with("input_file") 37 | mock_decode_message.assert_called() 38 | mock_graphviz_digraph.return_value.render.assert_called() 39 | 40 | 41 | @patch("chakra.src.visualizer.visualizer.open_file_rd") 42 | @patch("chakra.src.visualizer.visualizer.decode_message") 43 | @patch("chakra.src.visualizer.visualizer.nx.write_graphml") 44 | def test_main_graphml(mock_write_graphml, mock_decode_message, mock_open_file_rd) -> None: 45 | """ 46 | Tests the main function for GraphML output.
47 | """ 48 | with tempfile.NamedTemporaryFile(suffix=".graphml") as temp_output: 49 | args = argparse.Namespace(input_filename="input_file", output_filename=temp_output.name) 50 | mock_node = mock_open_file_rd.return_value 51 | mock_global_metadata = mock_open_file_rd.return_value 52 | 53 | mock_decode_message.side_effect = [mock_global_metadata, mock_node, False] 54 | 55 | with patch("argparse.ArgumentParser.parse_args", return_value=args): 56 | main() 57 | 58 | mock_open_file_rd.assert_called_with("input_file") 59 | mock_decode_message.assert_called() 60 | mock_write_graphml.assert_called() 61 | -------------------------------------------------------------------------------- /src/trace_link/unique_id_assigner.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | 4 | class UniqueIdAssigner: 5 | """ 6 | Assigns unique IDs to items, ensuring each item gets a distinct ID. 7 | 8 | This class is used to maintain a consistent and unique mapping of original identifiers to new unique identifiers. 9 | It's particularly useful in scenarios where the uniqueness of IDs across different entities or iterations needs to 10 | be preserved. 11 | 12 | Attributes 13 | next_id (int): The next unique ID to be assigned. 14 | original_to_new_ids (Dict[int, int]): A mapping from original IDs to their corresponding new unique IDs. This 15 | helps in retrieving already assigned unique IDs and ensures the same original ID always maps to the same 16 | unique ID. 17 | """ 18 | 19 | def __init__(self) -> None: 20 | """Initialize the UniqueIdAssigner with a starting ID of 0.""" 21 | self.next_id: int = 0 22 | self.original_to_new_ids: Dict[int, int] = {} 23 | 24 | def assign_or_retrieve_id(self, original_id: int) -> int: 25 | """ 26 | Assign a new unique ID to the given original ID if it doesn't have one already. 27 | 28 | Args: 29 | original_id (int): The original ID for which a unique ID is needed. 30 | 31 | Returns: 32 | int: A unique ID corresponding to the original ID. 33 | """ 34 | if original_id not in self.original_to_new_ids: 35 | self.original_to_new_ids[original_id] = self.next_id 36 | self.next_id += 1 37 | 38 | return self.original_to_new_ids[original_id] 39 | 40 | def generate_new_id(self) -> int: 41 | """ 42 | Generate a new unique ID without needing an original ID. 43 | 44 | This is useful for cases where new entities are created that do not have an existing identifier. 45 | 46 | Returns 47 | int: A new unique ID. 48 | """ 49 | unique_id = self.next_id 50 | self.next_id += 1 51 | return unique_id 52 | 53 | def lookup_new_id(self, original_id: int) -> int: 54 | """ 55 | Retrieve the new unique ID for a given original ID, if it has been assigned. 56 | 57 | This method is useful for checking if a unique ID has already been assigned to an original ID and retrieving it. 58 | 59 | Args: 60 | original_id (int): The original ID to look up. 61 | 62 | Returns: 63 | int: The new unique ID if it has been assigned, otherwise returns the original ID. 
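Example (illustrative): given assigner = UniqueIdAssigner(), uid = assigner.assign_or_retrieve_id(7) assigns a new unique ID, assigner.lookup_new_id(7) == uid afterwards, and assigner.lookup_new_id(99) == 99 while 99 is still unassigned.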
64 | """ 65 | return self.original_to_new_ids.get(original_id, original_id) 66 | -------------------------------------------------------------------------------- /src/feeder/wrapper_node.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "et_feeder.h" 4 | #include "et_feeder_node.h" 5 | #include "json_node.h" 6 | 7 | using json = nlohmann::json; 8 | 9 | enum format { Protobuf, JSON }; 10 | 11 | // WrapperNode class wraps protobuf and JSON 12 | class WrapperNode { 13 | private: 14 | enum format format_type_; 15 | Chakra::ETFeeder* et_feeder_; 16 | std::shared_ptr<Chakra::ETFeederNode> node_{nullptr}; 17 | std::ifstream jsonfile_; 18 | json data_; 19 | JSONNode json_node_; 20 | int64_t node_idx_ = -1; 21 | std::queue<std::shared_ptr<Chakra::ETFeederNode>> push_back_queue_proto; 22 | std::queue<JSONNode> push_back_queue_json; 23 | std::unordered_map<uint64_t, JSONNode> dep_graph_json{}; 24 | std::unordered_set<uint64_t> dep_free_node_id_set_json{}; 25 | std::priority_queue< 26 | JSONNode, // type of stored elements 27 | std::vector<JSONNode>, // underlying container to store elements 28 | CompareJSONNodesGT> // compare type providing a strict weak ordering 29 | dep_free_node_queue_json{}; 30 | std::unordered_set<std::shared_ptr<JSONNode>> 31 | dep_unresolved_node_set_json{}; 32 | int window_size_json; 33 | bool json_et_complete_; 34 | 35 | public: 36 | WrapperNode(); 37 | WrapperNode(const WrapperNode& t); 38 | WrapperNode(std::string filename); 39 | ~WrapperNode(); 40 | void releaseMemory(); 41 | void createWrapper(std::string filename); 42 | std::shared_ptr<Chakra::ETFeederNode> getProtobufNode(); 43 | JSONNode getJSONNode(); 44 | void addNode(JSONNode node); 45 | void addNode(std::shared_ptr<Chakra::ETFeederNode> node); 46 | void removeNode(uint64_t node_id); 47 | void readNextWindow(); 48 | JSONNode readNode(uint64_t node_id); 49 | void resolveDep(); 50 | void pushBackIssuableNode(uint64_t node_id); 51 | void freeChildrenNodes(uint64_t node_id); 52 | bool isValidNode(); 53 | void push_to_queue(); 54 | bool is_queue_empty(); 55 | void queue_front(); 56 | void pop_from_queue(); 57 | void getNextIssuableNode(); 58 | uint64_t getNodeID(); 59 | std::string getNodeName(); 60 | int getNodeType(); 61 | bool isCPUOp(); 62 | uint64_t getRuntime(); 63 | uint64_t getNumOps(); 64 | uint64_t getTensorSize(); 65 | int64_t getCommType(); 66 | uint32_t getCommPriority(); 67 | uint64_t getCommSize(); 68 | uint32_t getCommSrc(); 69 | uint32_t getCommDst(); 70 | uint32_t getCommTag(); 71 | bool hasNodesToIssue(); 72 | void lookupNode(uint64_t node_id); 73 | void getChildren( 74 | std::vector<std::shared_ptr<Chakra::ETFeederNode>>& childrenNodes); 75 | void getChildren(std::vector<JSONNode>& childrenNodes); 76 | int64_t findNodeIndexJSON(uint64_t node_id); 77 | }; -------------------------------------------------------------------------------- /src/feeder/et_feeder_node.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <memory> 4 | #include <string> 5 | #include <unordered_set> 6 | #include <vector> 7 | 8 | #include "et_def.pb.h" 9 | 10 | namespace Chakra { 11 | 12 | class ETFeederNode { 13 | public: 14 | ETFeederNode(std::shared_ptr<ChakraProtoMsg::Node> node); 15 | std::shared_ptr<ChakraProtoMsg::Node> getChakraNode(); 16 | void addChild(std::shared_ptr<ETFeederNode> node); 17 | std::vector<std::shared_ptr<ETFeederNode>> getChildren(); 18 | void addDepUnresolvedParentID(uint64_t node_id); 19 | std::vector<uint64_t> getDepUnresolvedParentIDs(); 20 | void setDepUnresolvedParentIDs( 21 | std::vector<uint64_t> const& dep_unresolved_parent_ids); 22 | 23 | const ChakraProtoMsg::AttributeProto& get_other_attr( 24 | const std::string& attr_name) const; 25 | bool has_other_attr(const std::string& attr_name) const; 26 | 27 | uint64_t id(); 28 | std::string name(); 29 | bool is_cpu_op(); 30 | ChakraProtoMsg::NodeType type(); 31 | uint64_t runtime(); 32 | uint64_t num_ops(); 33 | uint32_t tensor_loc(); 34 | uint64_t tensor_size(); 35 | ChakraProtoMsg::CollectiveCommType comm_type(); 36 | uint32_t comm_priority(); 37 | uint64_t comm_size(); 38 | uint32_t comm_src(); 39 | uint32_t comm_dst(); 40 | uint32_t comm_tag(); 41 | std::string pg_name(); 42 | std::string get_inputs_values() const; 43 | std::string get_inputs_shapes() const; 44 | std::string get_inputs_types() const; 45 | std::string get_outputs_values() const; 46 | std::string get_outputs_shapes() const; 47 | std::string get_outputs_types() const; 48 | 49 | private: 50 | void assign_attr_val( 51 | std::shared_ptr<ChakraProtoMsg::Node> node, 52 | int i, 53 | void* member); 54 | 55 | std::shared_ptr<ChakraProtoMsg::Node> node_{nullptr}; 56 | std::unordered_set<std::shared_ptr<ETFeederNode>> children_set_{}; 57 | std::vector<std::shared_ptr<ETFeederNode>> children_vec_{}; 58 | std::vector<uint64_t> dep_unresolved_parent_ids_{}; 59 | std::unordered_map<std::string, ChakraProtoMsg::AttributeProto> 60 | other_attrs_{}; 61 | 62 | uint64_t id_; 63 | std::string name_; 64 | bool is_cpu_op_; 65 | uint64_t runtime_; 66 | uint64_t num_ops_; 67 | uint32_t tensor_loc_; 68 | uint64_t tensor_size_; 69 | ChakraProtoMsg::CollectiveCommType comm_type_; 70 | uint32_t comm_priority_; 71 | uint64_t comm_size_; 72 | uint32_t comm_src_; 73 | uint32_t comm_dst_; 74 | uint32_t comm_tag_; 75 | std::string pg_name_; 76 | std::string inputs_values_; 77 | std::string inputs_shapes_; 78 | std::string inputs_types_; 79 | std::string outputs_values_; 80 | std::string outputs_shapes_; 81 | std::string outputs_types_; 82 | }; 83 | 84 | } // namespace Chakra 85 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | AccessModifierOffset: -1 3 | AlignAfterOpenBracket: AlwaysBreak 4 | AlignConsecutiveAssignments: false 5 | AlignConsecutiveDeclarations: false 6 | AlignEscapedNewlinesLeft: true 7 | AlignOperands: false 8 | AlignTrailingComments: false 9 | AllowAllParametersOfDeclarationOnNextLine: false 10 | AllowShortBlocksOnASingleLine: false 11 | AllowShortCaseLabelsOnASingleLine: false 12 | AllowShortFunctionsOnASingleLine: Empty 13 | AllowShortIfStatementsOnASingleLine: false 14 | AllowShortLoopsOnASingleLine: false 15 | AlwaysBreakAfterReturnType: None 16 | AlwaysBreakBeforeMultilineStrings: true 17 | AlwaysBreakTemplateDeclarations: true 18 | BinPackArguments: false 19 | BinPackParameters: false 20 | BraceWrapping: 21 | AfterClass: false 22 | AfterControlStatement: false 23 | AfterEnum: false 24 | AfterFunction: false 25 | AfterNamespace: false 26 | AfterObjCDeclaration: false 27 | AfterStruct: false 28 | AfterUnion: false 29 | BeforeCatch: false 30 | BeforeElse: false 31 | IndentBraces: false 32 | BreakBeforeBinaryOperators: None 33 | BreakBeforeBraces: Attach 34 | BreakBeforeTernaryOperators: true 35 | BreakConstructorInitializersBeforeComma: false 36 | BreakAfterJavaFieldAnnotations: false 37 | BreakStringLiterals: false 38 | ColumnLimit: 80 39 | CommentPragmas: '^ IWYU pragma:' 40 | CompactNamespaces: false 41 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 42 | ConstructorInitializerIndentWidth: 4 43 | ContinuationIndentWidth: 4 44 | Cpp11BracedListStyle: true 45 | DerivePointerAlignment: false 46 | DisableFormat: false 47 | ForEachMacros: [ FOR_EACH_RANGE, FOR_EACH, ] 48 | IncludeCategories: 49 | - Regex: '^<.*\.h(pp)?>' 50 | Priority: 1 51 | - Regex: '^<.*' 52 | Priority: 2 53 | - Regex: '.*' 54 | Priority: 3 55 |
IndentCaseLabels: true 56 | IndentWidth: 2 57 | IndentWrappedFunctionNames: false 58 | KeepEmptyLinesAtTheStartOfBlocks: false 59 | MacroBlockBegin: '' 60 | MacroBlockEnd: '' 61 | MaxEmptyLinesToKeep: 1 62 | NamespaceIndentation: None 63 | ObjCBlockIndentWidth: 2 64 | ObjCSpaceAfterProperty: false 65 | ObjCSpaceBeforeProtocolList: false 66 | PenaltyBreakBeforeFirstCallParameter: 1 67 | PenaltyBreakComment: 300 68 | PenaltyBreakFirstLessLess: 120 69 | PenaltyBreakString: 1000 70 | PenaltyExcessCharacter: 1000000 71 | PenaltyReturnTypeOnItsOwnLine: 2000000 72 | PointerAlignment: Left 73 | ReflowComments: true 74 | SortIncludes: true 75 | SpaceAfterCStyleCast: false 76 | SpaceBeforeAssignmentOperators: true 77 | SpaceBeforeParens: ControlStatements 78 | SpaceInEmptyParentheses: false 79 | SpacesBeforeTrailingComments: 1 80 | SpacesInAngles: false 81 | SpacesInContainerLiterals: true 82 | SpacesInCStyleCastParentheses: false 83 | SpacesInParentheses: false 84 | SpacesInSquareBrackets: false 85 | Standard: Cpp11 86 | TabWidth: 8 87 | UseTab: Never 88 | ... 89 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ "setuptools>=61", 3 | "wheel", 4 | "setuptools-grpc", 5 | "grpcio-tools"] 6 | build-backend = "setuptools.build_meta" 7 | 8 | [project] 9 | name = "chakra" 10 | requires-python = ">=3.7" 11 | version = "0.0.4" 12 | readme = "README.md" 13 | license = {file = "LICENSE.md"} 14 | authors = [ 15 | {name = "MLCommons", email = "chakra@mlcommons.org"}, 16 | ] 17 | dependencies = [ 18 | "protobuf", 19 | "graphviz", 20 | "networkx", 21 | "pydot", 22 | "HolisticTraceAnalysis @ git+https://github.com/facebookresearch/HolisticTraceAnalysis.git@d731cc2e2249976c97129d409a83bd53d93051f6" 23 | ] 24 | 25 | [project.urls] 26 | Homepage = "https://github.com/mlcommons/chakra" 27 | Documentation = "https://github.com/mlcommons/chakra/README.md" 28 | Repository = "https://github.com/mlcommons/chakra.git" 29 | 30 | [tool.setuptools.package-dir] 31 | "chakra.schema.protobuf" = "schema/protobuf" 32 | "chakra.src.converter" = "src/converter" 33 | "chakra.src.generator" = "src/generator" 34 | "chakra.src.jsonizer" = "src/jsonizer" 35 | "chakra.src.third_party" = "src/third_party" 36 | "chakra.src.timeline_visualizer" = "src/timeline_visualizer" 37 | "chakra.src.trace_link" = "src/trace_link" 38 | "chakra.src.visualizer" = "src/visualizer" 39 | 40 | [tool.setuptools.package-data] 41 | "chakra.schema.protobuf" = ["et_def.proto"] 42 | 43 | [project.scripts] 44 | chakra_converter = "chakra.src.converter.converter:main" 45 | chakra_generator = "chakra.src.generator.generator:main" 46 | chakra_jsonizer = "chakra.src.jsonizer.jsonizer:main" 47 | chakra_timeline_visualizer = "chakra.src.timeline_visualizer.timeline_visualizer:main" 48 | chakra_trace_link = "chakra.src.trace_link.trace_link:main" 49 | chakra_visualizer = "chakra.src.visualizer.visualizer:main" 50 | 51 | [tool.ruff] 52 | target-version = "py39" 53 | line-length = 120 54 | exclude = [ 55 | "src/converter/text_converter.py", 56 | "src/third_party/utils/protolib.py", 57 | ] 58 | 59 | [tool.ruff.lint] 60 | select = ["I", "B", "E", "D", "F", "SIM", "W", "C90", "EXE"] 61 | ignore = [ 62 | "D407", # Missing dashed underline after section 63 | "D203", # conflicts with D211 64 | "D212", # conflicts with D213 65 | "D413", # Missing blank line after last section 66 | 67 | # TODO: Remove these once we have 
docstrings 68 | "D100", # Missing docstring in public module 69 | "D102", # Missing docstring in public method 70 | "D103", # Missing docstring in public function 71 | "D104", # Missing docstring in public package 72 | "D107", # Missing docstring in `__init__` 73 | ] 74 | 75 | [tool.ruff.lint.per-file-ignores] 76 | "**/tests/*" = ["D"] 77 | 78 | [tool.ruff.format] 79 | indent-style = "space" 80 | 81 | [tool.pyright] 82 | typeCheckingMode = "basic" 83 | exclude = [ 84 | "**/__pycache__", 85 | "**/build/", 86 | "setup.py", 87 | "src/third_party/utils/protolib.py" 88 | ] 89 | reportMissingImports = false 90 | reportAttributeAccessIssue = false 91 | 92 | [tool.vulture] 93 | ignore_names = ["mock_process_thread"] 94 | min_confidence = 100 95 | -------------------------------------------------------------------------------- /src/visualizer/visualizer.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import re 3 | 4 | import graphviz 5 | import networkx as nx 6 | 7 | from ...schema.protobuf.et_def_pb2 import GlobalMetadata, Node 8 | from ..third_party.utils.protolib import decodeMessage as decode_message 9 | from ..third_party.utils.protolib import openFileRd as open_file_rd 10 | 11 | 12 | def escape_label(label: str) -> str: 13 | """ 14 | Escapes special characters in labels for graph rendering. 15 | 16 | Args: 17 | label (str): The original label string. 18 | 19 | Returns: 20 | str: The escaped label string. 21 | """ 22 | # Define special characters to escape 23 | special_chars = "{}()<>\\[\\]|&-" 24 | # Escape special characters 25 | return re.sub(f"([{special_chars}])", r"\\\1", label) 26 | 27 | 28 | def main() -> None: 29 | """Generate an output graph file in the specified format (PDF, DOT, or GraphML).""" 30 | parser = argparse.ArgumentParser(description="Execution Trace Visualizer") 31 | parser.add_argument("--input_filename", type=str, required=True, help="Input Chakra execution trace filename") 32 | parser.add_argument( 33 | "--output_filename", 34 | type=str, 35 | required=True, 36 | help=( 37 | "Output graph filename. Supported extensions are pdf, dot, and graphml. " 38 | "Recommend using graphml for large graphs for rendering speed." 
39 | ), 40 | ) 41 | args = parser.parse_args() 42 | 43 | et = open_file_rd(args.input_filename) 44 | node = Node() 45 | gm = GlobalMetadata() 46 | 47 | # Determine the file type to be created based on the output filename 48 | if args.output_filename.endswith((".pdf", ".dot")): 49 | f = graphviz.Digraph() 50 | decode_message(et, gm) 51 | while decode_message(et, node): 52 | escaped_label = escape_label(node.name) 53 | f.node(name=f"{node.id}", label=escaped_label, id=str(node.id), shape="record") 54 | 55 | # Handling data dependencies 56 | for data_dep_id in node.data_deps: 57 | f.edge(str(data_dep_id), str(node.id), arrowhead="normal") # using "normal" arrow for data_deps 58 | 59 | # Handling control dependencies 60 | for ctrl_dep_id in node.ctrl_deps: 61 | f.edge(str(ctrl_dep_id), str(node.id), arrowhead="tee") # using "tee" arrow for ctrl_deps 62 | 63 | if args.output_filename.endswith(".pdf"): 64 | f.render(args.output_filename.replace(".pdf", ""), format="pdf", cleanup=True) 65 | else: # ends with ".dot" 66 | f.render(args.output_filename.replace(".dot", ""), format="dot", cleanup=True) 67 | elif args.output_filename.endswith(".graphml"): 68 | G = nx.DiGraph() 69 | decode_message(et, gm) 70 | while decode_message(et, node): 71 | G.add_node(node.id, label=node.name) 72 | 73 | # Handling data dependencies 74 | for data_dep_id in node.data_deps: 75 | G.add_edge(data_dep_id, node.id, dependency="data") 76 | 77 | # Handling control dependencies 78 | for ctrl_dep_id in node.ctrl_deps: 79 | G.add_edge(ctrl_dep_id, node.id, dependency="control") 80 | 81 | nx.write_graphml(G, args.output_filename) 82 | else: 83 | print("Unknown output file extension. Must be one of pdf, dot, graphml.") 84 | 85 | et.close() 86 | 87 | 88 | if __name__ == "__main__": 89 | main() 90 | -------------------------------------------------------------------------------- /src/converter/pytorch_tensor.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | 4 | class PyTorchTensor: 5 | """ 6 | Represents a tensor with its associated properties. 7 | 8 | Attributes: 9 | tensor_data (List[int]): Data of the tensor including tensor_id, storage_id, offset, number of elements, and 10 | size of each element in bytes. 11 | 12 | Note: 13 | For more details on the tensor data structure, refer to: 14 | https://github.com/pytorch/pytorch/blob/7cd48df2dae7e2194438b162968c47d1f05bf20e/torch/csrc/profiler/ 15 | standalone/execution_trace_observer.cpp#L400 16 | """ 17 | 18 | def __init__(self, tensor_data: List[int]) -> None: 19 | """ 20 | Initialize a PyTorchTensor object with the provided tensor data. 21 | 22 | Args: 23 | tensor_data (List[int]): Data of the tensor including tensor_id, storage_id, offset, number of elements, 24 | and size of each element in bytes. 25 | """ 26 | self.tensor_data = tensor_data 27 | 28 | def is_valid(self) -> bool: 29 | """ 30 | Check if the tensor data is valid. 31 | 32 | Returns 33 | bool: True if tensor_data is a list of exactly six integers, False otherwise. 34 | """ 35 | return ( 36 | isinstance(self.tensor_data, list) 37 | and len(self.tensor_data) == 6 38 | and all(isinstance(item, int) for item in self.tensor_data) 39 | ) 40 | 41 | @property 42 | def tensor_id(self) -> int: 43 | """ 44 | Return the tensor ID. 45 | 46 | Returns 47 | int: Tensor ID. 48 | """ 49 | return self.tensor_data[0] 50 | 51 | @property 52 | def storage_id(self) -> int: 53 | """ 54 | Return the storage ID. 55 | 56 | Returns 57 | int: Storage ID. 
58 | """ 59 | return self.tensor_data[1] 60 | 61 | @property 62 | def offset(self) -> int: 63 | """ 64 | Return the offset. 65 | 66 | Returns 67 | int: Offset value. 68 | """ 69 | return self.tensor_data[2] 70 | 71 | @property 72 | def num_elem(self) -> int: 73 | """ 74 | Return the number of elements in the tensor. 75 | 76 | Returns 77 | int: Number of elements. 78 | """ 79 | return self.tensor_data[3] 80 | 81 | @property 82 | def elem_bytes(self) -> int: 83 | """ 84 | Return the size of each element in bytes. 85 | 86 | Returns 87 | int: Size of each element in bytes. 88 | """ 89 | return self.tensor_data[4] 90 | 91 | def has_valid_storage_id(self) -> bool: 92 | """ 93 | Check if the tensor has a valid storage ID. 94 | 95 | Returns 96 | bool: True if the storage ID is greater than 0, False otherwise. 97 | """ 98 | return self.storage_id > 0 99 | 100 | 101 | def list_to_pytorch_tensor(tensor_list: List[int]) -> PyTorchTensor: 102 | """ 103 | Convert a list representation of a tensor into a PyTorchTensor object. 104 | 105 | Args: 106 | tensor_list (List[int]): Data representing a tensor, including tensor_id, storage_id, offset, num_elem, 107 | elem_bytes. 108 | 109 | Returns: 110 | PyTorchTensor: The PyTorchTensor object created from the data. 111 | """ 112 | return PyTorchTensor(tensor_list) 113 | -------------------------------------------------------------------------------- /schema/protobuf/et_def.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package ChakraProtoMsg; 4 | 5 | message AttributeProto { 6 | string name = 1; 7 | string doc_string = 2; 8 | 9 | oneof value { 10 | double double_val = 3; 11 | DoubleList double_list = 4; 12 | float float_val = 5; 13 | FloatList float_list = 6; 14 | int32 int32_val = 7; 15 | Int32List int32_list = 8; 16 | int64 int64_val = 9; 17 | Int64List int64_list = 10; 18 | uint32 uint32_val = 11; 19 | Uint32List uint32_list = 12; 20 | uint64 uint64_val = 13; 21 | Uint64List uint64_list = 14; 22 | sint32 sint32_val = 15; 23 | Sint32List sint32_list = 16; 24 | sint64 sint64_val = 17; 25 | Sint64List sint64_list = 18; 26 | fixed32 fixed32_val = 19; 27 | Fixed32List fixed32_list = 20; 28 | fixed64 fixed64_val = 21; 29 | Fixed64List fixed64_list = 22; 30 | sfixed32 sfixed32_val = 23; 31 | Sfixed32List sfixed32_list = 24; 32 | sfixed64 sfixed64_val = 25; 33 | Sfixed64List sfixed64_list = 26; 34 | bool bool_val = 27; 35 | BoolList bool_list = 28; 36 | string string_val = 29; 37 | StringList string_list = 30; 38 | bytes bytes_val = 31; 39 | BytesList bytes_list = 32; 40 | } 41 | } 42 | 43 | message DoubleList { 44 | repeated double values = 1; 45 | } 46 | 47 | message FloatList { 48 | repeated float values = 1; 49 | } 50 | 51 | message Int32List { 52 | repeated int32 values = 1; 53 | } 54 | 55 | message Int64List { 56 | repeated int64 values = 1; 57 | } 58 | 59 | message Uint32List { 60 | repeated uint32 values = 1; 61 | } 62 | 63 | message Uint64List { 64 | repeated uint64 values = 1; 65 | } 66 | 67 | message Sint32List { 68 | repeated sint32 values = 1; 69 | } 70 | 71 | message Sint64List { 72 | repeated sint64 values = 1; 73 | } 74 | 75 | message Fixed32List { 76 | repeated fixed32 values = 1; 77 | } 78 | 79 | message Fixed64List { 80 | repeated fixed64 values = 1; 81 | } 82 | 83 | message Sfixed32List { 84 | repeated sfixed32 values = 1; 85 | } 86 | 87 | message Sfixed64List { 88 | repeated sfixed64 values = 1; 89 | } 90 | 91 | message BoolList { 92 | repeated bool values = 1; 93 | } 94 | 95 
95 | message StringList {
96 |   repeated string values = 1;
97 | }
98 | 
99 | message BytesList {
100 |   repeated bytes values = 1;
101 | }
102 | 
103 | message GlobalMetadata {
104 |   string version = 1;
105 |   repeated AttributeProto attr = 2;
106 | }
107 | 
108 | enum NodeType {
109 |   INVALID_NODE = 0;
110 |   METADATA_NODE = 1;
111 |   MEM_LOAD_NODE = 2;
112 |   MEM_STORE_NODE = 3;
113 |   COMP_NODE = 4;
114 |   COMM_SEND_NODE = 5;
115 |   COMM_RECV_NODE = 6;
116 |   COMM_COLL_NODE = 7;
117 | }
118 | 
119 | enum CollectiveCommType {
120 |   ALL_REDUCE = 0;
121 |   REDUCE = 1;
122 |   ALL_GATHER = 2;
123 |   GATHER = 3;
124 |   SCATTER = 4;
125 |   BROADCAST = 5;
126 |   ALL_TO_ALL = 6;
127 |   REDUCE_SCATTER = 7;
128 |   REDUCE_SCATTER_BLOCK = 8;
129 |   BARRIER = 9;
130 | }
131 | 
132 | message Node {
133 |   uint64 id = 1;
134 |   string name = 2;
135 |   NodeType type = 3;
136 | 
137 |   // Control and data dependencies
138 |   repeated uint64 ctrl_deps = 4;
139 |   repeated uint64 data_deps = 5;
140 | 
141 |   // Timing information
142 |   uint64 start_time_micros = 6;
143 |   uint64 duration_micros = 7;
144 | 
145 |   IOInfo inputs = 8;
146 |   IOInfo outputs = 9;
147 |   repeated AttributeProto attr = 10;
148 | }
149 | 
150 | message IOInfo {
151 |   string values = 1;
152 |   string shapes = 2;
153 |   string types = 3;
154 | }
155 | 
156 | message Tensor {
157 |   uint64 tensor_id = 1; // A unique ID for the TensorImpl object.
158 |   uint64 storage_id = 2; // A unique ID for the underlying storage object.
159 |   uint64 offset = 3; // Offset to the storage memory.
160 |   uint64 num_elem = 4; // Number of elements in the storage.
161 |   uint64 elem_bytes = 5; // Number of bytes per element.
162 |   string device = 6; // Tensor object device location.
163 | }
164 | 
-------------------------------------------------------------------------------- /tests/feeder/tests.cpp: --------------------------------------------------------------------------------
1 | #include <gtest/gtest.h>
2 | #include "et_feeder.h"
3 | 
4 | class ETFeederTest : public ::testing::Test {
5 |  protected:
6 |   ETFeederTest() {}
7 |   virtual ~ETFeederTest() {}
8 | 
9 |   void SetUp(const std::string& filename) {
10 |     trace = new Chakra::ETFeeder(filename);
11 |   }
12 | 
13 |   virtual void TearDown() {
14 |     delete trace;
15 |   }
16 | 
17 |   Chakra::ETFeeder* trace;
18 | };
19 | 
20 | TEST_F(ETFeederTest, ConstructorNodeIDTest) {
21 |   SetUp("tests/data/chakra.0.et");
22 |   std::shared_ptr<Chakra::ETFeederNode> node = trace->getNextIssuableNode();
23 |   uint64_t firstNodeID = node->id();
24 |   ASSERT_EQ(firstNodeID, 216);
25 | 
26 |   node = trace->getNextIssuableNode();
27 |   uint64_t secondNodeID = node->id();
28 |   ASSERT_EQ(secondNodeID, 432);
29 | }
30 | 
31 | TEST_F(ETFeederTest, ConstructorNodeValuesTest) {
32 |   SetUp("tests/data/chakra.0.et");
33 |   std::shared_ptr<Chakra::ETFeederNode> node = trace->getNextIssuableNode();
34 |   ChakraProtoMsg::NodeType firstNodeType = node->type();
35 |   ASSERT_EQ(firstNodeType, ChakraProtoMsg::COMP_NODE);
36 |   ASSERT_TRUE(node->is_cpu_op());
37 | 
38 |   std::string attr = "rf_id";
39 |   ChakraProtoMsg::AttributeProto rf_id = node->get_other_attr(attr);
40 |   ASSERT_EQ(rf_id.int64_val(), 2);
41 | 
42 |   node = trace->getNextIssuableNode();
43 |   uint64_t secondNodeType = node->type();
44 |   ASSERT_EQ(secondNodeType, ChakraProtoMsg::COMM_COLL_NODE);
45 |   ASSERT_TRUE(node->is_cpu_op());
46 | 
47 |   rf_id = node->get_other_attr(attr);
48 |   ASSERT_EQ(rf_id.int64_val(), 110);
49 | }
50 | 
51 | TEST_F(ETFeederTest, ConstructorETFeederTest) {
52 |   SetUp("tests/data/chakra.0.et");
53 |   std::shared_ptr<Chakra::ETFeederNode> node = trace->getNextIssuableNode();
54 |   std::vector<std::shared_ptr<Chakra::ETFeederNode>>
children =
55 |       node->getChildren();
56 |   ASSERT_EQ(children[0]->id(), 217);
57 |   ASSERT_EQ(children[1]->id(), 430);
58 |   ASSERT_EQ(children[2]->id(), 435);
59 | }
60 | 
61 | TEST_F(ETFeederTest, RemoveTest) {
62 |   SetUp("tests/data/chakra.0.et");
63 |   std::shared_ptr<Chakra::ETFeederNode> node = trace->lookupNode(216);
64 |   ASSERT_EQ(node->id(), 216);
65 |   trace->removeNode(216);
66 |   freopen("/dev/null", "w", stderr);
67 |   try {
68 |     node = trace->lookupNode(216);
69 |     ASSERT_TRUE(false) << "node should be removed \n";
70 |   } catch (const std::exception& e) {
71 |     // this is the desired behaviour
72 |   }
73 |   freopen("/dev/tty", "w", stderr);
74 | }
75 | 
76 | TEST_F(ETFeederTest, RemoveAndGetNextTest) {
77 |   SetUp("tests/data/chakra.0.et");
78 |   std::shared_ptr<Chakra::ETFeederNode> node = trace->lookupNode(216);
79 |   ASSERT_EQ(node->id(), 216);
80 |   trace->removeNode(216);
81 |   node = trace->getNextIssuableNode();
82 |   ASSERT_EQ(node->id(), 216);
83 | }
84 | 
85 | TEST_F(ETFeederTest, FreeChildrenTest) {
86 |   SetUp("tests/data/chakra.0.et");
87 |   std::shared_ptr<Chakra::ETFeederNode> node = trace->lookupNode(216);
88 |   ASSERT_EQ(node->id(), 216);
89 |   trace->freeChildrenNodes(216);
90 |   node = trace->getNextIssuableNode();
91 |   ASSERT_EQ(node->id(), 216);
92 |   node = trace->getNextIssuableNode();
93 |   ASSERT_EQ(node->id(), 217);
94 | }
95 | 
96 | TEST_F(ETFeederTest, HasNodesToIssueTest) {
97 |   SetUp("tests/data/chakra.0.et");
98 |   std::shared_ptr<Chakra::ETFeederNode> node = trace->getNextIssuableNode();
99 |   ASSERT_EQ(node->id(), 216);
100 |   ASSERT_TRUE(trace->hasNodesToIssue());
101 |   trace->removeNode(5);
102 |   ASSERT_TRUE(trace->hasNodesToIssue());
103 | }
104 | 
105 | TEST_F(ETFeederTest, PushBackIssuableNodeTest) {
106 |   SetUp("tests/data/chakra.0.et");
107 |   std::shared_ptr<Chakra::ETFeederNode> node;
108 |   trace->pushBackIssuableNode(217);
109 |   node = trace->getNextIssuableNode();
110 |   ASSERT_EQ(node->id(), 216);
111 |   node = trace->getNextIssuableNode();
112 |   ASSERT_EQ(node->id(), 217);
113 | }
114 | 
115 | TEST_F(ETFeederTest, AddNodeTest) {
116 |   SetUp("tests/data/chakra.0.et");
117 |   std::shared_ptr<Chakra::ETFeederNode> node;
118 |   node = trace->lookupNode(216);
119 |   trace->removeNode(216);
120 |   trace->addNode(node);
121 |   std::shared_ptr<Chakra::ETFeederNode> node2;
122 |   node2 = trace->lookupNode(216);
123 |   ASSERT_EQ(node2->id(), 216);
124 | }
125 | 
126 | TEST_F(ETFeederTest, NodeGetChildrenTest) {
127 |   SetUp("tests/data/chakra.0.et");
128 |   std::shared_ptr<Chakra::ETFeederNode> node;
129 |   node = trace->lookupNode(216);
130 |   std::vector<std::shared_ptr<Chakra::ETFeederNode>> children =
131 |       node->getChildren();
132 |   ASSERT_EQ(children[0]->id(), 217);
133 |   ASSERT_EQ(children[2]->id(), 435);
134 | }
135 | 
136 | int main(int argc, char** argv) {
137 |   ::testing::InitGoogleTest(&argc, argv);
138 |   return RUN_ALL_TESTS();
139 | }
-------------------------------------------------------------------------------- /.github/workflows/codeql.yml: --------------------------------------------------------------------------------
1 | # For most projects, this workflow file will not need changing; you simply need
2 | # to commit it to your repository.
3 | #
4 | # You may wish to alter this file to override the set of languages analyzed,
5 | # or to provide custom queries or build logic.
6 | #
7 | # ******** NOTE ********
8 | # We have attempted to detect the languages in your repository. Please check
9 | # the `language` matrix defined below to confirm you have the correct set of
10 | # supported CodeQL languages.
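# Note on the matrix below: c-cpp is analyzed with build-mode "manual"
# because the feeder sources only compile after protoc has generated
# et_def.pb.cc/.h from schema/protobuf/et_def.proto; the manual build step
# near the end of this workflow runs that code generation before g++.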
11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ "main" ] 17 | pull_request: 18 | branches: [ "main" ] 19 | # schedule: 20 | # - cron: '36 19 * * 3' 21 | 22 | jobs: 23 | analyze: 24 | name: Analyze (${{ matrix.language }}) 25 | # Runner size impacts CodeQL analysis time. To learn more, please see: 26 | # - https://gh.io/recommended-hardware-resources-for-running-codeql 27 | # - https://gh.io/supported-runners-and-hardware-resources 28 | # - https://gh.io/using-larger-runners (GitHub.com only) 29 | # Consider using larger runners or machines with greater resources for possible analysis time improvements. 30 | runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} 31 | timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} 32 | permissions: 33 | # required for all workflows 34 | security-events: write 35 | 36 | # required to fetch internal or private CodeQL packs 37 | packages: read 38 | 39 | # only required for workflows in private repositories 40 | actions: read 41 | contents: read 42 | 43 | strategy: 44 | fail-fast: false 45 | matrix: 46 | include: 47 | - language: c-cpp 48 | build-mode: manual 49 | - language: python 50 | build-mode: none 51 | # CodeQL supports the following values keywords for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' 52 | # Use `c-cpp` to analyze code written in C, C++ or both 53 | # Use 'java-kotlin' to analyze code written in Java, Kotlin or both 54 | # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both 55 | # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, 56 | # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. 57 | # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how 58 | # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages 59 | steps: 60 | - name: Checkout repository 61 | uses: actions/checkout@v4 62 | 63 | # Initializes the CodeQL tools for scanning. 64 | - name: Initialize CodeQL 65 | uses: github/codeql-action/init@v3 66 | with: 67 | languages: ${{ matrix.language }} 68 | build-mode: ${{ matrix.build-mode }} 69 | queries: security-extended 70 | # If you wish to specify custom queries, you can do so here or in a config file. 71 | # By default, queries listed here will override any specified in a config file. 72 | # Prefix the list here with "+" to use these queries and those in the config file. 73 | 74 | # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 75 | # queries: security-extended,security-and-quality 76 | 77 | # If the analyze step fails for one of the languages you are analyzing with 78 | # "We were unable to automatically build your code", modify the matrix above 79 | # to set the build mode to "manual" for that language. Then modify this step 80 | # to build your code. 81 | # ℹ️ Command-line programs to run using the OS shell. 
82 |       # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
83 |       - if: matrix.build-mode == 'manual'
84 |         run: |
85 |           sudo apt update
86 |           sudo apt install protobuf-compiler libprotobuf-dev
87 |           SCRIPT_DIR=.
88 |           BUILD_DIR="${SCRIPT_DIR:?}"/build
89 |           CHAKRA_ET_DIR="${SCRIPT_DIR:?}"/schema/protobuf
90 |           protoc et_def.proto \
91 |             --proto_path="${CHAKRA_ET_DIR:?}" \
92 |             --cpp_out="${CHAKRA_ET_DIR:?}"
93 |           g++ -shared -fPIC -Wall src/feeder/et_feeder.cpp src/feeder/et_feeder_node.cpp src/third_party/utils/protoio.cc schema/protobuf/et_def.pb.cc -o libfeeder.so -lprotobuf -I . -I src/feeder -I src/third_party/utils -I schema/protobuf
94 | 
95 |       - name: Perform CodeQL Analysis
96 |         uses: github/codeql-action/analyze@v3
97 |         with:
98 |           category: "/language:${{matrix.language}}"
99 | 
-------------------------------------------------------------------------------- /src/feeder/json_node.h: --------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include <cstdint>
4 | #include <functional>
5 | #include <iostream>
6 | #include <nlohmann/json.hpp>
7 | #include <set>
8 | #include <string>
9 | #include <vector>
10 | 
11 | using json = nlohmann::json;
12 | 
13 | enum NodeType : int {
14 |   INVALID_NODE = 0,
15 |   METADATA_NODE = 1,
16 |   MEM_LOAD_NODE = 2,
17 |   MEM_STORE_NODE = 3,
18 |   COMP_NODE = 4,
19 |   COMM_SEND_NODE = 5,
20 |   COMM_RECV_NODE = 6,
21 |   COMM_COLL_NODE = 7
22 | };
23 | 
24 | class JSONNode {
25 |  private:
26 |   uint64_t node_id;
27 |   std::string node_name;
28 |   int node_type;
29 |   bool is_cpu_op;
30 |   uint64_t runtime;
31 |   uint64_t num_ops;
32 |   uint64_t tensor_size;
33 |   int64_t comm_type;
34 |   uint32_t comm_priority;
35 |   uint64_t comm_size;
36 |   uint32_t comm_src;
37 |   uint32_t comm_dst;
38 |   uint32_t comm_tag;
39 | 
40 |  public:
41 |   std::vector<uint64_t> data_deps{};
42 |   std::vector<uint64_t> dep_unresolved_parent_ids_json{};
43 |   std::vector<JSONNode> children_vec_json{};
44 | 
45 |   // Compare function for set
46 |   struct CompareJSONNodesLT {
47 |     bool operator()(const JSONNode& a, const JSONNode& b) const {
48 |       return a.node_id < b.node_id;
49 |     }
50 |   };
51 |   std::set<JSONNode, CompareJSONNodesLT> children_set_json{};
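  // Bookkeeping sketch: children_vec_json preserves the order in which
  // children are attached and is what getChildren() returns, while
  // children_set_json exists only so addChild() can cheaply skip a child
  // that dependency resolution tries to attach twice.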
52 | 
53 |   JSONNode();
54 |   JSONNode(const JSONNode& t);
55 |   JSONNode(json data, uint64_t id);
56 |   uint64_t id() const;
57 |   std::string name() const;
58 |   int type() const;
59 |   bool isCPUOp() const;
60 |   uint64_t getRuntime() const;
61 |   uint64_t getNumOps() const;
62 |   uint64_t getTensorSize() const;
63 |   int64_t getCommType() const;
64 |   uint32_t getCommPriority() const;
65 |   uint64_t getCommSize() const;
66 |   uint32_t getCommSrc() const;
67 |   uint32_t getCommDst() const;
68 |   uint32_t getCommTag() const;
69 |   void addDepUnresolvedParentID(uint64_t node_id);
70 |   std::vector<uint64_t> getDepUnresolvedParentIDs();
71 |   void setDepUnresolvedParentIDs(
72 |       std::vector<uint64_t> const& dep_unresolved_parent_ids);
73 |   void addChild(JSONNode node);
74 |   std::vector<JSONNode> getChildren();
75 | 
76 |   // Define the == operator for comparison
77 |   bool operator==(const JSONNode& other) const {
78 |     return node_id == other.node_id && node_name == other.node_name &&
79 |         node_type == other.node_type && is_cpu_op == other.is_cpu_op &&
80 |         runtime == other.runtime && num_ops == other.num_ops &&
81 |         tensor_size == other.tensor_size && comm_type == other.comm_type &&
82 |         comm_priority == other.comm_priority && comm_size == other.comm_size &&
83 |         comm_src == other.comm_src && comm_dst == other.comm_dst &&
84 |         comm_tag == other.comm_tag && data_deps == other.data_deps &&
85 |         dep_unresolved_parent_ids_json ==
86 |         other.dep_unresolved_parent_ids_json &&
87 |         children_vec_json == other.children_vec_json &&
88 |         children_set_json == other.children_set_json;
89 |   }
90 | 
91 |   // Overload the assignment operator
92 |   JSONNode& operator=(const JSONNode& other) {
93 |     if (this != &other) {
94 |       // Copy all member variables
95 |       node_id = other.node_id;
96 |       node_name = other.node_name;
97 |       node_type = other.node_type;
98 |       is_cpu_op = other.is_cpu_op;
99 |       runtime = other.runtime;
100 |       num_ops = other.num_ops;
101 |       tensor_size = other.tensor_size;
102 |       comm_type = other.comm_type;
103 |       comm_priority = other.comm_priority;
104 |       comm_size = other.comm_size;
105 |       comm_src = other.comm_src;
106 |       comm_dst = other.comm_dst;
107 |       comm_tag = other.comm_tag;
108 |       data_deps = other.data_deps;
109 |       dep_unresolved_parent_ids_json = other.dep_unresolved_parent_ids_json;
110 |       children_vec_json = other.children_vec_json;
111 |       children_set_json = other.children_set_json;
112 |     }
113 |     return *this;
114 |   }
115 | };
116 | 
117 | // Define a custom hash function for unordered set
118 | namespace std {
119 | template <>
120 | struct hash<JSONNode> {
121 |   std::size_t operator()(const JSONNode& node) const {
122 |     std::size_t h1 = std::hash<uint64_t>()(node.id());
123 |     std::size_t h2 = std::hash<std::string>()(node.name());
124 |     std::size_t h3 = std::hash<int>()(node.type());
125 |     std::size_t h4 = std::hash<bool>()(node.isCPUOp());
126 |     std::size_t h5 = std::hash<uint64_t>()(node.getRuntime());
127 | 
128 |     // A prime number for bit manipulation
129 |     const std::size_t prime = 31;
130 | 
131 |     // Combine the hash of the current member with the hashes of the previous
132 |     // members
133 |     std::size_t hash = h1;
134 |     hash = hash * prime + h2;
135 |     hash = hash * prime + h3;
136 |     hash = hash * prime + h4;
137 |     hash = hash * prime + h5;
138 | 
139 |     return hash;
140 |   }
141 | };
142 | } // namespace std
143 | 
144 | // Compare function for JSON node for priority queue
145 | struct CompareJSONNodesGT
146 |     : public std::binary_function<JSONNode, JSONNode, bool> {
147 |   bool operator()(const JSONNode lhs, const JSONNode rhs) const {
148 |     return lhs.id() > rhs.id();
149 |   }
150 | };
-------------------------------------------------------------------------------- /src/converter/converter.py: --------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | 
4 | from .pytorch_converter import PyTorchConverter
5 | from .text_converter import TextConverter
6 | 
7 | 
8 | def setup_logging(log_filename: str) -> None:
9 |     """Set up logging to file and stream handlers."""
10 |     formatter = logging.Formatter("%(levelname)s [%(asctime)s] %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p")
11 | 
12 |     file_handler = logging.FileHandler(log_filename, mode="w")
13 |     file_handler.setLevel(logging.DEBUG)
14 |     file_handler.setFormatter(formatter)
15 | 
16 |     # Console output: INFO and above print to the terminal, while the file
17 |     # handler above captures full DEBUG detail.
18 |     stream_handler = logging.StreamHandler()
19 |     stream_handler.setLevel(logging.INFO)
20 |     stream_handler.setFormatter(formatter)
21 | 
22 |     logging.basicConfig(level=logging.DEBUG, handlers=[file_handler, stream_handler])
23 | 
24 | 
25 | 
26 | 
27 | def convert_text(args: argparse.Namespace) -> None:
28 |     """Convert text input trace to Chakra execution trace."""
29 |     converter = TextConverter(args.input, args.output, args.num_npus, args.num_passes)
30 |     converter.convert()
31 | 
32 | 
33 | def convert_pytorch(args: argparse.Namespace) -> None:
34 |     """Convert PyTorch input trace to Chakra
execution trace.""" 35 | converter = PyTorchConverter() 36 | converter.convert(args.input, args.output, args.simulate) 37 | 38 | 39 | def main() -> None: 40 | """Convert to Chakra execution trace in the protobuf format.""" 41 | parser = argparse.ArgumentParser( 42 | description=( 43 | "Chakra execution trace converter for simulators. This converter is designed for any downstream " 44 | "simulators that take Chakra execution traces in the protobuf format. This converter takes an input file " 45 | "in another format and generates a Chakra execution trace output in the protobuf format." 46 | ) 47 | ) 48 | 49 | parser.add_argument("--log-filename", type=str, default="debug.log", help="Log filename") 50 | 51 | subparsers = parser.add_subparsers(title="subcommands", description="Valid subcommands", help="Input type") 52 | 53 | pytorch_parser = subparsers.add_parser( 54 | "PyTorch", 55 | help="Convert Chakra host + device execution trace in JSON to Chakra host + device execution trace in the " 56 | "Chakra schema with protobuf format", 57 | ) 58 | pytorch_parser.add_argument( 59 | "--input", type=str, required=True, help="Input Chakra host + device traces in the JSON format" 60 | ) 61 | pytorch_parser.add_argument( 62 | "--output", type=str, required=True, help="Output Chakra host + device traces in the protobuf format" 63 | ) 64 | pytorch_parser.add_argument( 65 | "--simulate", 66 | action="store_true", 67 | help=( 68 | "Enable simulation of operators after the conversion for validation and debugging purposes. This option " 69 | "allows simulation of traces without running them through a simulator. Users can validate the converter " 70 | "or simulator against actual measured values using tools like chrome://tracing or https://perfetto.dev/. " 71 | "Read the duration of the timeline and compare the total execution time against the final simulation time " 72 | "of a trace. Disabled by default because it takes a long time." 73 | ), 74 | ) 75 | pytorch_parser.set_defaults(func=convert_pytorch) 76 | 77 | text_parser = subparsers.add_parser( 78 | "Text", help="Convert text-based model description to Chakra schema-based traces in the protobuf format" 79 | ) 80 | text_parser.add_argument( 81 | "--input", 82 | type=str, 83 | required=True, 84 | help=( 85 | "Input file in the text format that describes a model. This follows the text format used in ASTRA-sim: " 86 | "https://github.com/astra-sim/astra-sim" 87 | ), 88 | ) 89 | text_parser.add_argument( 90 | "--output", type=str, required=True, help="Output Chakra execution trace filename in the protobuf format" 91 | ) 92 | text_parser.add_argument( 93 | "--num-npus", 94 | type=int, 95 | required=True, 96 | help="Number of NPUs in a system. Determines the number of traces the converter generates", 97 | ) 98 | text_parser.add_argument( 99 | "--num-passes", 100 | type=int, 101 | required=True, 102 | help=( 103 | "Number of loops when generating traces based on the text input file. Increasing the number of passes " 104 | "increases the number of training iterations for a given text input." 105 | ), 106 | ) 107 | text_parser.set_defaults(func=convert_text) 108 | 109 | args = parser.parse_args() 110 | 111 | if "func" in args: 112 | setup_logging(args.log_filename) 113 | args.func(args) 114 | logging.info(f"Conversion successful. 
Output file is available at {args.output}.")
115 |     else:
116 |         parser.print_help()
117 | 
118 | 
119 | if __name__ == "__main__":
120 |     main()
121 | 
-------------------------------------------------------------------------------- /src/feeder/json_node.cpp: --------------------------------------------------------------------------------
1 | #include "json_node.h"
2 | 
3 | // JSONNode default constructor
4 | JSONNode::JSONNode() {}
5 | 
6 | // JSONNode copy constructor
7 | JSONNode::JSONNode(const JSONNode& t) {
8 |   node_id = t.node_id;
9 |   node_name = t.node_name;
10 |   node_type = t.node_type;
11 |   is_cpu_op = t.is_cpu_op;
12 |   runtime = t.runtime;
13 |   data_deps = t.data_deps;
14 |   dep_unresolved_parent_ids_json = t.dep_unresolved_parent_ids_json;
15 |   children_vec_json = t.children_vec_json;
16 |   children_set_json = t.children_set_json;
17 |   // The comm_* fields below are only meaningful for communication nodes:
18 |   if (node_type == NodeType::COMM_SEND_NODE ||
19 |       node_type == NodeType::COMM_RECV_NODE ||
20 |       node_type == NodeType::COMM_COLL_NODE) {
21 |     tensor_size = t.tensor_size;
22 |     comm_type = t.comm_type;
23 |     comm_priority = t.comm_priority;
24 |     comm_size = t.comm_size;
25 |     comm_src = t.comm_src;
26 |     comm_dst = t.comm_dst;
27 |     comm_tag = t.comm_tag;
28 |   }
29 | }
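// Parsing note for the constructor below: each field is read inside its own
// try/catch so a trace that omits an optional key (e.g. "runtime" or the
// comm_* fields) still loads, while missing required keys are reported on
// stderr. A minimal node in the input JSON looks roughly like this (field
// values here are illustrative only):
//   {"Id": 216, "Name": "conv", "NodeType": 4, "is_cpu_op": true,
//    "runtime": 12, "data_deps": []}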
30 | 
31 | // JSONNode constructor
32 | JSONNode::JSONNode(json data, uint64_t id) {
33 |   try {
34 |     node_id = data["workload_graph"][id]["Id"];
35 |   } catch (...) {
36 |     std::cerr << "node_id not specified in ET" << std::endl;
37 |   }
38 |   try {
39 |     node_name = data["workload_graph"][id]["Name"];
40 |   } catch (...) {
41 |     std::cerr << "node_name not specified in ET" << std::endl;
42 |   }
43 |   try {
44 |     node_type = data["workload_graph"][id]["NodeType"];
45 |   } catch (...) {
46 |     std::cerr << "node_type not specified in ET" << std::endl;
47 |   }
48 |   try {
49 |     is_cpu_op = data["workload_graph"][id]["is_cpu_op"];
50 |   } catch (...) {
51 |     std::cerr << "is_cpu_op not specified in ET" << std::endl;
52 |   }
53 |   try {
54 |     runtime = data["workload_graph"][id]["runtime"];
55 |   } catch (...) {
56 |   }
57 |   try {
58 |     data_deps =
59 |         data["workload_graph"][id]["data_deps"].get<std::vector<uint64_t>>();
60 |   } catch (...) {
61 |     std::cerr << "data deps not specified in ET" << std::endl;
62 |   }
63 | 
64 |   if (node_type == NodeType::COMM_SEND_NODE ||
65 |       node_type == NodeType::COMM_RECV_NODE ||
66 |       node_type == NodeType::COMM_COLL_NODE) {
67 |     try {
68 |       tensor_size = data["workload_graph"][id]["tensor_size"];
69 |     } catch (...) {
70 |     }
71 |     try {
72 |       comm_type = data["workload_graph"][id]["comm_type"];
73 |     } catch (...) {
74 |     }
75 |     try {
76 |       comm_priority = data["workload_graph"][id]["comm_priority"];
77 |     } catch (...) {
78 |       comm_priority = 0; // Protobuf defaults to 0
79 |     }
80 |     try {
81 |       comm_size = data["workload_graph"][id]["comm_size"];
82 |     } catch (...) {
83 |     }
84 |     try {
85 |       comm_src = data["workload_graph"][id]["comm_src"];
86 |     } catch (...) {
87 |     }
88 |     try {
89 |       comm_dst = data["workload_graph"][id]["comm_dst"];
90 |     } catch (...) {
91 |     }
92 |     try {
93 |       comm_tag = data["workload_graph"][id]["comm_tag"];
94 |     } catch (...) {
95 |     }
96 |   }
97 | }
98 | 
99 | // Node id
100 | uint64_t JSONNode::id() const {
101 |   return node_id;
102 | }
103 | 
104 | // Node name
105 | std::string JSONNode::name() const {
106 |   return node_name;
107 | }
108 | 
109 | // Node type
110 | int JSONNode::type() const {
111 |   return node_type;
112 | }
113 | 
114 | // Check if CPU OP
115 | bool JSONNode::isCPUOp() const {
116 |   return is_cpu_op;
117 | }
118 | 
119 | // Runtime
120 | uint64_t JSONNode::getRuntime() const {
121 |   return runtime;
122 | }
123 | 
124 | // Num ops
125 | uint64_t JSONNode::getNumOps() const {
126 |   return num_ops;
127 | }
128 | 
129 | // Tensor size
130 | uint64_t JSONNode::getTensorSize() const {
131 |   return tensor_size;
132 | }
133 | 
134 | // Comm type
135 | int64_t JSONNode::getCommType() const {
136 |   return comm_type;
137 | }
138 | 
139 | // Comm priority
140 | uint32_t JSONNode::getCommPriority() const {
141 |   return comm_priority;
142 | }
143 | 
144 | // Comm size
145 | uint64_t JSONNode::getCommSize() const {
146 |   return comm_size;
147 | }
148 | 
149 | // Comm src
150 | uint32_t JSONNode::getCommSrc() const {
151 |   return comm_src;
152 | }
153 | 
154 | // Comm dst
155 | uint32_t JSONNode::getCommDst() const {
156 |   return comm_dst;
157 | }
158 | 
159 | // Comm tag
160 | uint32_t JSONNode::getCommTag() const {
161 |   return comm_tag;
162 | }
163 | 
164 | // Dependency unresolved parent IDs
165 | void JSONNode::addDepUnresolvedParentID(uint64_t node_id) {
166 |   dep_unresolved_parent_ids_json.emplace_back(node_id);
167 | }
168 | 
169 | // Get dependency unresolved parent IDs
170 | std::vector<uint64_t> JSONNode::getDepUnresolvedParentIDs() {
171 |   return dep_unresolved_parent_ids_json;
172 | }
173 | 
174 | // Set dependency unresolved parent IDs
175 | void JSONNode::setDepUnresolvedParentIDs(
176 |     std::vector<uint64_t> const& dep_unresolved_parent_ids) {
177 |   dep_unresolved_parent_ids_json = dep_unresolved_parent_ids;
178 | }
179 | 
180 | // Add child
181 | void JSONNode::addChild(JSONNode node) {
182 |   // Avoid adding the same child node multiple times
183 |   // addChild is called multiple times to resolve dependencies
184 |   if (children_set_json.find(node) != children_set_json.end()) {
185 |     return;
186 |   }
187 |   children_vec_json.emplace_back(node);
188 |   children_set_json.emplace(node);
189 | }
190 | 
191 | // Get children vector
192 | std::vector<JSONNode> JSONNode::getChildren() {
193 |   return children_vec_json;
194 | }
-------------------------------------------------------------------------------- /tests/feeder/wrapper_tests.cpp: --------------------------------------------------------------------------------
1 | #include <gtest/gtest.h>
2 | #include "WrapperNode.h"
3 | 
4 | class WrapperNodeTest : public ::testing::Test {
5 |  protected:
6 |   WrapperNodeTest() {}
7 |   virtual ~WrapperNodeTest() {}
8 | 
9 |   void SetUp(const std::string& filename) {
10 |     node.createWrapper(filename);
11 |   }
12 | 
13 |   virtual void TearDown() {
14 |     node.releaseMemory();
15 |   }
16 | 
17 |   WrapperNode node;
18 | };
19 | 
20 | TEST_F(WrapperNodeTest, ConstructorNodeIDTest) {
21 |   // tests/data/small_chakra.0.json is a pruned dataset for quick tests
22 |   // tests/data/chakra.0.json is the full dataset, which is also available
23 |   SetUp("tests/data/small_chakra.0.json");
24 |   node.getNextIssuableNode();
25 |   uint64_t firstNodeID = node.getNodeID();
26 |   ASSERT_EQ(firstNodeID, 216);
27 | 
28 |   node.getNextIssuableNode();
29 |   uint64_t secondNodeID = node.getNodeID();
30 |   ASSERT_EQ(secondNodeID, 432);
31 | }
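// The WrapperNode under test abstracts over the two feeder back ends: traces
// ending in ".et" are served by the protobuf-based ETFeeder, while ".json"
// traces go through the JSONNode path. That is why several tests below
// branch on the file extension before inspecting children.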
SetUp("tests/data/small_chakra.0.json"); 35 | node.getNextIssuableNode(); 36 | uint64_t firstNodeType = node.getNodeType(); 37 | ASSERT_EQ(firstNodeType, ChakraProtoMsg::COMP_NODE); 38 | ASSERT_TRUE(node.isCPUOp()); 39 | 40 | node.getNextIssuableNode(); 41 | uint64_t secondNodeType = node.getNodeType(); 42 | ASSERT_EQ(secondNodeType, ChakraProtoMsg::COMM_COLL_NODE); 43 | ASSERT_TRUE(node.isCPUOp()); 44 | } 45 | 46 | TEST_F(WrapperNodeTest, ConstructorWrapperNodeTest) { 47 | std::string filename = "tests/data/small_chakra.0.json"; 48 | std::string ext = filename.substr(filename.find_last_of(".") + 1); 49 | SetUp(filename); 50 | node.getNextIssuableNode(); 51 | if (ext == "et") { 52 | std::vector> children; 53 | node.getChildren(children); 54 | ASSERT_EQ(children[0]->id(), 217); 55 | ASSERT_EQ(children[1]->id(), 430); 56 | ASSERT_EQ(children[2]->id(), 435); 57 | } else if (ext == "json") { 58 | std::vector children; 59 | node.getChildren(children); 60 | ASSERT_EQ(children[0].id(), 217); 61 | ASSERT_EQ(children[1].id(), 430); 62 | ASSERT_EQ(children[2].id(), 435); 63 | } 64 | } 65 | 66 | TEST_F(WrapperNodeTest, RemoveTest) { 67 | SetUp("tests/data/small_chakra.0.json"); 68 | node.lookupNode(216); 69 | ASSERT_EQ(node.getNodeID(), 216); 70 | node.removeNode(216); 71 | freopen("/dev/null", "w", stderr); 72 | try { 73 | node.lookupNode(216); 74 | ASSERT_TRUE(false) << "node should be removed \n"; 75 | } catch (const std::exception& e) { 76 | // this is the desired behaviour 77 | } 78 | freopen("/dev/tty", "w", stderr); 79 | } 80 | 81 | TEST_F(WrapperNodeTest, RemoveAndGetNextTest) { 82 | SetUp("tests/data/small_chakra.0.json"); 83 | node.lookupNode(216); 84 | ASSERT_EQ(node.getNodeID(), 216); 85 | node.removeNode(216); 86 | node.getNextIssuableNode(); 87 | ASSERT_EQ(node.getNodeID(), 216); 88 | } 89 | 90 | TEST_F(WrapperNodeTest, FreeChildrenTest) { 91 | SetUp("tests/data/small_chakra.0.json"); 92 | node.lookupNode(216); 93 | ASSERT_EQ(node.getNodeID(), 216); 94 | node.freeChildrenNodes(216); 95 | node.getNextIssuableNode(); 96 | ASSERT_EQ(node.getNodeID(), 216); 97 | node.getNextIssuableNode(); 98 | ASSERT_EQ(node.getNodeID(), 217); 99 | } 100 | 101 | TEST_F(WrapperNodeTest, HasNodesToIssueTest) { 102 | SetUp("tests/data/small_chakra.0.json"); 103 | node.getNextIssuableNode(); 104 | ASSERT_EQ(node.getNodeID(), 216); 105 | ASSERT_TRUE(node.hasNodesToIssue()); 106 | node.removeNode(5); 107 | ASSERT_TRUE(node.hasNodesToIssue()); 108 | } 109 | 110 | TEST_F(WrapperNodeTest, PushBackIssuableNodeTest) { 111 | SetUp("tests/data/small_chakra.0.json"); 112 | node.pushBackIssuableNode(217); 113 | node.getNextIssuableNode(); 114 | ASSERT_EQ(node.getNodeID(), 216); 115 | node.getNextIssuableNode(); 116 | ASSERT_EQ(node.getNodeID(), 217); 117 | } 118 | 119 | TEST_F(WrapperNodeTest, AddNodeTest) { 120 | std::string filename = "tests/data/small_chakra.0.json"; 121 | std::string ext = filename.substr(filename.find_last_of(".") + 1); 122 | SetUp(filename); 123 | if (ext == "et") { 124 | std::shared_ptr pnode1; 125 | node.lookupNode(216); 126 | pnode1 = node.getProtobufNode(); 127 | node.removeNode(216); 128 | node.addNode(pnode1); 129 | std::shared_ptr pnode2; 130 | node.lookupNode(216); 131 | pnode2 = node.getProtobufNode(); 132 | ASSERT_EQ(pnode2->id(), 216); 133 | } else if (ext == "json") { 134 | JSON jnode1; 135 | node.lookupNode(216); 136 | jnode1 = node.getJSONNode(); 137 | node.removeNode(216); 138 | node.addNode(jnode1); 139 | JSONNode jnode2; 140 | node.lookupNode(216); 141 | jnode2 = 
118 | 
119 | TEST_F(WrapperNodeTest, AddNodeTest) {
120 |   std::string filename = "tests/data/small_chakra.0.json";
121 |   std::string ext = filename.substr(filename.find_last_of(".") + 1);
122 |   SetUp(filename);
123 |   if (ext == "et") {
124 |     std::shared_ptr<Chakra::ETFeederNode> pnode1;
125 |     node.lookupNode(216);
126 |     pnode1 = node.getProtobufNode();
127 |     node.removeNode(216);
128 |     node.addNode(pnode1);
129 |     std::shared_ptr<Chakra::ETFeederNode> pnode2;
130 |     node.lookupNode(216);
131 |     pnode2 = node.getProtobufNode();
132 |     ASSERT_EQ(pnode2->id(), 216);
133 |   } else if (ext == "json") {
134 |     JSONNode jnode1;
135 |     node.lookupNode(216);
136 |     jnode1 = node.getJSONNode();
137 |     node.removeNode(216);
138 |     node.addNode(jnode1);
139 |     JSONNode jnode2;
140 |     node.lookupNode(216);
141 |     jnode2 = node.getJSONNode();
142 |     ASSERT_EQ(jnode2.id(), 216);
143 |   }
144 | }
145 | 
146 | TEST_F(WrapperNodeTest, NodeGetChildrenTest) {
147 |   std::string filename = "tests/data/small_chakra.0.json";
148 |   std::string ext = filename.substr(filename.find_last_of(".") + 1);
149 |   SetUp(filename);
150 |   node.lookupNode(216);
151 |   if (ext == "et") {
152 |     std::vector<std::shared_ptr<Chakra::ETFeederNode>> children;
153 |     node.getChildren(children);
154 |     ASSERT_EQ(children[0]->id(), 217);
155 |     ASSERT_EQ(children[2]->id(), 435);
156 |   } else if (ext == "json") {
157 |     std::vector<JSONNode> children;
158 |     node.getChildren(children);
159 |     ASSERT_EQ(children[0].id(), 217);
160 |     ASSERT_EQ(children[2].id(), 435);
161 |   }
162 | }
163 | 
164 | int main(int argc, char** argv) {
165 |   ::testing::InitGoogleTest(&argc, argv);
166 |   return RUN_ALL_TESTS();
167 | }
168 | 
-------------------------------------------------------------------------------- /src/feeder/et_feeder.cpp: --------------------------------------------------------------------------------
1 | #include "et_feeder.h"
2 | 
3 | #include <iostream>
4 | 
5 | using namespace std;
6 | using namespace Chakra;
7 | 
8 | ETFeeder::ETFeeder(string filename)
9 |     : trace_(filename), window_size_(4096 * 256), et_complete_(false) {
10 |   if (!trace_.is_open()) { // Assuming a method to check if file is open
11 |     throw std::runtime_error("Failed to open trace file: " + filename);
12 |   }
13 | 
14 |   try {
15 |     readGlobalMetadata();
16 |     readNextWindow();
17 |   } catch (const std::exception& e) {
18 |     cerr << "Error in constructor: " << e.what() << endl;
19 |     throw; // Rethrow the exception for caller to handle
20 |   }
21 | }
22 | 
23 | ETFeeder::~ETFeeder() {}
24 | 
25 | void ETFeeder::addNode(shared_ptr<ETFeederNode> node) {
26 |   dep_graph_[node->getChakraNode()->id()] = node;
27 | }
28 | 
29 | void ETFeeder::removeNode(uint64_t node_id) {
30 |   dep_graph_.erase(node_id);
31 | 
32 |   if (!et_complete_ && (dep_free_node_queue_.size() < window_size_)) {
33 |     readNextWindow();
34 |   }
35 | }
36 | 
37 | bool ETFeeder::hasNodesToIssue() {
38 |   return !(dep_graph_.empty() && dep_free_node_queue_.empty());
39 | }
40 | 
41 | shared_ptr<ETFeederNode> ETFeeder::getNextIssuableNode() {
42 |   if (dep_free_node_queue_.size() != 0) {
43 |     shared_ptr<ETFeederNode> node = dep_free_node_queue_.top();
44 |     dep_free_node_id_set_.erase(node->getChakraNode()->id());
45 |     dep_free_node_queue_.pop();
46 |     return node;
47 |   } else {
48 |     return nullptr;
49 |   }
50 | }
51 | 
52 | void ETFeeder::pushBackIssuableNode(uint64_t node_id) {
53 |   shared_ptr<ETFeederNode> node = dep_graph_[node_id];
54 |   dep_free_node_id_set_.emplace(node_id);
55 |   dep_free_node_queue_.emplace(node);
56 | }
57 | 
58 | shared_ptr<ETFeederNode> ETFeeder::lookupNode(uint64_t node_id) {
59 |   try {
60 |     return dep_graph_.at(node_id);
61 |   } catch (const std::out_of_range& e) {
62 |     std::cerr << "looking for node_id=" << node_id
63 |               << " in dep graph, however, not loaded yet" << std::endl;
64 |     throw(e);
65 |   }
66 | }
67 | 
68 | void ETFeeder::freeChildrenNodes(uint64_t node_id) {
69 |   shared_ptr<ETFeederNode> node = dep_graph_[node_id];
70 |   for (auto child : node->getChildren()) {
71 |     auto child_chakra = child->getChakraNode();
72 |     for (auto it = child_chakra->mutable_data_deps()->begin();
73 |          it != child_chakra->mutable_data_deps()->end();
74 |          ++it) {
75 |       if (*it == node_id) {
76 |         child_chakra->mutable_data_deps()->erase(it);
77 |         break;
78 |       }
79 |     }
80 |     if (child_chakra->data_deps().size() == 0) {
81 |       dep_free_node_id_set_.emplace(child_chakra->id());
82 |       dep_free_node_queue_.emplace(child);
83 |     }
84 |   }
85 | }
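// Windowing sketch: instead of loading the whole trace up front, the feeder
// decodes nodes in batches via readNextWindow() (defined below); removeNode()
// above refills the window whenever the dependency-free queue drops below
// window_size_ and the trace has not been fully consumed.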
86 | 
87 | void ETFeeder::readGlobalMetadata() {
88 |   if (!trace_.is_open()) {
89 |     throw runtime_error(
90 |         "Trace file closed unexpectedly during reading global metadata.");
91 |   }
92 |   shared_ptr<ChakraProtoMsg::GlobalMetadata> pkt_msg =
93 |       make_shared<ChakraProtoMsg::GlobalMetadata>();
94 |   trace_.read(*pkt_msg);
95 | }
96 | 
97 | shared_ptr<ETFeederNode> ETFeeder::readNode() {
98 |   shared_ptr<ChakraProtoMsg::Node> pkt_msg =
99 |       make_shared<ChakraProtoMsg::Node>();
100 |   if (!trace_.read(*pkt_msg)) {
101 |     return nullptr;
102 |   }
103 |   shared_ptr<ETFeederNode> node = make_shared<ETFeederNode>(pkt_msg);
104 | 
105 |   bool dep_unresolved = false;
106 |   for (int i = 0; i < pkt_msg->data_deps_size(); ++i) {
107 |     auto parent_node = dep_graph_.find(pkt_msg->data_deps(i));
108 |     if (parent_node != dep_graph_.end()) {
109 |       parent_node->second->addChild(node);
110 |     } else {
111 |       dep_unresolved = true;
112 |       node->addDepUnresolvedParentID(pkt_msg->data_deps(i));
113 |     }
114 |   }
115 | 
116 |   if (dep_unresolved) {
117 |     dep_unresolved_node_set_.emplace(node);
118 |   }
119 | 
120 |   return node;
121 | }
122 | 
123 | void ETFeeder::resolveDep() {
124 |   for (auto it = dep_unresolved_node_set_.begin();
125 |        it != dep_unresolved_node_set_.end();) {
126 |     shared_ptr<ETFeederNode> node = *it;
127 |     vector<uint64_t> dep_unresolved_parent_ids =
128 |         node->getDepUnresolvedParentIDs();
129 |     for (auto inner_it = dep_unresolved_parent_ids.begin();
130 |          inner_it != dep_unresolved_parent_ids.end();) {
131 |       auto parent_node = dep_graph_.find(*inner_it);
132 |       if (parent_node != dep_graph_.end()) {
133 |         parent_node->second->addChild(node);
134 |         inner_it = dep_unresolved_parent_ids.erase(inner_it);
135 |       } else {
136 |         ++inner_it;
137 |       }
138 |     }
139 |     if (dep_unresolved_parent_ids.size() == 0) {
140 |       it = dep_unresolved_node_set_.erase(it);
141 |     } else {
142 |       node->setDepUnresolvedParentIDs(dep_unresolved_parent_ids);
143 |       ++it;
144 |     }
145 |   }
146 | }
147 | 
148 | void ETFeeder::readNextWindow() {
149 |   if (!trace_.is_open()) {
150 |     throw runtime_error(
151 |         "Trace file closed unexpectedly during reading next window.");
152 |   }
153 |   uint32_t num_read = 0;
154 |   do {
155 |     shared_ptr<ETFeederNode> new_node = readNode();
156 |     if (new_node == nullptr) {
157 |       et_complete_ = true;
158 |       break;
159 |     }
160 | 
161 |     addNode(new_node);
162 |     ++num_read;
163 | 
164 |     resolveDep();
165 |   } while ((num_read < window_size_) || (dep_unresolved_node_set_.size() != 0));
166 | 
167 |   for (auto node_id_node : dep_graph_) {
168 |     uint64_t node_id = node_id_node.first;
169 |     shared_ptr<ETFeederNode> node = node_id_node.second;
170 |     if ((dep_free_node_id_set_.count(node_id) == 0) &&
171 |         (node->getChakraNode()->data_deps().size() == 0)) {
172 |       dep_free_node_id_set_.emplace(node_id);
173 |       dep_free_node_queue_.emplace(node);
174 |     }
175 |   }
176 | }
-------------------------------------------------------------------------------- /tests/converter/test_pytorch_node.py: --------------------------------------------------------------------------------
1 | import json
2 | import tarfile
3 | from pathlib import Path
4 | from typing import Any, Dict
5 | 
6 | import pytest
7 | from chakra.src.converter.pytorch_node import PyTorchNode
8 | 
9 | 
10 | @pytest.fixture
11 | def extract_tar_gz_file(tmp_path: Path) -> Path:
12 |     """
13 |     Fixture to extract a tar.gz file to a temporary directory.
14 | 
15 |     Args:
16 |         tmp_path (Path): Temporary directory path provided by pytest.
17 | 
18 |     Returns:
19 |         Path: Path to the extracted directory.
20 | """ 21 | tar_gz_file = Path("tests/data/1.0.2-chakra.0.0.4.tgz") 22 | extracted_dir = tmp_path / "extracted" 23 | extracted_dir.mkdir() 24 | 25 | with tarfile.open(tar_gz_file, "r:gz") as tar: 26 | tar.extractall(path=extracted_dir) 27 | 28 | return extracted_dir 29 | 30 | 31 | def load_pytorch_execution_traces(file_path: str) -> Dict[str, Any]: 32 | """ 33 | Loads PyTorch execution traces from a file. 34 | 35 | Args: 36 | file_path (str): Path to the PyTorch execution trace file. 37 | 38 | Returns: 39 | Dict[str, Any]: Parsed PyTorch execution trace data. 40 | """ 41 | with open(file_path, "r") as pytorch_et: 42 | return json.load(pytorch_et) 43 | 44 | 45 | def test_pytorch_node_parsing(extract_tar_gz_file: Path) -> None: 46 | """ 47 | Test to check if PyTorchNode can parse nodes properly from the extracted data. 48 | 49 | Args: 50 | extract_tar_gz_file (Path): Path to the extracted directory containing 51 | the PyTorch execution trace file. 52 | """ 53 | pytorch_trace_file = extract_tar_gz_file / "1.0.2-chakra.0.0.4.json" 54 | pytorch_et_data = load_pytorch_execution_traces(str(pytorch_trace_file)) 55 | 56 | pytorch_schema = pytorch_et_data["schema"] 57 | pytorch_nodes = pytorch_et_data["nodes"] 58 | 59 | for node_data in pytorch_nodes: 60 | node = PyTorchNode(pytorch_schema, node_data) 61 | assert node is not None # Check if node is instantiated properly 62 | 63 | 64 | @pytest.fixture 65 | def sample_node_data_1_0_2_chakra_0_0_4() -> Dict: 66 | return { 67 | "id": 1, 68 | "name": "node1", 69 | "ctrl_deps": None, 70 | "inputs": {"values": "values", "shapes": "shapes", "types": "types"}, 71 | "outputs": {"values": "values", "shapes": "shapes", "types": "types"}, 72 | "attrs": [ 73 | {"name": "rf_id", "type": "uint64", "value": 0}, 74 | {"name": "fw_parent", "type": "uint64", "value": 0}, 75 | {"name": "seq_id", "type": "int64", "value": -1}, 76 | {"name": "scope", "type": "uint64", "value": 7}, 77 | {"name": "tid", "type": "uint64", "value": 1}, 78 | {"name": "fw_tid", "type": "uint64", "value": 0}, 79 | {"name": "op_schema", "type": "string", "value": ""}, 80 | ], 81 | "exclusive_dur": 50, 82 | } 83 | 84 | 85 | @pytest.fixture 86 | def sample_node_data_1_0_3_chakra_0_0_4() -> Dict: 87 | return { 88 | "id": 2, 89 | "name": "node2", 90 | "ctrl_deps": 1, 91 | "inputs": {"values": [], "shapes": [], "types": []}, 92 | "outputs": {"values": [], "shapes": [], "types": []}, 93 | "attrs": [ 94 | {"name": "rf_id", "type": "uint64", "value": 2}, 95 | {"name": "fw_parent", "type": "uint64", "value": 0}, 96 | {"name": "seq_id", "type": "int64", "value": -1}, 97 | {"name": "scope", "type": "uint64", "value": 7}, 98 | {"name": "tid", "type": "uint64", "value": 1}, 99 | {"name": "fw_tid", "type": "uint64", "value": 0}, 100 | {"name": "op_schema", "type": "string", "value": ""}, 101 | ], 102 | "exclusive_dur": 30, 103 | } 104 | 105 | 106 | @pytest.fixture 107 | def sample_node_data_unsupported_schema() -> Dict: 108 | return { 109 | "id": 4, 110 | "name": "## process_group:init ##", 111 | "ctrl_deps": 3, 112 | "inputs": { 113 | "values": [], 114 | "shapes": [[]], 115 | "types": ["String"], 116 | }, 117 | "outputs": {"values": [], "shapes": [], "types": []}, 118 | "attrs": [ 119 | {"name": "rf_id", "type": "uint64", "value": 2}, 120 | {"name": "fw_parent", "type": "uint64", "value": 0}, 121 | {"name": "seq_id", "type": "int64", "value": -1}, 122 | {"name": "scope", "type": "uint64", "value": 7}, 123 | {"name": "tid", "type": "uint64", "value": 1}, 124 | {"name": "fw_tid", "type": "uint64", "value": 
0}, 125 | {"name": "op_schema", "type": "string", "value": ""}, 126 | ], 127 | "exclusive_dur": 40, 128 | } 129 | 130 | 131 | def test_pytorch_node_parsing_1_0_2_chakra_0_0_4(sample_node_data_1_0_2_chakra_0_0_4) -> None: 132 | schema = "1.0.2-chakra.0.0.4" 133 | node = PyTorchNode(schema, sample_node_data_1_0_2_chakra_0_0_4) 134 | assert node is not None 135 | assert node.schema == schema 136 | assert isinstance(node.id, int) 137 | assert isinstance(node.name, str) 138 | assert node.exclusive_dur == 50 139 | 140 | 141 | def test_pytorch_node_parsing_1_0_3_chakra_0_0_4(sample_node_data_1_0_3_chakra_0_0_4) -> None: 142 | schema = "1.0.3-chakra.0.0.4" 143 | node = PyTorchNode(schema, sample_node_data_1_0_3_chakra_0_0_4) 144 | assert node is not None 145 | assert node.schema == schema 146 | assert isinstance(node.id, int) 147 | assert isinstance(node.name, str) 148 | assert node.exclusive_dur == 30 149 | 150 | 151 | def test_pytorch_node_unsupported_schema(sample_node_data_unsupported_schema) -> None: 152 | schema = "9999.9999.9999-chakra.0.0.4" 153 | with pytest.raises(ValueError, match=f"Unsupported schema version '{schema}'"): 154 | PyTorchNode(schema, sample_node_data_unsupported_schema) 155 | -------------------------------------------------------------------------------- /USER_GUIDE.md: -------------------------------------------------------------------------------- 1 | # Chakra User Guide 2 | ## Installation 3 | ### Step 1: Set up a Virtual Environment 4 | It's advisable to create a virtual environment using Python 3.10.2. 5 | 6 | ```bash 7 | # Create a virtual environment 8 | $ python3 -m venv chakra_env 9 | 10 | # Activate the virtual environment 11 | $ source chakra_env/bin/activate 12 | ``` 13 | 14 | ### Step 2: Install Chakra 15 | With the virtual environment activated, install the Chakra package using pip. 16 | 17 | ```bash 18 | # Install package from source 19 | $ pip install . 20 | 21 | # Install latest from GitHub 22 | $ pip install https://github.com/mlcommons/chakra/archive/refs/heads/main.zip 23 | 24 | # Install specific revision from GitHub 25 | $ pip install https://github.com/mlcommons/chakra/archive/ae7c671db702eb1384015bb2618dc753eed787f2.zip 26 | ``` 27 | 28 | ### Step 3: Install PARAM 29 | Installing PARAM is necessary for Chakra to function properly as it imports essential components from it. 30 | 31 | ```bash 32 | $ git clone git@github.com:facebookresearch/param.git 33 | $ cd param/et_replay 34 | $ git checkout 7b19f586dd8b267333114992833a0d7e0d601630 35 | $ pip install . 36 | ``` 37 | 38 | ### Step 4: Install Holistic Trace Analysis 39 | Installing Holistic Trace Analysis is necessary for Trace link. 40 | 41 | ```bash 42 | $ git clone https://github.com/facebookresearch/HolisticTraceAnalysis.git 43 | $ cd HolisticTraceAnalysis 44 | $ git checkout d731cc2e2249976c97129d409a83bd53d93051f6 45 | $ git submodule update --init 46 | $ pip install -r requirements.txt 47 | $ pip install -e . 48 | ``` 49 | 50 | ### Step 5: Uninstalling Chakra 51 | To uninstall Chakra, use the following command within the virtual environment. 52 | 53 | ```bash 54 | $ pip uninstall chakra 55 | ``` 56 | 57 | ## Tools Overview 58 | ### Execution Trace Link (chakra_trace_link) 59 | Merge Chakra host execution trace and Chakra device execution trace to encode GPU operators into the output execution trace. 
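For instance, a concrete invocation for rank 0 might look like the sketch below; the file names are illustrative, and the general form of the command follows.

```bash
$ chakra_trace_link \
    --chakra-host-trace host_trace_0.json \
    --chakra-device-trace device_trace_0.json \
    --rank 0 \
    --output-file chakra_host_device_trace_0.json
```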
60 | ```bash
61 | $ chakra_trace_link \
62 |     --chakra-host-trace /path/to/chakra_host_trace \
63 |     --chakra-device-trace /path/to/chakra_device_trace \
64 |     --rank [RANK] \
65 |     --output-file /path/to/chakra_host_device_trace.json
66 | ```
67 | 
68 | ### Execution Trace Converter (chakra_converter)
69 | Converts the execution traces from `chakra_trace_link` into traces in the protobuf format. It is responsible for identifying and encoding dependencies for simulation as well. The converter is designed for any downstream simulators that take Chakra execution traces in the protobuf format. It takes an input file in another format and generates a Chakra execution trace output in the protobuf format.
70 | ```bash
71 | $ chakra_converter PyTorch \
72 |     --input /path/to/chakra_host_device_trace.json \
73 |     --output /path/to/chakra_trace \
74 |     [--simulate]
75 | ```
76 | * --input: Path to the input file containing the merged Chakra host and device traces in JSON format.
77 | * --output: Path to the output file where the converted Chakra trace will be saved in protobuf format.
78 | * --simulate: (Optional) Enable simulation of operators after the conversion for validation and debugging purposes. This option allows simulation of traces without running them through a simulator. Users can validate the converter or simulator against actual measured values using tools like chrome://tracing or https://perfetto.dev/. Read the duration of the timeline and compare the total execution time against the final simulation time of a trace. Disabled by default because it takes a long time.
79 | 
80 | ### Execution Trace Feeder (et_feeder)
81 | The Execution Trace Feeder (et_feeder) is a C++ library designed to feed Chakra traces into any compatible C++ simulator. This library specifically provides dependency-free nodes to a simulator, which must import the feeder as a library. Currently, ASTRA-sim is the only simulator that supports this trace feeder. Below are the commands to run execution traces on ASTRA-sim:
82 | ```bash
83 | $ git clone --recurse-submodules git@github.com:astra-sim/astra-sim.git
84 | $ cd astra-sim
85 | $ git checkout Chakra
86 | $ git submodule update --init --recursive
87 | $ cd extern/graph_frontend/chakra/
88 | $ git checkout main
89 | $ cd -
90 | $ ./build/astra_analytical/build.sh -c
91 | 
92 | $ cd extern/graph_frontend/chakra/
93 | $ python -m chakra.et_generator.et_generator \
94 |     --num_npus <num_npus>
95 | 
96 | $ cd -
97 | $ ./run.sh
98 | ```
99 | 
100 | ### Execution Trace Visualizer (chakra_visualizer)
101 | This tool visualizes execution traces in various formats. Here is an example command:
102 | 
103 | ```bash
104 | $ chakra_visualizer \
105 |     --input_filename /path/to/chakra_et \
106 |     --output_filename /path/to/output.[graphml|pdf|dot]
107 | ```
108 | 
109 | ### Execution Trace Jsonizer (chakra_jsonizer)
110 | Provides a readable JSON format of execution traces:
111 | 
112 | ```bash
113 | $ chakra_jsonizer \
114 |     --input_filename /path/to/chakra_et \
115 |     --output_filename /path/to/output_json
116 | ```
117 | 
118 | ### Timeline Visualizer (chakra_timeline_visualizer)
119 | Visualizes the execution timeline of traces. This tool serves as a reference implementation for visualizing the simulation of Chakra traces. After simulating Chakra traces, you can visualize the timeline of operator executions; to do so, update the simulator to log when each operator is issued and when its callback completes.
Below is the format needed, where each field after the leading event type is a `key=value` pair and the value is what the visualizer reads:
120 | ```csv
121 | issue,npu_id=<npu_id>,curr_cycle=<curr_cycle>,node_id=<node_id>,node_name=<node_name>
122 | callback,npu_id=<npu_id>,curr_cycle=<curr_cycle>,node_id=<node_id>,node_name=<node_name>
123 | ...
124 | ```
125 | 
126 | You can visualize the timeline with the command below. Note that `--npu_frequency` takes an integer in MHz.
127 | ```bash
128 | $ chakra_timeline_visualizer \
129 |     --input_filename /path/to/input.csv \
130 |     --output_filename /path/to/output.json \
131 |     --num_npus 4 \
132 |     --npu_frequency 1500
133 | ```
134 | 
135 | When you open the output file with `chrome://tracing`, you will see an execution timeline like the one below.
136 | ![](doc/timeline_visualizer.png)
137 | 
-------------------------------------------------------------------------------- /src/feeder/et_feeder_node.cpp: --------------------------------------------------------------------------------
1 | #include "et_feeder_node.h"
2 | 
3 | using namespace std;
4 | using namespace Chakra;
5 | 
6 | ETFeederNode::ETFeederNode(std::shared_ptr<ChakraProtoMsg::Node> node) {
7 |   this->node_ = node;
8 |   this->id_ = node->id();
9 |   this->name_ = node->name();
10 |   this->runtime_ = node->duration_micros();
11 |   this->is_cpu_op_ = 0;
12 |   this->num_ops_ = 0;
13 | 
14 |   if (node->has_inputs()) {
15 |     this->inputs_values_ = static_cast<string>(node->inputs().values());
16 |     this->inputs_shapes_ = static_cast<string>(node->inputs().shapes());
17 |     this->inputs_types_ = static_cast<string>(node->inputs().types());
18 |   }
19 | 
20 |   if (node->has_outputs()) {
21 |     this->outputs_values_ = static_cast<string>(node->outputs().values());
22 |     this->outputs_shapes_ = static_cast<string>(node->outputs().shapes());
23 |     this->outputs_types_ = static_cast<string>(node->outputs().types());
24 |   }
25 | 
26 |   for (const auto& attr : node->attr()) {
27 |     const string& attr_name = attr.name();
28 | 
29 |     if (attr_name == "is_cpu_op") {
30 |       this->is_cpu_op_ = static_cast<bool>(attr.bool_val());
31 |     } else if (attr_name == "num_ops") {
32 |       this->num_ops_ = static_cast<uint64_t>(attr.int64_val());
33 |     } else if (attr_name == "tensor_size") {
34 |       this->tensor_size_ = attr.uint64_val();
35 |     } else if (attr_name == "comm_type") {
36 |       this->comm_type_ =
37 |           static_cast<ChakraProtoMsg::CollectiveCommType>(attr.int64_val());
38 |     } else if (attr_name == "comm_priority") {
39 |       this->comm_priority_ = static_cast<uint32_t>(attr.int32_val());
40 |     } else if (attr_name == "comm_size") {
41 |       this->comm_size_ = static_cast<uint64_t>(attr.int64_val());
42 |     } else if (attr_name == "comm_src") {
43 |       this->comm_src_ = static_cast<uint32_t>(attr.int32_val());
44 |     } else if (attr_name == "comm_dst") {
45 |       this->comm_dst_ = static_cast<uint32_t>(attr.int32_val());
46 |     } else if (attr_name == "comm_tag") {
47 |       this->comm_tag_ = static_cast<uint32_t>(attr.int32_val());
48 |     } else if (attr_name == "pg_name") {
49 |       this->pg_name_ = static_cast<string>(attr.string_val());
50 |     } else {
51 |       this->other_attrs_.emplace(attr_name, attr);
52 |     }
53 |   }
54 | }
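// Attribute promotion: the constructor above lifts well-known attributes
// (is_cpu_op, num_ops, tensor_size, the comm_* family, and pg_name) out of
// the generic AttributeProto list into typed fields; anything else lands in
// other_attrs_, where get_other_attr()/has_other_attr() below retrieve it
// by name.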
55 | 
56 | shared_ptr<ChakraProtoMsg::Node> ETFeederNode::getChakraNode() {
57 |   return node_;
58 | }
59 | 
60 | void ETFeederNode::addChild(shared_ptr<ETFeederNode> node) {
61 |   // Avoid adding the same child node multiple times
62 |   // addChild is called multiple times to resolve dependencies
63 |   if (children_set_.find(node) != children_set_.end()) {
64 |     return;
65 |   }
66 |   children_vec_.emplace_back(node);
67 |   children_set_.emplace(node);
68 | }
69 | 
70 | vector<shared_ptr<ETFeederNode>> ETFeederNode::getChildren() {
71 |   return children_vec_;
72 | }
73 | 
74 | void ETFeederNode::addDepUnresolvedParentID(uint64_t node_id) {
75 |   dep_unresolved_parent_ids_.emplace_back(node_id);
76 | }
77 | 
78 | vector<uint64_t> ETFeederNode::getDepUnresolvedParentIDs() {
79 |   return dep_unresolved_parent_ids_;
80 | }
81 | 
82 | void ETFeederNode::setDepUnresolvedParentIDs(
83 |     vector<uint64_t> const& dep_unresolved_parent_ids) {
84 |   dep_unresolved_parent_ids_ = dep_unresolved_parent_ids;
85 | }
86 | 
87 | const ChakraProtoMsg::AttributeProto& ETFeederNode::get_other_attr(
88 |     const string& attr_name) const {
89 |   if (this->has_other_attr(attr_name))
90 |     return this->other_attrs_.at(attr_name);
91 |   throw std::runtime_error(
92 |       "Asked for attr \"" + attr_name + "\" from node " +
93 |       std::to_string(this->id_) + ", which does not exist");
94 | }
95 | 
96 | bool ETFeederNode::has_other_attr(const string& attr_name) const {
97 |   const auto& item = this->other_attrs_.find(attr_name);
98 |   return item != this->other_attrs_.end();
99 | }
100 | 
101 | uint64_t ETFeederNode::id() {
102 |   return id_;
103 | }
104 | 
105 | string ETFeederNode::name() {
106 |   return name_;
107 | }
108 | 
109 | bool ETFeederNode::is_cpu_op() {
110 |   return is_cpu_op_;
111 | }
112 | 
113 | ChakraProtoMsg::NodeType ETFeederNode::type() {
114 |   return node_->type();
115 | }
116 | 
117 | uint64_t ETFeederNode::runtime() {
118 |   return runtime_;
119 | }
120 | 
121 | uint64_t ETFeederNode::num_ops() {
122 |   return num_ops_;
123 | }
124 | 
125 | uint32_t ETFeederNode::tensor_loc() {
126 |   return tensor_loc_;
127 | }
128 | 
129 | uint64_t ETFeederNode::tensor_size() {
130 |   return tensor_size_;
131 | }
132 | 
133 | ChakraProtoMsg::CollectiveCommType ETFeederNode::comm_type() {
134 |   return comm_type_;
135 | }
136 | 
137 | uint32_t ETFeederNode::comm_priority() {
138 |   return comm_priority_;
139 | }
140 | 
141 | uint64_t ETFeederNode::comm_size() {
142 |   return comm_size_;
143 | }
144 | 
145 | uint32_t ETFeederNode::comm_src() {
146 |   return comm_src_;
147 | }
148 | 
149 | uint32_t ETFeederNode::comm_dst() {
150 |   return comm_dst_;
151 | }
152 | 
153 | uint32_t ETFeederNode::comm_tag() {
154 |   return comm_tag_;
155 | }
156 | 
157 | string ETFeederNode::pg_name() {
158 |   return pg_name_;
159 | }
160 | 
161 | string ETFeederNode::get_inputs_values() const {
162 |   if (node_->has_inputs()) {
163 |     return inputs_values_;
164 |   }
165 |   return "";
166 | }
167 | 
168 | string ETFeederNode::get_inputs_shapes() const {
169 |   if (node_->has_inputs()) {
170 |     return inputs_shapes_;
171 |   }
172 |   return "";
173 | }
174 | 
175 | string ETFeederNode::get_inputs_types() const {
176 |   if (node_->has_inputs()) {
177 |     return inputs_types_;
178 |   }
179 |   return "";
180 | }
181 | 
182 | string ETFeederNode::get_outputs_values() const {
183 |   if (node_->has_outputs()) {
184 |     return outputs_values_;
185 |   }
186 |   return "";
187 | }
188 | 
189 | string ETFeederNode::get_outputs_shapes() const {
190 |   if (node_->has_outputs()) {
191 |     return outputs_shapes_;
192 |   }
193 |   return "";
194 | }
195 | 
196 | string ETFeederNode::get_outputs_types() const {
197 |   if (node_->has_outputs()) {
198 |     return outputs_types_;
199 |   }
200 |   return "";
201 | }
202 | 
-------------------------------------------------------------------------------- /src/timeline_visualizer/timeline_visualizer.py: --------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import logging
4 | import sys
5 | from enum import IntEnum
6 | from logging import FileHandler
7 | from typing import Any, Dict, List, Tuple
8 | 
9 | 
10 | class TID(IntEnum):
11 |     """
12 |     Enum representing the types of TID (Thread ID) used for classifying different nodes in a trace.
13 | 
14 |     Attributes:
15 |         LOCAL_MEMORY (int): Represents local memory nodes.
16 |         REMOTE_MEMORY (int): Represents remote memory nodes.
17 |         COMP (int): Represents compute nodes.
18 |         COMM (int): Represents communication nodes.
19 |     """
20 | 
21 |     LOCAL_MEMORY = 1
22 |     REMOTE_MEMORY = 2
23 |     COMP = 3
24 |     COMM = 4
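# Rendering note: in the Chrome trace emitted below, each NPU id becomes a
# "pid" and these TID values become the rows within it, so local-memory,
# remote-memory, compute, and communication events land on separate lanes.
# Timing sketch: with npu_frequency given in MHz, cycles / npu_frequency is
# microseconds; e.g. at 1500 MHz, an operator issued at cycle 3_000_000
# starts at 2.0 ms on the timeline.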
17 | COMP (int): Represents compute nodes. 18 | COMM (int): Represents communication nodes. 19 | """ 20 | 21 | LOCAL_MEMORY = 1 22 | REMOTE_MEMORY = 2 23 | COMP = 3 24 | COMM = 4 25 | 26 | 27 | def get_logger(log_filename: str) -> logging.Logger: 28 | formatter = logging.Formatter("%(levelname)s [%(asctime)s] %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p") 29 | 30 | file_handler = FileHandler(log_filename, mode="w") 31 | file_handler.setLevel(logging.DEBUG) 32 | file_handler.setFormatter(formatter) 33 | 34 | stream_handler = logging.StreamHandler() 35 | stream_handler.setLevel(logging.WARNING) 36 | stream_handler.setFormatter(formatter) 37 | 38 | logger = logging.getLogger(__file__) 39 | logger.setLevel(logging.DEBUG) 40 | logger.addHandler(file_handler) 41 | logger.addHandler(stream_handler) 42 | 43 | return logger 44 | 45 | 46 | def is_local_mem_node(node_name: str) -> bool: 47 | return ( 48 | ("MEM_LOAD_NODE" in node_name) 49 | and ("LOCAL_MEMORY" in node_name) 50 | or ("MEM_STORE_NODE" in node_name) 51 | and ("LOCAL_MEMORY" in node_name) 52 | ) 53 | 54 | 55 | def is_remote_mem_node(node_name: str) -> bool: 56 | return ( 57 | ("MEM_LOAD_NODE" in node_name) 58 | and ("REMOTE_MEMORY" in node_name) 59 | or ("MEM_STORE_NODE" in node_name) 60 | and ("REMOTE_MEMORY" in node_name) 61 | ) 62 | 63 | 64 | def is_comp_node(node_name: str) -> bool: 65 | return "COMP_NODE" in node_name 66 | 67 | 68 | def is_comm_node(node_name: str) -> bool: 69 | return ("COMM_SEND_NODE" in node_name) or ("COMM_RECV_NODE" in node_name) or ("COMM_COLL_NODE" in node_name) 70 | 71 | 72 | def get_tid(node_name: str) -> TID: 73 | if is_local_mem_node(node_name): 74 | return TID.LOCAL_MEMORY 75 | elif is_remote_mem_node(node_name): 76 | return TID.REMOTE_MEMORY 77 | elif is_comp_node(node_name): 78 | return TID.COMP 79 | elif is_comm_node(node_name): 80 | return TID.COMM 81 | else: 82 | raise ValueError(f"Node type cannot be identified from {node_name}") 83 | 84 | 85 | def parse_event(line: str) -> Tuple[str, int, int, int, str]: 86 | try: 87 | cols = line.strip().split(",") 88 | trace_type = cols[0] 89 | npu_id = int(cols[1].split("=")[1]) 90 | curr_cycle = int(cols[2].split("=")[1]) 91 | node_id = int(cols[3].split("=")[1]) 92 | node_name = cols[4].split("=")[1] 93 | return (trace_type, npu_id, curr_cycle, node_id, node_name) 94 | except Exception as e: 95 | raise ValueError(f'Cannot parse the following event -- "{line}": {e}') from e 96 | 97 | 98 | def get_trace_events(input_filename: str, num_npus: int, npu_frequency: int) -> List[Dict[str, Any]]: 99 | trace_dict = {i: {} for i in range(num_npus)} 100 | trace_events = [] 101 | 102 | with open(input_filename, "r") as f: 103 | for line in f: 104 | if ("issue" in line) or ("callback" in line): 105 | (trace_type, npu_id, curr_cycle, node_id, node_name) = parse_event(line) 106 | 107 | if trace_type == "issue": 108 | trace_dict[npu_id].update({node_id: [node_name, curr_cycle]}) 109 | elif trace_type == "callback": 110 | node_name = trace_dict[npu_id][node_id][0] 111 | tid = get_tid(node_name) 112 | issued_cycle = trace_dict[npu_id][node_id][1] 113 | issued_ms = (issued_cycle / npu_frequency) / 1_000 114 | duration_in_cycles = curr_cycle - issued_cycle 115 | duration_in_ms = duration_in_cycles / (npu_frequency * 1_000) 116 | 117 | trace_events.append( 118 | { 119 | "pid": npu_id, 120 | "tid": tid, 121 | "ts": issued_ms, 122 | "dur": duration_in_ms, 123 | "ph": "X", 124 | "name": node_name, 125 | "args": {"ms": duration_in_ms}, 126 | } 127 | ) 128 | 129 | del 
trace_dict[npu_id][node_id] 130 | else: 131 | raise ValueError(f"Unsupported trace_type, {trace_type}") 132 | 133 | return trace_events 134 | 135 | 136 | def write_trace_events(output_filename: str, num_npus: int, trace_events: List[Dict[str, Any]]) -> None: 137 | output_dict = {"meta_user": "aras", "traceEvents": trace_events, "meta_cpu_count": num_npus} 138 | with open(output_filename, "w") as f: 139 | json.dump(output_dict, f) 140 | 141 | 142 | def main() -> None: 143 | parser = argparse.ArgumentParser(description="Timeline Visualizer") 144 | parser.add_argument("--input_filename", type=str, default=None, required=True, help="Input timeline filename") 145 | parser.add_argument("--output_filename", type=str, default=None, required=True, help="Output trace filename") 146 | parser.add_argument("--num_npus", type=int, default=None, required=True, help="Number of NPUs in a system") 147 | parser.add_argument("--npu_frequency", type=int, default=None, required=True, help="NPU frequency in MHz") 148 | parser.add_argument("--log_filename", type=str, default="debug.log", help="Log filename") 149 | args = parser.parse_args() 150 | 151 | logger = get_logger(args.log_filename) 152 | logger.debug(" ".join(sys.argv)) 153 | 154 | try: 155 | trace_events = get_trace_events(args.input_filename, args.num_npus, args.npu_frequency) 156 | write_trace_events(args.output_filename, args.num_npus, trace_events) 157 | except Exception as e: 158 | logger.error(str(e)) 159 | sys.exit(1) 160 | 161 | 162 | if __name__ == "__main__": 163 | main() 164 | -------------------------------------------------------------------------------- /src/third_party/utils/protoio.hh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 ARM Limited 3 | * All rights reserved 4 | * 5 | * The license below extends only to copyright in the software and shall 6 | * not be construed as granting a license to any other intellectual 7 | * property including but not limited to intellectual property relating 8 | * to a hardware implementation of the functionality of the software 9 | * licensed hereunder. You may use the software subject to the license 10 | * terms below provided that you ensure that this notice is replicated 11 | * unmodified and in its entirety in all distributions of the software, 12 | * modified or unmodified, in source code or in binary form. 13 | * 14 | * Redistribution and use in source and binary forms, with or without 15 | * modification, are permitted provided that the following conditions are 16 | * met: redistributions of source code must retain the above copyright 17 | * notice, this list of conditions and the following disclaimer; 18 | * redistributions in binary form must reproduce the above copyright 19 | * notice, this list of conditions and the following disclaimer in the 20 | * documentation and/or other materials provided with the distribution; 21 | * neither the name of the copyright holders nor the names of its 22 | * contributors may be used to endorse or promote products derived from 23 | * this software without specific prior written permission. 24 | * 25 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 29 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 | */ 37 | 38 | /** 39 | * @file 40 | * Declaration of a wrapper for protobuf output streams and input streams. 41 | */ 42 | 43 | #ifndef __PROTO_PROTOIO_HH__ 44 | #define __PROTO_PROTOIO_HH__ 45 | 46 | #include <google/protobuf/io/coded_stream.h> 47 | #include <google/protobuf/io/gzip_stream.h> 48 | #include <google/protobuf/io/zero_copy_stream_impl.h> 49 | #include <google/protobuf/message.h> 50 | 51 | #include <fstream> 52 | 53 | /** 54 | * A ProtoStream provides the shared functionality of the input and 55 | * output streams. At the moment this is limited to the magic number. 56 | */ 57 | class ProtoStream { 58 | protected: 59 | /// Use the ASCII characters gem5 as our magic number 60 | static const uint32_t magicNumber = 0x356d6567; 61 | 62 | /** 63 | * Create a ProtoStream. 64 | */ 65 | ProtoStream() {} 66 | 67 | private: 68 | /** 69 | * Hide the copy constructor and assignment operator. 70 | * @{ 71 | */ 72 | ProtoStream(const ProtoStream&); 73 | ProtoStream& operator=(const ProtoStream&); 74 | /** @} */ 75 | }; 76 | 77 | /** 78 | * A ProtoOutputStream wraps a coded stream, potentially with 79 | * compression, based on looking at the file name. Writing to the 80 | * stream is done to enable interaction with the file on a per-message 81 | * basis to avoid having to deal with huge data structures. The latter 82 | * is made possible by encoding the length of each message in the 83 | * stream. 84 | */ 85 | class ProtoOutputStream : public ProtoStream { 86 | public: 87 | /** 88 | * Create an output stream for a given file name. If the filename 89 | * ends with .gz then the file will be compressed accordingly. 90 | * 91 | * @param filename Path to the file to create or truncate 92 | */ 93 | ProtoOutputStream(const std::string& filename); 94 | 95 | /** 96 | * Destruct the output stream, and also flush and close the 97 | * underlying file streams and coded streams. 98 | */ 99 | ~ProtoOutputStream(); 100 | 101 | /** 102 | * Write a message to the stream, prepending it with the message 103 | * size. 104 | * 105 | * @param msg Message to write to the stream 106 | */ 107 | void write(const google::protobuf::Message& msg); 108 | 109 | private: 110 | /// Underlying file output stream 111 | std::ofstream fileStream; 112 | 113 | /// Zero Copy stream wrapping the STL output stream 114 | google::protobuf::io::OstreamOutputStream* wrappedFileStream; 115 | 116 | /// Optional Gzip stream to wrap the Zero Copy stream 117 | google::protobuf::io::GzipOutputStream* gzipStream; 118 | 119 | /// Top-level zero-copy stream, either with compression or not 120 | google::protobuf::io::ZeroCopyOutputStream* zeroCopyStream; 121 | }; 122 | 123 | /** 124 | * A ProtoInputStream wraps a coded stream, potentially with 125 | * decompression, based on looking at the file name. Reading from the 126 | * stream is done on a per-message basis to avoid having to deal with 127 | * huge data structures. The latter assumes the length of each message 128 | * is encoded in the stream when it is written.
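 *
 * A minimal usage sketch (illustrative only; "trace.0.et" is a made-up
 * file name, and ChakraProtoMsg::Node is assumed to be the trace message
 * generated from schema/protobuf/et_def.proto):
 *
 *   ProtoInputStream istream("trace.0.et");
 *   ChakraProtoMsg::Node node;
 *   while (istream.read(node)) {
 *     // handle one length-prefixed message at a time
 *   }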
129 | */ 130 | class ProtoInputStream : public ProtoStream { 131 | public: 132 | /** 133 | * Create an input stream for a given file name. If the filename 134 | * ends with .gz then the file will be decompressed accordingly. 135 | * 136 | * @param filename Path to the file to read from 137 | */ 138 | ProtoInputStream(const std::string& filename); 139 | 140 | /** 141 | * Destruct the input stream, and also close the underlying file 142 | * streams and coded streams. 143 | */ 144 | ~ProtoInputStream(); 145 | 146 | bool is_open(); 147 | 148 | /** 149 | * Read a message from the stream. 150 | * 151 | * @param msg Message read from the stream 152 | * @return True if a message was read, false if reading fails 153 | */ 154 | bool read(google::protobuf::Message& msg); 155 | 156 | /** 157 | * Reset the input stream and seek to the beginning of the file. 158 | */ 159 | void reset(); 160 | 161 | private: 162 | /** 163 | * Create the internal streams that are wrapping the input file. 164 | */ 165 | void createStreams(); 166 | 167 | /** 168 | * Destroy the internal streams that are wrapping the input file. 169 | */ 170 | void destroyStreams(); 171 | 172 | /// Underlying file input stream 173 | std::ifstream fileStream; 174 | 175 | /// Hold on to the file name for debug messages 176 | const std::string fileName; 177 | 178 | /// Boolean flag to remember whether we use gzip or not 179 | bool useGzip; 180 | 181 | /// Zero Copy stream wrapping the STL input stream 182 | google::protobuf::io::IstreamInputStream* wrappedFileStream; 183 | 184 | /// Optional Gzip stream to wrap the Zero Copy stream 185 | google::protobuf::io::GzipInputStream* gzipStream; 186 | 187 | /// Top-level zero-copy stream, either with compression or not 188 | google::protobuf::io::ZeroCopyInputStream* zeroCopyStream; 189 | }; 190 | 191 | #endif //__PROTO_PROTOIO_HH__ 192 | -------------------------------------------------------------------------------- /src/third_party/utils/protolib.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright (c) 2013 ARM Limited 4 | # All rights reserved 5 | # 6 | # The license below extends only to copyright in the software and shall 7 | # not be construed as granting a license to any other intellectual 8 | # property including but not limited to intellectual property relating 9 | # to a hardware implementation of the functionality of the software 10 | # licensed hereunder. You may use the software subject to the license 11 | # terms below provided that you ensure that this notice is replicated 12 | # unmodified and in its entirety in all distributions of the software, 13 | # modified or unmodified, in source code or in binary form. 14 | # 15 | # Redistribution and use in source and binary forms, with or without 16 | # modification, are permitted provided that the following conditions are 17 | # met: redistributions of source code must retain the above copyright 18 | # notice, this list of conditions and the following disclaimer; 19 | # redistributions in binary form must reproduce the above copyright 20 | # notice, this list of conditions and the following disclaimer in the 21 | # documentation and/or other materials provided with the distribution; 22 | # neither the name of the copyright holders nor the names of its 23 | # contributors may be used to endorse or promote products derived from 24 | # this software without specific prior written permission.
25 | # 26 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 27 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 28 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 29 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 30 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 31 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 32 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 33 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 34 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 35 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 36 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 37 | # 38 | # Copyright 2008 Google Inc. All rights reserved. 39 | # http://code.google.com/p/protobuf/ 40 | # 41 | # Redistribution and use in source and binary forms, with or without 42 | # modification, are permitted provided that the following conditions are 43 | # met: 44 | # 45 | # * Redistributions of source code must retain the above copyright 46 | # notice, this list of conditions and the following disclaimer. 47 | # * Redistributions in binary form must reproduce the above 48 | # copyright notice, this list of conditions and the following disclaimer 49 | # in the documentation and/or other materials provided with the 50 | # distribution. 51 | # * Neither the name of Google Inc. nor the names of its 52 | # contributors may be used to endorse or promote products derived from 53 | # this software without specific prior written permission. 54 | # 55 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 56 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 57 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 58 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 59 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 60 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 61 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 62 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 63 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 64 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 65 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 66 | 67 | # This file is a library of commonly used functions used when interfacing 68 | # with protobuf python messages. For eg, the decode scripts for different 69 | # types of proto objects can use the same function to decode a single message 70 | 71 | import gzip 72 | import struct 73 | 74 | 75 | def openFileRd(in_file): 76 | """ 77 | This opens the file passed as argument for reading using an appropriate 78 | function depending on if it is gzipped or not. It returns the file 79 | handle. 80 | """ 81 | try: 82 | # First see if this file is gzipped 83 | try: 84 | # Opening the file works even if it is not a gzip file 85 | proto_in = gzip.open(in_file, "rb") 86 | 87 | # Force a check of the magic number by seeking in the 88 | # file. If we do not do it here the error will occur when 89 | # reading the first message. 
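# Note: gzip.open() succeeds even on a file that is not gzipped; the
# gzip magic number is only validated on the first read or seek. The
# seek below forces that validation, so a plain (uncompressed) file
# raises IOError here and falls through to being reopened as a regular
# binary file in the handler below.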
90 | proto_in.seek(1) 91 | proto_in.seek(0) 92 | except IOError: 93 | proto_in = open(in_file, "rb") 94 | except IOError: 95 | print("Failed to open ", in_file, " for reading") 96 | exit(-1) 97 | return proto_in 98 | 99 | 100 | def _DecodeVarint32(in_file): 101 | """ 102 | The decoding of the Varint32 is copied from 103 | google.protobuf.internal.decoder and is only repeated here to 104 | avoid depending on the internal functions in the library. If the 105 | end of file is reached, return (0, 0). 106 | """ 107 | result = 0 108 | shift = 0 109 | pos = 0 110 | # Use a 32-bit mask 111 | mask = 0xFFFFFFFF 112 | while 1: 113 | c = in_file.read(1) 114 | if len(c) == 0: 115 | return (0, 0) 116 | b = struct.unpack("<B", c)[0] 117 | result |= (b & 0x7F) << shift 118 | pos += 1 119 | if not (b & 0x80): 120 | if result > 0x7FFFFFFFFFFFFFFF: 121 | result -= 1 << 64 122 | result |= ~mask 123 | else: 124 | result &= mask 125 | return (result, pos) 126 | shift += 7 127 | if shift >= 64: 128 | raise IOError("Too many bytes when decoding varint.") 129 | 130 | 131 | def decodeMessage(in_file, message): 132 | """ 133 | Attempt to read a message from the file and decode it. Return 134 | False if no message could be read. 135 | """ 136 | try: 137 | size, pos = _DecodeVarint32(in_file) 138 | if size == 0: 139 | return False 140 | buf = in_file.read(size) 141 | message.ParseFromString(buf) 142 | return True 143 | except IOError: 144 | return False 145 | 146 | 147 | def _EncodeVarint32(out_file, value): 148 | """ 149 | The encoding of the Varint32 is copied from 150 | google.protobuf.internal.encoder and is only repeated here to 151 | avoid depending on the internal functions in the library. 152 | """ 153 | bits = value & 0x7F 154 | value >>= 7 155 | while value: 156 | out_file.write(struct.pack("<B", bits | 0x80)) 157 | bits = value & 0x7F 158 | value >>= 7 159 | out_file.write(struct.pack("<B", bits)) 160 | 161 | 162 | def encodeMessage(out_file, message): 163 | """ 164 | Encoded a message with the length prepended as a 32-bit varint. 165 | """ 166 | out = message.SerializeToString() 167 | _EncodeVarint32(out_file, len(out)) 168 | out_file.write(out) 169 | -------------------------------------------------------------------------------- /src/trace_link/kineto_operator.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 | from et_replay.execution_trace import Node as PyTorchOperator 4 | 5 | 6 | class KinetoOperator: 32 | def __init__(self, kineto_op: Dict[str, Any]) -> None: 33 | """ 34 | Initialize a new instance of the KinetoOperator class. 35 | 36 | Args: 37 | kineto_op (Dict[str, Any]): The dictionary representing the 38 | operator data. 39 | """ 40 | self.id: Optional[int] = kineto_op.get("id") 41 | self.category: str = kineto_op.get("cat", "") 42 | self.name: str = kineto_op.get("name", "") 43 | self.phase: Optional[str] = kineto_op.get("ph") 44 | self.inclusive_dur: int = kineto_op.get("dur", 0) 45 | self.exclusive_dur: int = kineto_op.get("dur", 0) 46 | self.timestamp: int = kineto_op.get("ts", 0) 47 | self.external_id: int = int(kineto_op.get("args", {}).get("External id", -1)) 48 | self.ev_idx: int = int(kineto_op.get("args", {}).get("Ev Idx", -1)) 49 | self.tid: int = kineto_op.get("tid", 0) 50 | self.host_op: Optional[PyTorchOperator] = None 51 | self.parent_host_op_id: Optional[int] = None 52 | self.inter_thread_dep: Optional[int] = None 53 | self.sync_dep: List[KinetoOperator] = [] 54 | self.stream: Optional[int] = kineto_op.get("args", {}).get("stream", None) 55 | self.rf_id: Optional[int] = kineto_op.get("args", {}).get("Record function id", None) 56 | self.correlation: int = kineto_op.get("args", {}).get("correlation", -1) 57 | self.pg_name: Optional[str] = kineto_op.get("args", {}).get("Process Group Name", None) 58 | 59 | def __repr__(self) -> str: 60 | """ 61 | Represent the KinetoOperator as a string. 62 | 63 | Returns 64 | str: A string representation of the KinetoOperator.
65 | """ 66 | sync_dep_ids = [op.id for op in self.sync_dep] 67 | return ( 68 | f"KinetoOperator(id={self.id}, category={self.category}, name={self.name}, " 69 | f"phase={self.phase}, inclusive_dur={self.inclusive_dur}, " 70 | f"exclusive_dur={self.exclusive_dur}, timestamp={self.timestamp}, " 71 | f"external_id={self.external_id}, ev_idx={self.ev_idx}, tid={self.tid}, " 72 | f"parent_host_op_id={self.parent_host_op_id}, inter_thread_dep={self.inter_thread_dep}, " 73 | f"sync_dep={sync_dep_ids}, stream={self.stream}, rf_id={self.rf_id}, correlation={self.correlation})" 74 | ) 75 | 76 | def is_cpu_op(self) -> bool: 77 | """ 78 | Determine if the operator is simulatable based on its category and name. 79 | 80 | The categories 'cpu_op' and 'user_annotation' are considered CPU operators. 81 | Notably, 'user_annotation' operators often include the duration of CPU operator launch times. 82 | Ignoring the duration measured in 'user_annotation' can lead to inaccuracies in simulation. 83 | An exception to this is 'ProfilerStep', which should be completely ignored. 84 | Ideally, a more general rule should be developed to identify such exception nodes. 85 | 86 | Returns 87 | bool: True if the operator is simulatable, False otherwise. 88 | """ 89 | simulatable_categories = {"cpu_op", "user_annotation"} 90 | name_exceptions = {"ProfilerStep"} 91 | if self.category in simulatable_categories and all(exc not in self.name for exc in name_exceptions): 92 | return True 93 | return False 94 | 95 | def is_cuda_runtime_op(self) -> bool: 96 | """ 97 | Determine whether the operator is a CUDA runtime operator. 98 | 99 | Returns 100 | bool: True if it's a CUDA runtime operator, otherwise False. 101 | """ 102 | return self.category == "cuda_runtime" 103 | 104 | def is_cuda_driver_op(self) -> bool: 105 | """ 106 | Determine whether the operator is a CUDA driver operator. 107 | 108 | Returns 109 | bool: True if it's a CUDA driver operator, otherwise False. 110 | """ 111 | return self.category == "cuda_driver" 112 | 113 | def is_ac2g_op(self) -> bool: 114 | """ 115 | Check if the operator is categorized as 'ac2g', which stands for arrows from CPU to GPU. 116 | 117 | Excerpt from https://pytorch.org/docs/stable/torch.compiler_profiling_torch_compile.html 118 | ``` 119 | Every kernel on the GPU occurs after being launched by code running on the CPU. The profiler can draw 120 | connections (i.e. "flows") between the GPU and CPU events to show which CPU event launched a GPU kernel. 121 | This is particularly helpful because, with a few exceptions, GPU kernels are launched asynchronously. 122 | 123 | To view a flow connection, click on a GPU kernel and click "ac2g". 124 | ```` 125 | 126 | Returns 127 | bool: True if the operator is an 'ac2g' type, otherwise False. 128 | """ 129 | return self.category == "ac2g" 130 | 131 | def is_kernel_launch_op(self) -> bool: 132 | """ 133 | Determine whether the operator is a kernel-launching CUDA runtime operator. 134 | 135 | Returns 136 | bool: True if it's a launch operation, otherwise False. 
137 | """ 138 | cuda_launch_categories = self.is_cuda_runtime_op() or self.is_cuda_driver_op() 139 | cuda_launch_operations = { 140 | "cuLaunchKernel", 141 | "cuLaunchKernelEx", 142 | "cudaLaunchKernel", 143 | "cudaLaunchKernelExC", 144 | "cudaMemcpy", 145 | "cudaMemcpyAsync", 146 | "cudaMemcpyFromSymbol", 147 | "cudaMemcpyToSymbol", 148 | "cudaLaunchCooperativeKernel", 149 | } 150 | 151 | hip_launch_operations = { 152 | "hipLaunchKernel", 153 | "hipExtLaunchKernel", 154 | "hipExtModuleLaunchKernel", 155 | "hipModuleLaunchKernel", 156 | "hipMemcpyWithStream", 157 | "hipMemcpyAsync", 158 | } 159 | return cuda_launch_categories and (self.name in cuda_launch_operations or self.name in hip_launch_operations) 160 | 161 | def is_gpu_op(self) -> bool: 162 | """ 163 | Check if the operator is a GPU-side operator based on its category. 164 | 165 | Returns 166 | bool: True if it's a GPU-side operation, otherwise False. 167 | """ 168 | gpu_categories = {"kernel", "gpu_memcpy"} 169 | return self.category in gpu_categories 170 | 171 | def is_inter_gpu_comms_op(self) -> bool: 172 | """ 173 | Check if the operator is a inter-GPU communication operator based on its name. 174 | 175 | Both point-to-point send/receive primitives and collective communication primitives are considered. 176 | 177 | Returns 178 | bool: True if it's a inter-GPU communication, otherwise False. 179 | """ 180 | return "ncclDevKernel" in self.name 181 | -------------------------------------------------------------------------------- /tests/converter/test_pytorch_converter.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Dict 3 | from unittest.mock import MagicMock, mock_open, patch 4 | 5 | import pytest 6 | from chakra.schema.protobuf.et_def_pb2 import ( 7 | ALL_GATHER, 8 | ALL_REDUCE, 9 | ALL_TO_ALL, 10 | BROADCAST, 11 | COMM_COLL_NODE, 12 | COMP_NODE, 13 | METADATA_NODE, 14 | REDUCE_SCATTER, 15 | ) 16 | from chakra.schema.protobuf.et_def_pb2 import Node as ChakraNode 17 | from chakra.src.converter.pytorch_converter import PyTorchConverter 18 | from chakra.src.converter.pytorch_node import PyTorchNode 19 | 20 | 21 | @pytest.fixture 22 | def sample_pytorch_data() -> Dict: 23 | return { 24 | "schema": "1.0.2-chakra.0.0.4", 25 | "pid": 1234, 26 | "time": "2023-01-01 12:00:00", 27 | "start_ts": 1000, 28 | "finish_ts": 2000, 29 | "nodes": [ 30 | { 31 | "id": 1, 32 | "name": "node1", 33 | "ctrl_deps": None, 34 | "exclusive_dur": 50, 35 | "inputs": {"values": "values", "shapes": "shapes", "types": "types"}, 36 | "outputs": {"values": "values", "shapes": "shapes", "types": "types"}, 37 | "attrs": [ 38 | {"name": "rf_id", "type": "uint64", "value": 0}, 39 | {"name": "fw_parent", "type": "uint64", "value": 0}, 40 | {"name": "seq_id", "type": "int64", "value": -1}, 41 | {"name": "scope", "type": "uint64", "value": 7}, 42 | {"name": "tid", "type": "uint64", "value": 1}, 43 | {"name": "fw_tid", "type": "uint64", "value": 0}, 44 | {"name": "op_schema", "type": "string", "value": ""}, 45 | ], 46 | }, 47 | { 48 | "id": 2, 49 | "name": "node2", 50 | "ctrl_deps": 1, 51 | "exclusive_dur": 30, 52 | "inputs": {"values": "values", "shapes": "shapes", "types": "types"}, 53 | "outputs": {"values": "values", "shapes": "shapes", "types": "types"}, 54 | "attrs": [ 55 | {"name": "rf_id", "type": "uint64", "value": 0}, 56 | {"name": "fw_parent", "type": "uint64", "value": 0}, 57 | {"name": "seq_id", "type": "int64", "value": -1}, 58 | {"name": "scope", "type": "uint64", "value": 7}, 59 | 
{"name": "tid", "type": "uint64", "value": 1}, 60 | {"name": "fw_tid", "type": "uint64", "value": 0}, 61 | {"name": "op_schema", "type": "string", "value": ""}, 62 | ], 63 | }, 64 | ], 65 | } 66 | 67 | 68 | @pytest.fixture 69 | def mock_chakra_node() -> ChakraNode: 70 | node = ChakraNode() 71 | node.id = 1 72 | node.name = "node1" 73 | node.type = COMP_NODE 74 | return node 75 | 76 | 77 | @patch("builtins.open", new_callable=mock_open) 78 | def test_load_json_execution_traces(mock_file: MagicMock, sample_pytorch_data: Dict) -> None: 79 | mock_file.return_value.read.return_value = json.dumps(sample_pytorch_data) 80 | converter = PyTorchConverter() 81 | data = converter.load_json_execution_traces("input.json") 82 | assert data == sample_pytorch_data 83 | mock_file.assert_called_once_with("input.json", "r") 84 | 85 | 86 | def test_parse_json_trace(sample_pytorch_data: Dict) -> None: 87 | converter = PyTorchConverter() 88 | json_metadata, json_node_map = converter.parse_json_trace(sample_pytorch_data) 89 | 90 | assert json_metadata["schema"] == "1.0.2-chakra.0.0.4" 91 | assert json_metadata["pid"] == 1234 92 | assert json_metadata["time"] == "2023-01-01 12:00:00" 93 | assert json_metadata["start_ts"] == 1000 94 | assert json_metadata["finish_ts"] == 2000 95 | assert len(json_node_map) == 2 96 | assert json_node_map[1].id == 1 97 | assert json_node_map[2].id == 2 98 | 99 | 100 | def create_sample_graph(parent_id: int = 0, expected_child_id: int = 0) -> Dict[int, PyTorchNode]: 101 | node1_data = { 102 | "id": 1, 103 | "name": "node1", 104 | "ctrl_deps": None, 105 | "inputs": {"values": ["val1"], "shapes": ["shape1"], "types": ["type1"]}, 106 | "outputs": {"values": ["val1"], "shapes": ["shape1"], "types": ["type1"]}, 107 | "attrs": [], 108 | } 109 | node2_data = { 110 | "id": 2, 111 | "name": "node2", 112 | "ctrl_deps": parent_id, 113 | "inputs": {"values": ["val2"], "shapes": ["shape2"], "types": ["type2"]}, 114 | "outputs": {"values": ["val2"], "shapes": ["shape2"], "types": ["type2"]}, 115 | "attrs": [], 116 | } 117 | node1 = PyTorchNode("1.0.2-chakra.0.0.4", node1_data) 118 | node2 = PyTorchNode("1.0.2-chakra.0.0.4", node2_data) 119 | return {1: node1, 2: node2} 120 | 121 | 122 | @pytest.mark.parametrize("parent_id, expected_child_id", [(1, 2), (None, None)]) 123 | def test_establish_parent_child_relationships(parent_id: int, expected_child_id: int) -> None: 124 | converter = PyTorchConverter() 125 | json_node_map = create_sample_graph(parent_id, expected_child_id) 126 | 127 | json_node_map = converter.establish_parent_child_relationships(json_node_map, []) 128 | 129 | if expected_child_id: 130 | assert json_node_map[parent_id].children[0].id == expected_child_id 131 | else: 132 | assert len(json_node_map[1].children) == 0 133 | 134 | 135 | def test_convert_json_to_protobuf_nodes(sample_pytorch_data: Dict) -> None: 136 | converter = PyTorchConverter() 137 | json_metadata, json_node_map = converter.parse_json_trace(sample_pytorch_data) 138 | json_node_map = converter.establish_parent_child_relationships(json_node_map, []) 139 | chakra_nodes = {} 140 | converter.convert_json_to_protobuf_nodes(json_node_map, chakra_nodes) 141 | assert len(chakra_nodes) == 2 142 | assert chakra_nodes[1].id == 1 143 | assert chakra_nodes[2].id == 2 144 | 145 | 146 | def test_convert_ctrl_dep_to_data_dep(sample_pytorch_data: Dict) -> None: 147 | converter = PyTorchConverter() 148 | json_metadata, json_node_map = converter.parse_json_trace(sample_pytorch_data) 149 | json_node_map = 
converter.establish_parent_child_relationships(json_node_map, []) 150 | chakra_nodes = {} 151 | converter.convert_json_to_protobuf_nodes(json_node_map, chakra_nodes) 152 | root_node = chakra_nodes[1] 153 | converter.convert_ctrl_dep_to_data_dep(json_node_map, chakra_nodes, root_node) 154 | assert root_node.data_deps == [] 155 | 156 | 157 | @patch("builtins.open", new_callable=mock_open) 158 | def test_write_chakra_et(mock_file: MagicMock, sample_pytorch_data: Dict) -> None: 159 | converter = PyTorchConverter() 160 | json_metadata, json_node_map = converter.parse_json_trace(sample_pytorch_data) 161 | json_node_map = converter.establish_parent_child_relationships(json_node_map, []) 162 | chakra_nodes = {} 163 | converter.convert_json_to_protobuf_nodes(json_node_map, chakra_nodes) 164 | converter.write_protobuf_execution_trace("output.et", json_metadata, chakra_nodes) 165 | assert mock_file().write.called 166 | 167 | 168 | @pytest.mark.parametrize( 169 | "pytorch_node_data, expected_type", 170 | [ 171 | ({"name": "process_group:init", "is_gpu_op": False, "is_metadata_op": True}, METADATA_NODE), 172 | ({"name": "ncclKernel", "is_gpu_op": True, "is_metadata_op": False}, COMM_COLL_NODE), 173 | ({"name": "ncclDevKernel", "is_gpu_op": True, "is_metadata_op": False}, COMM_COLL_NODE), 174 | ({"name": "c10d::all_reduce", "is_gpu_op": True, "is_metadata_op": False}, COMP_NODE), 175 | ({"name": "other_op", "is_gpu_op": False, "is_metadata_op": False}, COMP_NODE), 176 | ], 177 | ) 178 | def test_get_protobuf_node_type_from_json_node(pytorch_node_data: Dict, expected_type: int) -> None: 179 | # Create a mock PyTorchNode with the required attributes 180 | pytorch_node = MagicMock(spec=PyTorchNode) 181 | pytorch_node.name = pytorch_node_data["name"] 182 | pytorch_node.is_gpu_op = MagicMock(return_value=pytorch_node_data["is_gpu_op"]) 183 | pytorch_node.is_metadata_op = MagicMock(return_value=pytorch_node_data["is_metadata_op"]) 184 | 185 | # Create a mock json_node_map dictionary with actual PyTorchNode instances 186 | mock_pytorch_node_data = { 187 | "id": 0, 188 | "name": "mock_node", 189 | "ctrl_deps": None, 190 | "exclusive_dur": 0, 191 | "inputs": {"values": [], "shapes": [], "types": []}, 192 | "outputs": {"values": [], "shapes": [], "types": []}, 193 | "attrs": [], 194 | } 195 | mock_pytorch_node = PyTorchNode("1.0.2-chakra.0.0.4", mock_pytorch_node_data) 196 | json_node_map = {0: mock_pytorch_node, 1: pytorch_node} 197 | 198 | converter = PyTorchConverter() 199 | node_type = converter.get_protobuf_node_type_from_json_node(json_node_map, pytorch_node) 200 | assert node_type == expected_type 201 | 202 | 203 | @pytest.mark.parametrize( 204 | "name, expected_comm_type", 205 | [ 206 | ("allreduce", ALL_REDUCE), 207 | ("alltoall", ALL_TO_ALL), 208 | ("allgather", ALL_GATHER), 209 | ("reducescatter", REDUCE_SCATTER), 210 | ("broadcast", BROADCAST), 211 | ], 212 | ) 213 | def test_get_collective_comm_type(name: str, expected_comm_type: int) -> None: 214 | converter = PyTorchConverter() 215 | comm_type = converter.get_collective_comm_type(name) 216 | assert comm_type == expected_comm_type 217 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 
9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 
176 | 177 | END OF TERMS AND CONDITIONS 178 | -------------------------------------------------------------------------------- /src/converter/pytorch_node.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | from enum import Enum 3 | from typing import Any, Dict, List, Optional 4 | 5 | from .pytorch_tensor import PyTorchTensor 6 | 7 | 8 | class PyTorchNodeType(Enum): 9 | """ 10 | Enum representing the type of a PyTorch node in an execution trace. 11 | 12 | Attributes 13 | CPU_OP (int): Represents a CPU operation. 14 | GPU_OP (int): Represents a GPU operation. 15 | LABEL (int): Represents a non-operator node (e.g., labels). 16 | METADATA (int): Represents a metadata node (e.g., process group initialization). 17 | """ 18 | 19 | CPU_OP = 1 20 | GPU_OP = 2 21 | LABEL = 3 # Non-operator nodes 22 | METADATA = 4 # Metadata nodes 23 | 24 | 25 | class PyTorchNode: 26 | """ 27 | Represents a node in a PyTorch execution trace, initialized based on a schema version. 28 | 29 | Attributes 30 | schema (str): Schema version used for initialization. 31 | data_deps (List[PyTorchNode]): List of data-dependent parent nodes. 32 | children (List[PyTorchNode]): List of child nodes. 33 | gpu_children (List[PyTorchNode]): List of GPU-specific child nodes. 34 | record_param_comms_node (Optional[PyTorchNode]): Corresponding record_param_comms node. 35 | nccl_node (Optional[PyTorchNode]): Corresponding NCCL node. 36 | id (str): Identifier of the node. 37 | name (str): Name of the node. 38 | parent (Any): Parent of the node. 39 | inputs (Any): Inputs of the node. 40 | outputs (Any): Outputs of the node. 41 | inclusive_dur (Optional[float]): Inclusive duration of the node. 42 | exclusive_dur (float): Exclusive duration of the node. 43 | ts (Optional[float]): Timestamp of the node. 44 | inter_thread_dep (Any): Inter-thread dependency of the node. 45 | cat (Any): Category of the node. 46 | stream (int): Stream associated with the node. 47 | pg_name (str): Process Group name for the inter-GPU communication. 48 | """ 49 | 50 | SUPPORTED_VERSIONS = ["1.0.2-chakra.0.0.4", "1.0.3-chakra.0.0.4", "1.1.0-chakra.0.0.4", "1.1.1-chakra.0.0.4"] 51 | 52 | def __init__(self, schema: str, node_data: Dict[str, Any]) -> None: 53 | """ 54 | Initialize a PyTorchNode object using the node data and schema version provided. 55 | 56 | Args: 57 | schema (str): The schema version based on which the node will be initialized. 58 | node_data (Dict[str, Any]): Dictionary containing the data of the PyTorch node. 59 | """ 60 | self.schema = schema 61 | self.data_deps: List["PyTorchNode"] = [] 62 | self.children: List["PyTorchNode"] = [] 63 | self.gpu_children: List["PyTorchNode"] = [] 64 | self.record_param_comms_node: Optional["PyTorchNode"] = None 65 | self.nccl_node: Optional["PyTorchNode"] = None 66 | 67 | self.parse_data(node_data) 68 | 69 | def __repr__(self) -> str: 70 | """ 71 | Provide a string representation of the PyTorchNode. 72 | 73 | Returns 74 | str: String representation of the node. 75 | """ 76 | return ( 77 | f"PyTorchNode(id={self.id}, name={self.name}, op_type={self.get_op_type()}, timestamp={self.ts}, " 78 | f"inclusive_duration={self.inclusive_dur}, exclusive_duration={self.exclusive_dur})" 79 | ) 80 | 81 | def parse_data(self, node_data: Dict[str, Any]) -> None: 82 | """ 83 | Parse node data based on the provided schema version. 84 | 85 | Args: 86 | node_data (Dict[str, Any]): The node data to be parsed. 
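For illustration, a minimal node_data sketch in the "1.0.2-chakra.0.0.4" schema (field names mirror the fixtures in tests/converter/test_pytorch_converter.py; the values are made up):

    {"id": 1, "name": "node1", "ctrl_deps": None,
     "inputs": {"values": [], "shapes": [], "types": []},
     "outputs": {"values": [], "shapes": [], "types": []},
     "attrs": []}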
87 | """ 88 | if self.schema in self.SUPPORTED_VERSIONS: 89 | if self.schema in ["1.0.2-chakra.0.0.4", "1.0.3-chakra.0.0.4", "1.1.0-chakra.0.0.4", "1.1.1-chakra.0.0.4"]: 90 | self._parse_data_1_0_3_chakra_0_0_4(node_data) 91 | else: 92 | raise ValueError( 93 | f"Unsupported schema version '{self.schema}'. Please check if the schema version is in the list of " 94 | f"supported versions: {self.SUPPORTED_VERSIONS}. The schema version of the trace is not supported by " 95 | f"the converter. The schema version is determined by the PyTorch version used to collect Chakra host " 96 | f"execution traces. Please consider changing the PyTorch version you are using. For more details, you " 97 | f"can follow the git history of the relevant file: " 98 | f"https://github.com/pytorch/pytorch/blob/7cd48df2dae7e2194438b162968c47d1f05bf20e/torch/csrc/" 99 | f"profiler/standalone/execution_trace_observer.cpp#L308. Check which PyTorch versions generate Chakra " 100 | f"host traces that are supported by the converter." 101 | ) 102 | 103 | def _parse_data_1_0_3_chakra_0_0_4(self, node_data: Dict[str, Any]) -> None: 104 | self.id = node_data["id"] 105 | self.name = node_data["name"] 106 | self.parent = node_data["ctrl_deps"] 107 | self.inputs = node_data["inputs"] 108 | self.outputs = node_data["outputs"] 109 | self.inclusive_dur = node_data.get("inclusive_dur") 110 | self.exclusive_dur = node_data.get("exclusive_dur", 0) 111 | self.ts = node_data.get("ts") 112 | self.inter_thread_dep = node_data.get("inter_thread_dep") 113 | self.sync_dep = node_data.get("sync_dep") 114 | self.cat = node_data.get("cat") 115 | self.stream = node_data.get("stream", 0) 116 | # In Colletive comms nodes, pg_name is in node_data if exists. 117 | # In SendRecv nodes, pg_name is in the attrs if exists. 118 | # Otherwise, pg_name is not present. 119 | self.pg_name = node_data.get("pg_name", "") 120 | 121 | for attr in node_data.get("attrs", []): 122 | setattr(self, attr["name"], attr["value"]) 123 | 124 | def get_op_type(self) -> PyTorchNodeType: 125 | """ 126 | Determine the type of PyTorch operation. 127 | 128 | Returns 129 | PyTorchNodeType: The type of the PyTorch operation. 130 | """ 131 | if "process_group:init" in self.name: 132 | return PyTorchNodeType.METADATA 133 | elif self.is_gpu_op(): 134 | return PyTorchNodeType.GPU_OP 135 | elif hasattr(self, "op_schema") or hasattr(self, "outputs"): 136 | return PyTorchNodeType.CPU_OP 137 | else: 138 | return PyTorchNodeType.LABEL 139 | 140 | def is_metadata_op(self) -> bool: 141 | """ 142 | Check if the node is a METADATA operator. 143 | 144 | Returns 145 | bool: True if the node is a METADATA operator, False otherwise. 146 | """ 147 | return self.get_op_type() == PyTorchNodeType.METADATA 148 | 149 | def is_cpu_op(self) -> bool: 150 | """ 151 | Check if the node is a CPU operator. 152 | 153 | Returns 154 | bool: True if the node is a CPU operator, False otherwise. 155 | """ 156 | return self.get_op_type() == PyTorchNodeType.CPU_OP 157 | 158 | def is_gpu_op(self) -> bool: 159 | """ 160 | Check if the node is a GPU operator. 161 | 162 | Returns 163 | bool: True if the node is a GPU operator, False otherwise. 164 | """ 165 | return self.cat is not None 166 | 167 | def add_data_dep(self, parent_node: "PyTorchNode") -> None: 168 | """ 169 | Add a data-dependent parent node to this node. 170 | 171 | Args: 172 | parent_node (PyTorchNode): The parent node to be added. 
173 | """ 174 | self.data_deps.append(parent_node) 175 | 176 | def add_child(self, child_node: "PyTorchNode") -> None: 177 | """ 178 | Add a child node to this node. 179 | 180 | Args: 181 | child_node (PyTorchNode): The child node to be added. 182 | """ 183 | self.children.append(child_node) 184 | 185 | def add_gpu_child(self, gpu_child_node: "PyTorchNode") -> None: 186 | """ 187 | Add a child GPU node for this node. 188 | 189 | Args: 190 | gpu_child_node (Optional[PyTorchNode]): The child GPU node to be added. 191 | """ 192 | self.gpu_children.append(gpu_child_node) 193 | 194 | def is_record_param_comms_op(self) -> bool: 195 | """ 196 | Check if the node is a record_param_comms operator. 197 | 198 | Returns 199 | bool: True if the node is a record_param_comms operator, False otherwise. 200 | """ 201 | return "record_param_comms" in self.name 202 | 203 | def is_nccl_op(self) -> bool: 204 | """ 205 | Check if the node is a NCCL operator. 206 | 207 | Returns 208 | bool: True if the node is a NCCL operator, False otherwise. 209 | """ 210 | return "nccl:" in self.name 211 | 212 | @property 213 | def comm_size(self) -> int: 214 | """ 215 | Calculate the communication size for the given input types and shapes. 216 | 217 | Returns 218 | int: The calculated communication size. 219 | """ 220 | comm_size = 0 221 | for input_value, input_type in zip(self.inputs["values"], self.inputs["types"]): 222 | if "Tensor" in input_type: 223 | if input_type.startswith("GenericList[Tensor"): 224 | for inner_value in input_value: 225 | tensor = PyTorchTensor(inner_value) 226 | input_size = tensor.num_elem * tensor.elem_bytes 227 | comm_size += input_size 228 | else: 229 | tensor = PyTorchTensor(input_value) 230 | input_size = tensor.num_elem * tensor.elem_bytes 231 | comm_size += input_size 232 | return comm_size 233 | 234 | @staticmethod 235 | def get_data_type_size(data_type: str) -> int: 236 | """ 237 | Return the data type size of a given data type in string. 238 | 239 | Args: 240 | data_type (str): The data type as a string. 241 | 242 | Returns: 243 | int: The size of the data type in bytes. 244 | 245 | Raises: 246 | ValueError: If the data type is not supported. 247 | """ 248 | data_type_size_map = { 249 | "Tensor(float32)": 4, 250 | "Tensor(float)": 4, 251 | "Tensor(float64)": 8, 252 | "Tensor(double)": 8, 253 | "Tensor(float16)": 2, 254 | "Tensor(half)": 2, 255 | "Tensor(bfloat16)": 2, 256 | "Tensor(complex64)": 8, 257 | "Tensor(complex128)": 16, 258 | "Tensor(uint8)": 1, 259 | "Tensor(int8)": 1, 260 | "Tensor(int16)": 2, 261 | "Tensor(short)": 2, 262 | "Tensor(int32)": 4, 263 | "Tensor(int)": 4, 264 | "Tensor(int64)": 8, 265 | "Tensor(long)": 8, 266 | "Tensor(c10::Half)": 2, 267 | "Tensor(c10::BFloat16)": 2, 268 | "Tensor(unsigned char)": 1, 269 | "Tensor(long int)": 8, 270 | # TODO: Add more types 271 | } 272 | try: 273 | return data_type_size_map[data_type] 274 | except KeyError as e: 275 | traceback_str = traceback.format_exc() 276 | raise ValueError( 277 | f"Unsupported data type: {data_type}. The data_type_size_map dictionary is used for mapping the " 278 | f"number of bytes for a given tensor data type. This dictionary may be incomplete. Please update the " 279 | f"data_type_size_map or report this issue to the maintainer by creating an issue. 
Traceback:\n" 280 | f"{traceback_str}" 281 | ) from e 282 | -------------------------------------------------------------------------------- /src/trace_link/chakra_device_trace_loader.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from concurrent.futures import ThreadPoolExecutor, as_completed 4 | from typing import Dict, List, Tuple 5 | 6 | from et_replay.utils import read_dictionary_from_json_file 7 | 8 | from .kineto_operator import KinetoOperator 9 | 10 | 11 | class ChakraDeviceTraceLoader: 12 | """Loads Chakra device traces.""" 13 | 14 | def load( 15 | self, chakra_device_trace: str 16 | ) -> Tuple[ 17 | List[KinetoOperator], 18 | Dict[int, List[KinetoOperator]], 19 | Dict[int, List[KinetoOperator]], 20 | Dict[int, KinetoOperator], 21 | List[KinetoOperator], 22 | Dict[int, KinetoOperator], 23 | Dict[int, KinetoOperator], 24 | int, 25 | int, 26 | Dict[int, Tuple[int, int]], 27 | Dict[int, KinetoOperator], 28 | List[KinetoOperator], 29 | List[int], 30 | Dict[int, KinetoOperator], 31 | ]: 32 | """ 33 | Load and process the Chakra device trace. 34 | 35 | Args: 36 | chakra_device_trace (str): Path to the Chakra device trace file. 37 | 38 | Returns: 39 | Tuple containing various data structures needed for linking traces. 40 | """ 41 | logging.debug(f"Starting to load Chakra device trace from file: {chakra_device_trace}.") 42 | chakra_trace_data = read_dictionary_from_json_file(chakra_device_trace) 43 | sorted_kineto_ops = sorted( 44 | [KinetoOperator(op) for op in chakra_trace_data["traceEvents"]], 45 | key=lambda op: op.timestamp, 46 | ) 47 | 48 | dev_data = self.construct_dev_data_structures(sorted_kineto_ops, chakra_device_trace) 49 | self.calculate_exclusive_dur(dev_data["kineto_tid_cpu_ops_map"]) 50 | 51 | dev_data["sorted_kineto_cpu_ops"] = sorted(dev_data["kineto_cpu_ops"], key=lambda op: op.timestamp) 52 | dev_data["sorted_kineto_cpu_op_ts"] = [op.timestamp for op in dev_data["sorted_kineto_cpu_ops"]] 53 | 54 | logging.debug( 55 | f"Processed Chakra device trace with {len(dev_data['kineto_cpu_ops'])} CPU ops, " 56 | f"{len(dev_data['kineto_id_cuda_launch_op_map'])} CPU launcher ops, " 57 | f"and {len(dev_data['kineto_gpu_ops'])} GPU ops." 58 | ) 59 | logging.debug("Chakra device trace has been loaded and processed successfully.") 60 | return ( 61 | dev_data["kineto_cpu_ops"], 62 | dev_data["kineto_tid_ops_map"], 63 | dev_data["kineto_tid_cpu_ops_map"], 64 | dev_data["kineto_correlation_cuda_runtime_map"], 65 | dev_data["kineto_gpu_ops"], 66 | dev_data["kineto_id_arrow_op_map"], 67 | dev_data["kineto_id_cuda_launch_op_map"], 68 | dev_data["kineto_process_start_time"], 69 | dev_data["kineto_process_end_time"], 70 | dev_data["kineto_thread_info"], 71 | dev_data["kineto_rf_id_to_kineto_op_map"], 72 | dev_data["sorted_kineto_cpu_ops"], 73 | dev_data["sorted_kineto_cpu_op_ts"], 74 | dev_data["kineto_external_id_to_kineto_op_map"], 75 | ) 76 | 77 | def construct_dev_data_structures(self, kineto_ops: List[KinetoOperator], trace_file: str) -> Dict: 78 | """ 79 | Construct necessary data structures required for trace linking from the provided Kineto operators. 80 | 81 | This method identifies process start time, end time, thread start time, and end time, and also categorizes 82 | operators into CPU, GPU, and other relevant groups. 83 | 84 | Args: 85 | kineto_ops (List[KinetoOperator]): List of Kineto operators to categorize. 86 | trace_file (str): Path to the trace file for logging purposes. 
87 | 
88 |         Returns:
89 |             Dict: Dictionary containing categorized operators and timing boundaries.
90 |         """
91 |         logging.debug("Categorizing Kineto operators and calculating timing boundaries.")
92 |         process_start_time = sys.maxsize
93 |         process_end_time = 0
94 |         thread_info = {}
95 | 
96 |         kineto_cpu_ops = []
97 |         kineto_tid_ops_map = {}
98 |         kineto_tid_cpu_ops_map = {}
99 |         kineto_correlation_cuda_runtime_map = {}
100 |         kineto_gpu_ops = []
101 |         kineto_id_arrow_op_map = {}
102 |         kineto_id_cuda_launch_op_map = {}
103 |         kineto_external_id_to_kineto_op_map = {}
104 | 
105 |         for op in kineto_ops:
106 |             kineto_tid_ops_map.setdefault(op.tid, []).append(op)
107 | 
108 |             if op.is_cpu_op():
109 |                 kineto_cpu_ops.append(op)
110 |                 kineto_tid_cpu_ops_map.setdefault(op.tid, []).append(op)
111 |                 logging.debug(f"Added CPU or user annotation op: {op.name}")
112 | 
113 |             elif op.is_kernel_launch_op():
114 |                 kineto_id_cuda_launch_op_map[op.external_id] = op
115 |                 if op.correlation in kineto_correlation_cuda_runtime_map:
116 |                     error_msg = (
117 |                         f"Duplicate correlation ID {op.correlation} found in kineto_correlation_cuda_runtime_map. "
118 |                         "The kineto_correlation_cuda_runtime_map links each GPU operator with the CPU operator that "
119 |                         "launched it. The correlation field works as the link, and this map stores a mapping between "
120 |                         "a correlation ID and its launcher operator. Each kernel launch operator should have a unique "
121 |                         "correlation ID for linking it to a GPU operator. Therefore, a duplicated correlation ID is "
122 |                         "not expected in the map. Please review the file manually to see if the operator has an "
123 |                         f"invalid correlation value in file: {trace_file}."
124 |                     )
125 |                     logging.error(error_msg)
126 |                     raise ValueError(error_msg)
127 |                 kineto_correlation_cuda_runtime_map[op.correlation] = op
128 |                 logging.debug(f"Added CPU launcher op: {op.name}")
129 | 
130 |             elif op.is_gpu_op():
131 |                 kineto_gpu_ops.append(op)
132 |                 logging.debug(f"Added GPU op: {op.name}")
133 | 
134 |             elif op.is_ac2g_op():  # arrow from CPU to GPU
135 |                 assert (op.phase == "s") or (op.phase == "f")
136 |                 if op.id is None:
137 |                     error_msg = (
138 |                         f"'id' field is None in Kineto operator: {op} in file: {trace_file}. This is unexpected as "
139 |                         "'id' should generally be populated for 'ac2g' operators. Please verify the validity of "
140 |                         "the Kineto trace and the operator data."
141 | ) 142 | logging.error(error_msg) 143 | raise KeyError(error_msg) 144 | 145 | kineto_id_arrow_op_map[op.id] = op 146 | 147 | # Update timing boundaries 148 | if op.tid is not None: 149 | process_start_time = min(process_start_time, op.timestamp) 150 | process_end_time = max(process_end_time, op.timestamp + op.inclusive_dur) 151 | thread_start_end = thread_info.setdefault(op.tid, [sys.maxsize, 0]) 152 | thread_start_end[0] = min(thread_start_end[0], op.timestamp) 153 | thread_start_end[1] = max(thread_start_end[1], op.timestamp + op.inclusive_dur) 154 | 155 | if op.external_id is not None: 156 | kineto_external_id_to_kineto_op_map[op.external_id] = op 157 | 158 | kineto_rf_id_to_kineto_op_map = {op.rf_id: op for op in kineto_cpu_ops if op.rf_id is not None} 159 | 160 | return { 161 | "kineto_cpu_ops": kineto_cpu_ops, 162 | "kineto_tid_ops_map": kineto_tid_ops_map, 163 | "kineto_tid_cpu_ops_map": kineto_tid_cpu_ops_map, 164 | "kineto_correlation_cuda_runtime_map": kineto_correlation_cuda_runtime_map, 165 | "kineto_gpu_ops": kineto_gpu_ops, 166 | "kineto_id_arrow_op_map": kineto_id_arrow_op_map, 167 | "kineto_id_cuda_launch_op_map": kineto_id_cuda_launch_op_map, 168 | "kineto_process_start_time": process_start_time, 169 | "kineto_process_end_time": process_end_time, 170 | "kineto_thread_info": thread_info, 171 | "kineto_rf_id_to_kineto_op_map": kineto_rf_id_to_kineto_op_map, 172 | "sorted_kineto_cpu_ops": [], 173 | "sorted_kineto_cpu_op_ts": [], 174 | "kineto_external_id_to_kineto_op_map": kineto_external_id_to_kineto_op_map, 175 | } 176 | 177 | def calculate_exclusive_dur(self, kineto_tid_cpu_ops_map: Dict[int, List[KinetoOperator]]) -> None: 178 | """ 179 | Calculate the exclusive duration of each operator in the Kineto traces in parallel. 180 | 181 | The exclusive duration is defined as the total duration of the operator minus any time spent in child operators, 182 | effectively representing the time spent exclusively in that operator. 183 | 184 | Args: 185 | kineto_tid_cpu_ops_map (Dict[int, List[KinetoOperator]]): Map of thread IDs to their corresponding Kineto 186 | operators. 
187 |         """
188 |         logging.debug("Calculating exclusive durations for Kineto operators in parallel.")
189 | 
190 |         def process_ops_for_thread(ops: List[KinetoOperator]) -> None:
191 |             logging.debug(f"Processing {len(ops)} operators in thread.")
192 |             sorted_ops = sorted(ops, key=lambda op: (op.timestamp, op.inclusive_dur))
193 |             for i, op in enumerate(sorted_ops):
194 |                 exclusive_dur = op.inclusive_dur
195 |                 overlapping_regions = []
196 | 
197 |                 # Identify overlapping regions with child operators
198 |                 for child_op in sorted_ops[i + 1 :]:
199 |                     if child_op.timestamp >= op.timestamp and (child_op.timestamp + child_op.inclusive_dur) <= (
200 |                         op.timestamp + op.inclusive_dur
201 |                     ):
202 |                         overlap_start = child_op.timestamp
203 |                         overlap_end = child_op.timestamp + child_op.inclusive_dur
204 |                         overlapping_regions.append((overlap_start, overlap_end))
205 |                     if (op.timestamp + op.inclusive_dur) < child_op.timestamp:
206 |                         break
207 | 
208 |                 # Merge overlapping regions and calculate exclusive duration
209 |                 merged_regions = self.merge_overlapping_intervals(overlapping_regions)
210 |                 for start, end in merged_regions:
211 |                     exclusive_dur -= end - start
212 | 
213 |                 # Ensure the calculated exclusive duration is not negative
214 |                 if exclusive_dur < 0:
215 |                     error_msg = (
216 |                         f"Exclusive duration calculation error for node '{op.name}' "
217 |                         f"(ts: {op.timestamp}, inclusive_dur: {op.inclusive_dur}, rf_id: {op.rf_id}): "
218 |                         f"Duration cannot be less than zero."
219 |                     )
220 |                     logging.error(error_msg)
221 |                     raise ValueError(error_msg)
222 | 
223 |                 op.exclusive_dur = exclusive_dur
224 |                 logging.debug(
225 |                     f"Node '{op.name}' (ts: {op.timestamp}, inclusive_dur: {op.inclusive_dur}, "
226 |                     f"rf_id: {op.rf_id}) exclusive duration: {op.exclusive_dur} microseconds."
227 |                 )
228 | 
229 |         with ThreadPoolExecutor() as executor:
230 |             futures = [executor.submit(process_ops_for_thread, ops) for ops in kineto_tid_cpu_ops_map.values()]
231 | 
232 |             for future in as_completed(futures):
233 |                 future.result()  # Wait for all threads to complete and handle any exceptions
234 | 
235 |         logging.debug("Exclusive durations for Kineto operators calculated successfully.")
236 | 
237 |     @staticmethod
238 |     def merge_overlapping_intervals(intervals: List[Tuple[int, int]]) -> List[Tuple[int, int]]:
239 |         """
240 |         Merge overlapping intervals into a minimal set of non-overlapping intervals.
241 | 
242 |         Args:
243 |             intervals (List[Tuple[int, int]]): List of intervals.
244 | 
245 |         Returns:
246 |             List[Tuple[int, int]]: List of merged intervals.
247 |         """
248 |         if not intervals:
249 |             return []
250 | 
251 |         # Sort intervals based on the start time
252 |         intervals.sort(key=lambda x: x[0])
253 |         merged = [intervals[0]]
254 | 
255 |         for current in intervals[1:]:
256 |             prev = merged[-1]
257 |             if current[0] <= prev[1]:
258 |                 # There is overlap, merge the current interval with the previous one
259 |                 merged[-1] = (prev[0], max(prev[1], current[1]))
260 |             else:
261 |                 # No overlap, add the current interval
262 |                 merged.append(current)
263 | 
264 |         return merged
265 | 
--------------------------------------------------------------------------------
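A worked example of the exclusive-duration computation above, with merge_overlapping_intervals reproduced standalone so the snippet runs on its own: an operator spanning [0, 100] whose children cover [10, 30], [20, 40], and [60, 70] retains 100 - (30 + 10) = 60 time units of exclusive duration.

    def merge_overlapping_intervals(intervals):
        # Same merging logic as in ChakraDeviceTraceLoader, reproduced standalone.
        if not intervals:
            return []
        intervals.sort(key=lambda x: x[0])
        merged = [intervals[0]]
        for current in intervals[1:]:
            prev = merged[-1]
            if current[0] <= prev[1]:
                merged[-1] = (prev[0], max(prev[1], current[1]))
            else:
                merged.append(current)
        return merged

    children = [(10, 30), (20, 40), (60, 70)]
    merged = merge_overlapping_intervals(children)
    exclusive = 100 - sum(end - start for start, end in merged)
    print(merged, exclusive)  # [(10, 40), (60, 70)] 60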
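For orientation, a minimal sketch of how kineto_correlation_cuda_runtime_map built in construct_dev_data_structures is consumed downstream: a GPU kernel and the CPU runtime call that launched it share a correlation ID, so the map recovers the launcher for any GPU operator. The SimpleNamespace objects here are hypothetical stand-ins for KinetoOperator records, for illustration only.

    from types import SimpleNamespace

    # Hypothetical stand-ins for KinetoOperator records (illustration only).
    launcher = SimpleNamespace(name="cudaLaunchKernel", correlation=42)
    gpu_kernel = SimpleNamespace(name="sgemm_kernel", correlation=42)

    correlation_map = {launcher.correlation: launcher}

    # Recover the CPU launcher for the GPU kernel via the shared correlation ID.
    parent = correlation_map.get(gpu_kernel.correlation)
    print(parent.name if parent else "unlinked")  # cudaLaunchKernel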
/src/generator/generator.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | 
3 | from ...schema.protobuf.et_def_pb2 import (
4 |     ALL_GATHER,
5 |     ALL_REDUCE,
6 |     ALL_TO_ALL,
7 |     BARRIER,
8 |     BROADCAST,
9 |     COMM_COLL_NODE,
10 |     COMM_RECV_NODE,
11 |     COMM_SEND_NODE,
12 |     COMP_NODE,
13 |     MEM_LOAD_NODE,
14 |     MEM_STORE_NODE,
15 |     METADATA_NODE,
16 |     REDUCE_SCATTER,
17 |     BoolList,
18 |     BytesList,
19 |     DoubleList,
20 |     Fixed32List,
21 |     Fixed64List,
22 |     FloatList,
23 |     GlobalMetadata,
24 |     Int32List,
25 |     Int64List,
26 |     Sfixed32List,
27 |     Sfixed64List,
28 |     Sint32List,
29 |     Sint64List,
30 |     StringList,
31 |     Uint32List,
32 |     Uint64List,
33 | )
34 | from ...schema.protobuf.et_def_pb2 import (
35 |     AttributeProto as ChakraAttr,
36 | )
37 | from ...schema.protobuf.et_def_pb2 import (
38 |     Node as ChakraNode,
39 | )
40 | from ...schema.protobuf.et_def_pb2 import (
41 |     NodeType as ChakraNodeType,
42 | )
43 | from ..third_party.utils.protolib import encodeMessage as encode_message
44 | 
45 | NODE_ID = 0
46 | 
47 | 
48 | def get_node(node_name: str, node_type: ChakraNodeType) -> ChakraNode:
49 |     """Generate a new ChakraNode with a unique ID."""
50 |     global NODE_ID
51 |     node = ChakraNode()
52 |     node.id = NODE_ID
53 |     node.name = node_name
54 |     node.type = node_type
55 |     NODE_ID += 1
56 |     return node
57 | 
58 | 
59 | def get_comm_type_attr(comm_type: int) -> ChakraAttr:
60 |     """Create a communication type attribute."""
61 |     return ChakraAttr(name="comm_type", int64_val=comm_type)
62 | 
63 | 
64 | def one_metadata_node_all_types(num_npus: int) -> None:
65 |     """Generate metadata nodes with all types of attributes."""
66 |     for npu_id in range(num_npus):
67 |         output_filename = f"one_metadata_node_all_types.{npu_id}.et"
68 |         with open(output_filename, "wb") as et:
69 |             encode_message(et, GlobalMetadata(version="0.0.4"))
70 | 
71 |             node = get_node("METADATA_NODE", METADATA_NODE)
72 |             node.attr.extend(
73 |                 [
74 |                     ChakraAttr(name="double", double_val=1.2345, doc_string="double"),
75 |                     ChakraAttr(name="double_list", double_list=DoubleList(values=[1.2345, 2.3456])),
76 |                     ChakraAttr(name="float", float_val=1.2345, doc_string="float"),
77 |                     ChakraAttr(name="float_list", float_list=FloatList(values=[1.2345, 2.3456])),
78 |                     ChakraAttr(name="int32", int32_val=12345, doc_string="int32"),
79 |                     ChakraAttr(name="int32_list", int32_list=Int32List(values=[12345, 23456])),
80 |                     ChakraAttr(name="int64", int64_val=9876543210, doc_string="int64"),
81 |                     ChakraAttr(name="int64_list", int64_list=Int64List(values=[9876543210, 1234567890])),
82 |                     ChakraAttr(name="uint32", uint32_val=12345, doc_string="uint32"),
83 |                     ChakraAttr(name="uint32_list", uint32_list=Uint32List(values=[12345, 23456])),
84 |                     ChakraAttr(name="uint64", uint64_val=9876543210, doc_string="uint64"),
85 |                     ChakraAttr(name="uint64_list", uint64_list=Uint64List(values=[9876543210, 1234567890])),
86 |                     ChakraAttr(name="sint32",
sint32_val=-12345, doc_string="sint32"), 87 | ChakraAttr(name="sint32_list", sint32_list=Sint32List(values=[12345, -23456])), 88 | ChakraAttr(name="sint64", sint64_val=-9876543210, doc_string="sint64"), 89 | ChakraAttr(name="sint64_list", sint64_list=Sint64List(values=[9876543210, -1234567890])), 90 | ChakraAttr(name="fixed32", fixed32_val=12345), 91 | ChakraAttr(name="fixed32_list", fixed32_list=Fixed32List(values=[12345, 23456])), 92 | ChakraAttr(name="fixed64", fixed64_val=9876543210), 93 | ChakraAttr(name="fixed64_list", fixed64_list=Fixed64List(values=[9876543210, 1234567890])), 94 | ChakraAttr(name="sfixed32", sfixed32_val=-12345), 95 | ChakraAttr(name="sfixed32_list", sfixed32_list=Sfixed32List(values=[12345, -23456])), 96 | ChakraAttr(name="sfixed64", sfixed64_val=-9876543210), 97 | ChakraAttr(name="sfixed64_list", sfixed64_list=Sfixed64List(values=[9876543210, -1234567890])), 98 | ChakraAttr(name="bool", bool_val=True, doc_string="bool"), 99 | ChakraAttr(name="bool_list", bool_list=BoolList(values=[i % 2 == 0 for i in range(10)])), 100 | ChakraAttr(name="string", string_val="12345", doc_string="string"), 101 | ChakraAttr(name="string_list", string_list=StringList(values=[str(12345 + i) for i in range(10)])), 102 | ChakraAttr(name="bytes", bytes_val=bytes("12345", "utf-8")), 103 | ChakraAttr( 104 | name="bytes_list", 105 | bytes_list=BytesList(values=[bytes(str(12345 + i), "utf-8") for i in range(10)]), 106 | ), 107 | ] 108 | ) 109 | 110 | encode_message(et, node) 111 | 112 | 113 | def one_remote_mem_load_node(num_npus: int, tensor_size: int) -> None: 114 | """Generate remote memory load nodes.""" 115 | for npu_id in range(num_npus): 116 | output_filename = f"one_remote_mem_load_node.{npu_id}.et" 117 | with open(output_filename, "wb") as et: 118 | encode_message(et, GlobalMetadata(version="0.0.4")) 119 | 120 | node = get_node("MEM_LOAD_NODE", MEM_LOAD_NODE) 121 | node.attr.append(ChakraAttr(name="is_cpu_op", bool_val=False)) 122 | node.attr.append(ChakraAttr(name="tensor_size", uint64_val=tensor_size)) 123 | encode_message(et, node) 124 | 125 | 126 | def one_remote_mem_store_node(num_npus: int, tensor_size: int) -> None: 127 | """Generate remote memory store nodes.""" 128 | for npu_id in range(num_npus): 129 | output_filename = f"one_remote_mem_store_node.{npu_id}.et" 130 | with open(output_filename, "wb") as et: 131 | encode_message(et, GlobalMetadata(version="0.0.4")) 132 | 133 | node = get_node("MEM_STORE_NODE", MEM_STORE_NODE) 134 | node.attr.append(ChakraAttr(name="is_cpu_op", bool_val=False)) 135 | node.attr.append(ChakraAttr(name="tensor_size", uint64_val=tensor_size)) 136 | encode_message(et, node) 137 | 138 | 139 | def one_comp_node(num_npus: int, runtime: int) -> None: 140 | """Generate computation nodes with a given runtime.""" 141 | for npu_id in range(num_npus): 142 | output_filename = f"one_comp_node.{npu_id}.et" 143 | with open(output_filename, "wb") as et: 144 | encode_message(et, GlobalMetadata(version="0.0.4")) 145 | 146 | node = get_node("COMP_NODE", COMP_NODE) 147 | node.attr.append(ChakraAttr(name="is_cpu_op", bool_val=False)) 148 | node.duration_micros = runtime 149 | encode_message(et, node) 150 | 151 | 152 | def two_comp_nodes_independent(num_npus: int, runtime: int) -> None: 153 | """Generate two independent computation nodes.""" 154 | for npu_id in range(num_npus): 155 | output_filename = f"two_comp_nodes_independent.{npu_id}.et" 156 | with open(output_filename, "wb") as et: 157 | encode_message(et, GlobalMetadata(version="0.0.4")) 158 | 159 | for _ in 
range(2): 160 | node = get_node("COMP_NODE", COMP_NODE) 161 | node.attr.append(ChakraAttr(name="is_cpu_op", bool_val=False)) 162 | node.duration_micros = runtime 163 | encode_message(et, node) 164 | 165 | 166 | def two_comp_nodes_dependent(num_npus: int, runtime: int) -> None: 167 | """Generate two dependent computation nodes.""" 168 | for npu_id in range(num_npus): 169 | output_filename = f"two_comp_nodes_dependent.{npu_id}.et" 170 | with open(output_filename, "wb") as et: 171 | encode_message(et, GlobalMetadata(version="0.0.4")) 172 | 173 | parent_node = get_node("COMP_NODE", COMP_NODE) 174 | parent_node.attr.append(ChakraAttr(name="is_cpu_op", bool_val=False)) 175 | parent_node.duration_micros = runtime 176 | encode_message(et, parent_node) 177 | 178 | child_node = get_node("COMP_NODE", COMP_NODE) 179 | child_node.attr.append(ChakraAttr(name="is_cpu_op", bool_val=False)) 180 | child_node.duration_micros = runtime 181 | child_node.data_deps.append(parent_node.id) 182 | encode_message(et, child_node) 183 | 184 | 185 | def generate_comm_coll_node(num_npus: int, comm_size: int, comm_type: int, node_name: str) -> None: 186 | """Generate communication collective nodes.""" 187 | for npu_id in range(num_npus): 188 | output_filename = f"{node_name}.{npu_id}.et" 189 | with open(output_filename, "wb") as et: 190 | encode_message(et, GlobalMetadata(version="0.0.4")) 191 | 192 | node = get_node(node_name, COMM_COLL_NODE) 193 | node.attr.append(ChakraAttr(name="is_cpu_op", bool_val=False)) 194 | node.attr.extend([get_comm_type_attr(comm_type), ChakraAttr(name="comm_size", int64_val=comm_size)]) 195 | encode_message(et, node) 196 | 197 | 198 | def one_comm_coll_node_allreduce(num_npus: int, comm_size: int) -> None: 199 | """Generate one AllReduce communication collective node.""" 200 | generate_comm_coll_node(num_npus, comm_size, ALL_REDUCE, "ALL_REDUCE") 201 | 202 | 203 | def one_comm_coll_node_alltoall(num_npus: int, comm_size: int) -> None: 204 | """Generate one AllToAll communication collective node.""" 205 | generate_comm_coll_node(num_npus, comm_size, ALL_TO_ALL, "ALL_TO_ALL") 206 | 207 | 208 | def one_comm_coll_node_allgather(num_npus: int, comm_size: int) -> None: 209 | """Generate one AllGather communication collective node.""" 210 | generate_comm_coll_node(num_npus, comm_size, ALL_GATHER, "ALL_GATHER") 211 | 212 | 213 | def one_comm_coll_node_reducescatter(num_npus: int, comm_size: int) -> None: 214 | """Generate one ReduceScatter communication collective node.""" 215 | generate_comm_coll_node(num_npus, comm_size, REDUCE_SCATTER, "REDUCE_SCATTER") 216 | 217 | 218 | def one_comm_coll_node_broadcast(num_npus: int, comm_size: int) -> None: 219 | """Generate one Broadcast communication collective node.""" 220 | generate_comm_coll_node(num_npus, comm_size, BROADCAST, "BROADCAST") 221 | 222 | 223 | def one_comm_coll_node_barrier(num_npus: int) -> None: 224 | """Generate one Barrier communication collective node.""" 225 | generate_comm_coll_node(num_npus, comm_size=0, comm_type=BARRIER, node_name="BARRIER") 226 | 227 | 228 | def one_comm_send_node(num_npus: int, tensor_size: int) -> None: 229 | """Generate communication send nodes.""" 230 | for npu_id in range(num_npus): 231 | output_filename = f"one_comm_send_node.{npu_id}.et" 232 | with open(output_filename, "wb") as et: 233 | encode_message(et, GlobalMetadata(version="0.0.4")) 234 | 235 | node = get_node("COMM_SEND_NODE", COMM_SEND_NODE) 236 | node.attr.append(ChakraAttr(name="is_cpu_op", bool_val=False)) 237 | 
node.attr.append(ChakraAttr(name="tensor_size", uint64_val=tensor_size)) 238 | encode_message(et, node) 239 | 240 | 241 | def one_comm_recv_node(num_npus: int, tensor_size: int) -> None: 242 | """Generate communication receive nodes.""" 243 | for npu_id in range(num_npus): 244 | output_filename = f"one_comm_recv_node.{npu_id}.et" 245 | with open(output_filename, "wb") as et: 246 | encode_message(et, GlobalMetadata(version="0.0.4")) 247 | 248 | node = get_node("COMM_RECV_NODE", COMM_RECV_NODE) 249 | node.attr.append(ChakraAttr(name="is_cpu_op", bool_val=False)) 250 | node.attr.append(ChakraAttr(name="tensor_size", uint64_val=tensor_size)) 251 | encode_message(et, node) 252 | 253 | 254 | def main() -> None: 255 | parser = argparse.ArgumentParser(description="Execution Trace Generator") 256 | parser.add_argument("--num_npus", type=int, default=64, help="Number of NPUs") 257 | parser.add_argument("--default_runtime", type=int, default=5, help="Default runtime of compute nodes") 258 | parser.add_argument("--default_tensor_size", type=int, default=1024, help="Default tensor size of memory nodes") 259 | parser.add_argument( 260 | "--default_comm_size", type=int, default=65536, help="Default communication size of communication nodes" 261 | ) 262 | args = parser.parse_args() 263 | 264 | one_metadata_node_all_types(args.num_npus) 265 | one_remote_mem_load_node(args.num_npus, args.default_tensor_size) 266 | one_remote_mem_store_node(args.num_npus, args.default_tensor_size) 267 | one_comp_node(args.num_npus, args.default_runtime) 268 | two_comp_nodes_independent(args.num_npus, args.default_runtime) 269 | two_comp_nodes_dependent(args.num_npus, args.default_runtime) 270 | one_comm_coll_node_allreduce(args.num_npus, args.default_comm_size) 271 | one_comm_coll_node_alltoall(args.num_npus, args.default_comm_size) 272 | one_comm_coll_node_allgather(args.num_npus, args.default_comm_size) 273 | one_comm_coll_node_reducescatter(args.num_npus, args.default_comm_size) 274 | one_comm_coll_node_broadcast(args.num_npus, args.default_comm_size) 275 | one_comm_coll_node_barrier(args.num_npus) 276 | one_comm_send_node(args.num_npus, args.default_tensor_size) 277 | one_comm_recv_node(args.num_npus, args.default_tensor_size) 278 | 279 | 280 | if __name__ == "__main__": 281 | main() 282 | --------------------------------------------------------------------------------
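A hedged usage sketch for the generator: calling the trace-generation functions directly rather than through the argparse CLI in main(). The import path below is an assumption; because the module uses package-relative imports (from ...schema...), it assumes the repository checkout is importable as a package named chakra (its parent directory on PYTHONPATH) and that the protobuf bindings have been built via setup.py. Adjust the path to match your installation.

    # Each call writes one ".et" file per NPU into the current directory,
    # e.g. one_comp_node.0.et ... one_comp_node.3.et.
    # Import path is an assumption; see the note above.
    from chakra.src.generator.generator import one_comp_node, one_comm_coll_node_allreduce

    one_comp_node(num_npus=4, runtime=5)                       # 4 compute-node traces
    one_comm_coll_node_allreduce(num_npus=4, comm_size=65536)  # 4 ALL_REDUCE traces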