├── .gitmodules ├── proto.lock ├── requirements-dev.txt ├── doc └── timeline_visualizer.png ├── tests ├── data │ ├── json_trace.tar.gz │ ├── 1.0.2-chakra.0.0.4.tgz │ └── feeder_tests_trace.tar.gz ├── trace_link │ ├── test_chakra_host_trace_loader.py │ ├── test_chakra_device_trace_loader.py │ ├── test_unique_id_assigner.py │ └── test_kineto_operator.py ├── jsonizer │ └── test_jsonizer.py ├── converter │ ├── test_pytorch_tensor.py │ ├── test_pytorch_node.py │ └── test_pytorch_converter.py ├── visualizer │ └── test_visualizer.py └── feeder │ ├── tests.cpp │ └── wrapper_tests.cpp ├── setup.cfg ├── .gitignore ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── general_question.md │ ├── feature_request.md │ └── bug_report.md ├── workflows │ ├── cpp_lint.yml │ ├── python_lint.yml │ ├── python_tests.yml │ ├── end_to_end_tests.yml │ ├── feeder_tests.yml │ ├── cla.yml │ └── codeql.yml └── PULL_REQUEST_TEMPLATE.md ├── setup.py ├── CONTRIBUTING.md ├── src ├── jsonizer │ └── jsonizer.py ├── feeder │ ├── et_feeder.h │ ├── wrapper_node.h │ ├── et_feeder_node.h │ ├── json_node.h │ ├── json_node.cpp │ ├── et_feeder.cpp │ └── et_feeder_node.cpp ├── trace_link │ ├── chakra_host_trace_loader.py │ ├── trace_link.py │ ├── unique_id_assigner.py │ ├── kineto_operator.py │ └── chakra_device_trace_loader.py ├── visualizer │ └── visualizer.py ├── converter │ ├── pytorch_tensor.py │ ├── converter.py │ └── pytorch_node.py ├── timeline_visualizer │ └── timeline_visualizer.py ├── third_party │ └── utils │ │ ├── protoio.hh │ │ ├── protolib.py │ │ └── protoio.cc └── generator │ └── generator.py ├── README.md ├── .clang-format ├── pyproject.toml ├── schema └── protobuf │ └── et_def.proto ├── USER_GUIDE.md └── LICENSE.md /.gitmodules: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /proto.lock: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | pyright==1.1.359 2 | pytest==8.1.1 3 | ruff==0.3.7 4 | vulture==2.11 5 | -------------------------------------------------------------------------------- /doc/timeline_visualizer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcommons/chakra/HEAD/doc/timeline_visualizer.png -------------------------------------------------------------------------------- /tests/data/json_trace.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcommons/chakra/HEAD/tests/data/json_trace.tar.gz -------------------------------------------------------------------------------- /tests/data/1.0.2-chakra.0.0.4.tgz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcommons/chakra/HEAD/tests/data/1.0.2-chakra.0.0.4.tgz -------------------------------------------------------------------------------- /tests/data/feeder_tests_trace.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mlcommons/chakra/HEAD/tests/data/feeder_tests_trace.tar.gz -------------------------------------------------------------------------------- /setup.cfg: 
-------------------------------------------------------------------------------- 1 | [build_grpc] 2 | proto_files = et_def.proto 3 | grpc_files = et_def.proto 4 | proto_path = schema/protobuf/ 5 | output_path = schema/protobuf/ 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info/ 2 | build/ 3 | *_pb2*.py* 4 | *.pyc 5 | __pycache__/ 6 | *.egg 7 | *.et 8 | *.dot 9 | .pyre 10 | *et_def.pb.cc 11 | *et_def.pb.h -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # These owners will be the default owners for everything in the repo. 2 | # Unless a later match takes precedence, they will be requested for review when someone opens a pull request. 3 | * @mlcommons/wg-chakra 4 | 5 | /CODEOWNERS @mlcommons/staff 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/general_question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: General question 3 | about: Ask a question or seek clarification about the project 4 | title: '' 5 | labels: 'question' 6 | assignees: '' 7 | --- 8 | 9 | > Please provide a detailed description of your question or the information you seek. 10 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | --- 8 | 9 | ## Problem Related to the Feature 10 | > A clear and concise description of what the problem is. 11 | 12 | ## Proposed Solution 13 | > A clear and concise description of what you want to happen. 14 | -------------------------------------------------------------------------------- /.github/workflows/cpp_lint.yml: -------------------------------------------------------------------------------- 1 | name: C++ Lint 2 | 3 | on: pull_request 4 | 5 | jobs: 6 | cpp-lint: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - name: Checkout Code 11 | uses: actions/checkout@v2 12 | 13 | - name: Format and Lint C++ Code 14 | uses: DoozyX/clang-format-lint-action@v0.18.1 15 | with: 16 | source: '.' 17 | extensions: 'cc,cpp,h,hh' 18 | clangFormatVersion: 16 19 | style: file 20 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | --- 8 | 9 | ## Describe the Bug 10 | > A clear and concise description of what the bug is. 11 | 12 | ## Steps to Reproduce 13 | > Steps to reproduce the behavior. 14 | > Please include the version information where the bug was observed. 15 | 16 | ## Expected Behavior 17 | > A clear and concise description of what you expected to happen. 18 | 19 | ## Screenshots 20 | > If applicable, add screenshots to help explain your problem.
21 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools.command.build import build 3 | 4 | class build_grpc(build): 5 | """ 6 | Custom build class to include gRPC build commands. 7 | 8 | This class modifies the default build process to include additional sub-commands 9 | necessary for building gRPC components. 10 | 11 | Attributes 12 | sub_commands (list): List of sub-commands to be executed during the build process. 13 | """ 14 | 15 | sub_commands = [("build_grpc", None)] + build.sub_commands 16 | 17 | 18 | setup(cmdclass={"build": build_grpc}) 19 | -------------------------------------------------------------------------------- /.github/workflows/python_lint.yml: -------------------------------------------------------------------------------- 1 | name: Python Lint 2 | 3 | on: pull_request 4 | 5 | jobs: 6 | python-lint: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - name: Checkout Code 11 | uses: actions/checkout@v2 12 | 13 | - name: Setup Python Environment 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: '3.10.14' 17 | 18 | - name: Install Dependencies 19 | run: | 20 | pip install -r requirements-dev.txt 21 | 22 | - name: Lint Python Code 23 | run: | 24 | ruff format . 25 | ruff check . 26 | 27 | - name: Run Pyright 28 | run: | 29 | pyright 30 | 31 | - name: Run vulture check 32 | run: vulture src/ tests/ 33 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing 2 | 3 | The best way to contribute to MLCommons is to get involved with one of our many project communities. You can find more information about getting involved with MLCommons [here](https://mlcommons.org/en/get-involved/#getting-started). 4 | 5 | Generally, we encourage people to become an MLCommons member if they wish to contribute to MLCommons projects, but outside pull requests are very welcome too. 6 | 7 | Regardless of whether you are a member, your organization needs to sign the MLCommons CLA. Please fill out this [CLA sign-up form](https://forms.gle/Ew1KkBVpyeJDuRw67) to get started. 8 | 9 | MLCommons project work is tracked with issue trackers and pull requests. Modify the project in your own fork and issue a pull request once you want other developers to take a look at what you have done and discuss the proposed changes. Ensure that cla-bot and other checks pass for your pull requests. 10 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ## Summary 2 | Provide a concise summary of the changes introduced by this pull request. Detail the purpose and scope of the changes, referencing any relevant issues or discussions. Explain how these changes address the problem or improve the project. 3 | 4 | ## Test Plan 5 | In this section, describe the testing you have performed to verify the changes. Include: 6 | - A clear description of the testing environment. 7 | - The steps you followed to test the new features or bug fixes. 8 | - Any specific commands used during testing, along with their outputs. 9 | - A description of the results and observations from your testing.
10 | This information is crucial for reviewers to understand how the changes have been validated. 11 | 12 | ## Additional Notes 13 | Include any other notes or comments about the pull request here. This can include challenges faced, future considerations, or context that reviewers might find helpful. 14 | -------------------------------------------------------------------------------- /.github/workflows/python_tests.yml: -------------------------------------------------------------------------------- 1 | name: Python Unit Tests 2 | 3 | on: pull_request 4 | 5 | jobs: 6 | python-tests: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - name: Checkout Code 11 | uses: actions/checkout@v2 12 | 13 | - name: Setup Python Environment 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: '3.10.14' 17 | 18 | - name: Install Chakra 19 | run: | 20 | pip install . 21 | 22 | - name: Install PARAM 23 | run: | 24 | git clone https://github.com/facebookresearch/param.git 25 | cd param/et_replay 26 | git checkout 7b19f586dd8b267333114992833a0d7e0d601630 27 | pip install . 28 | 29 | - name: Install HTA 30 | run: | 31 | git clone https://github.com/facebookresearch/HolisticTraceAnalysis.git 32 | cd HolisticTraceAnalysis 33 | git checkout d731cc2e2249976c97129d409a83bd53d93051f6 34 | git submodule update --init 35 | pip install -r requirements.txt 36 | pip install -e . 37 | 38 | - name: Install Dependencies 39 | run: | 40 | pip install -r requirements-dev.txt 41 | 42 | - name: Run Unit Tests 43 | run: | 44 | python -m pytest -vv tests 45 | -------------------------------------------------------------------------------- /src/jsonizer/jsonizer.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from google.protobuf.json_format import MessageToJson 4 | 5 | from ...schema.protobuf.et_def_pb2 import ( 6 | GlobalMetadata, 7 | ) 8 | from ...schema.protobuf.et_def_pb2 import ( 9 | Node as ChakraNode, 10 | ) 11 | from ..third_party.utils.protolib import decodeMessage as decode_message 12 | from ..third_party.utils.protolib import openFileRd as open_file_rd 13 | 14 | 15 | def main() -> None: 16 | parser = argparse.ArgumentParser(description="Converts Chakra execution trace to JSON format.") 17 | parser.add_argument( 18 | "--input_filename", type=str, required=True, help="Specifies the input filename of the Chakra execution trace." 19 | ) 20 | parser.add_argument( 21 | "--output_filename", type=str, required=True, help="Specifies the output filename for the JSON data." 22 | ) 23 | args = parser.parse_args() 24 | 25 | execution_trace = open_file_rd(args.input_filename) 26 | node = ChakraNode() 27 | with open(args.output_filename, "w") as file: 28 | global_metadata = GlobalMetadata() 29 | decode_message(execution_trace, global_metadata) 30 | file.write(MessageToJson(global_metadata)) 31 | while decode_message(execution_trace, node): 32 | file.write(MessageToJson(node)) 33 | execution_trace.close() 34 | 35 | 36 | if __name__ == "__main__": 37 | main() 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Chakra 2 | 3 | Chakra is an open and interoperable graph-based representation of AI/ML workloads focused on enabling and accelerating AI SW/HW co-design. Chakra execution traces represent key operations (such as compute, memory, and communication), data and control dependencies, timing, and resource constraints.
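For illustration, a node in such a trace can be built directly against the generated protobuf bindings. The sketch below is hypothetical (the IDs and the operator name are made up), but the `Node` fields it uses (`id`, `name`, `data_deps`) are the ones defined in `schema/protobuf/et_def.proto`:

```python
from chakra.schema.protobuf.et_def_pb2 import Node

# A single trace node: operator 2 consumes the output of operator 1.
node = Node(id=2, name="matmul")
node.data_deps.append(1)  # data dependency on the node with id 1
```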
4 | 5 | This is a repository of the Chakra schema and a complementary set of tools and capabilities to enable the collection, analysis, generation, and adoption of Chakra execution traces by a broad range of simulators, emulators, and replay tools. 6 | 7 | Chakra is under active development as an [MLCommons](https://mlcommons.org/en)® research project. Please see the [MLCommons Chakra Working Group](https://mlcommons.org/en/groups/research-chakratracebench/) for more details on participating in this effort. 8 | 9 | A detailed description of the original motivation and guiding principles can be found [here](https://arxiv.org/abs/2305.14516). The paper was published prior to Chakra becoming an MLCommons project. Please cite this repository to refer to the latest Chakra schema and tools. 10 | 11 | ## Installation 12 | 13 | Check out [`USER_GUIDE`](USER_GUIDE.md) for details. 14 | 15 | ## License 16 | 17 | Chakra is released under the MIT license. Please see the [`LICENSE.md`](LICENSE.md) file for more information. 18 | 19 | ## Contributing 20 | 21 | We actively welcome your pull requests! Please see [`CONTRIBUTING.md`](CONTRIBUTING.md) for more info. 22 | -------------------------------------------------------------------------------- /tests/trace_link/test_chakra_host_trace_loader.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import MagicMock 2 | 3 | import pytest 4 | from chakra.src.trace_link.chakra_host_trace_loader import ChakraHostTraceLoader 5 | from et_replay.execution_trace import Node as PyTorchOperator 6 | 7 | 8 | @pytest.fixture 9 | def mock_trace(): 10 | """Fixture to create a mock trace with a specific structure.""" 11 | # Create a mock trace node structure 12 | root_node = MagicMock(spec=PyTorchOperator) 13 | child_node1 = MagicMock(spec=PyTorchOperator) 14 | child_node2 = MagicMock(spec=PyTorchOperator) 15 | 16 | # Setup mock hierarchy 17 | root_node.children = [child_node1, child_node2] 18 | root_node.id = 1 19 | child_node1.children = [] 20 | child_node1.id = 2 21 | child_node2.children = [] 22 | child_node2.id = 3 23 | 24 | mock_trace = MagicMock() 25 | mock_trace.get_nodes.return_value = [None, root_node] 26 | 27 | return mock_trace 28 | 29 | 30 | @pytest.fixture 31 | def loader(): 32 | """Fixture to create a ChakraHostTraceLoader instance.""" 33 | return ChakraHostTraceLoader() 34 | 35 | 36 | def test_extract_chakra_host_ops(loader, mock_trace): 37 | """Test the extract_chakra_host_ops method.""" 38 | root_node = mock_trace.get_nodes()[1] 39 | 40 | result = loader.extract_chakra_host_ops(root_node) 41 | 42 | assert len(result) == 3 43 | assert result[0].id == 1 44 | assert result[1].id == 2 45 | assert result[2].id == 3 46 | -------------------------------------------------------------------------------- /tests/jsonizer/test_jsonizer.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tempfile 3 | from unittest.mock import mock_open, patch 4 | 5 | from chakra.schema.protobuf.et_def_pb2 import GlobalMetadata 6 | from chakra.schema.protobuf.et_def_pb2 import Node as ChakraNode 7 | from chakra.src.jsonizer.jsonizer import main 8 | from google.protobuf.json_format import MessageToJson 9 | 10 | 11 | @patch("chakra.src.jsonizer.jsonizer.open_file_rd") 12 | @patch("chakra.src.jsonizer.jsonizer.decode_message") 13 | @patch("builtins.open", new_callable=mock_open) 14 | def test_main(mock_file_open, mock_decode_message, mock_open_file_rd) -> None: 15 | """ 16 | Tests
the main function for converting Chakra execution trace to JSON format. 17 | """ 18 | with tempfile.NamedTemporaryFile(suffix=".json") as temp_output: 19 | args = argparse.Namespace(input_filename="input_file", output_filename=temp_output.name) 20 | mock_node = ChakraNode() 21 | mock_global_metadata = GlobalMetadata() 22 | 23 | mock_decode_message.side_effect = [mock_global_metadata, mock_node, False] 24 | 25 | with patch("argparse.ArgumentParser.parse_args", return_value=args): 26 | main() 27 | 28 | mock_open_file_rd.assert_called_with("input_file") 29 | mock_decode_message.assert_called() 30 | mock_file_open.assert_called_with(temp_output.name, "w") 31 | mock_file_open().write.assert_any_call(MessageToJson(mock_global_metadata)) 32 | mock_file_open().write.assert_any_call(MessageToJson(mock_node)) 33 | -------------------------------------------------------------------------------- /.github/workflows/end_to_end_tests.yml: -------------------------------------------------------------------------------- 1 | name: End-to-End Tests 2 | 3 | on: pull_request 4 | 5 | jobs: 6 | end-to-end-tests: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - name: Checkout Code 11 | uses: actions/checkout@v2 12 | 13 | - name: Setup Python Environment 14 | uses: actions/setup-python@v2 15 | with: 16 | python-version: '3.10.14' 17 | 18 | - name: Install Chakra 19 | run: | 20 | pip install . 21 | 22 | - name: Install PARAM 23 | run: | 24 | git clone https://github.com/facebookresearch/param.git 25 | cd param/et_replay 26 | git checkout 7b19f586dd8b267333114992833a0d7e0d601630 27 | pip install . 28 | 29 | - name: Install HTA 30 | run: | 31 | git clone https://github.com/facebookresearch/HolisticTraceAnalysis.git 32 | cd HolisticTraceAnalysis 33 | git checkout d731cc2e2249976c97129d409a83bd53d93051f6 34 | git submodule update --init 35 | pip install -r requirements.txt 36 | pip install -e . 37 | 38 | - name: Test chakra_trace_link Without Arguments 39 | run: | 40 | chakra_trace_link || [ $? -eq 2 ] 41 | 42 | - name: Test chakra_converter Without Arguments 43 | run: | 44 | chakra_converter || [ $? -eq 2 ] 45 | 46 | - name: Test chakra_visualizer Without Arguments 47 | run: | 48 | chakra_visualizer || [ $? -eq 2 ] 49 | 50 | - name: Test chakra_jsonizer Without Arguments 51 | run: | 52 | chakra_jsonizer || [ $? -eq 2 ] 53 | 54 | - name: Test chakra_timeline_visualizer Without Arguments 55 | run: | 56 | chakra_timeline_visualizer || [ $? 
-eq 2 ] 57 | -------------------------------------------------------------------------------- /src/feeder/et_feeder.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <memory> 4 | #include <queue> 5 | #include <string> 6 | #include <unordered_map> 7 | #include <unordered_set> 8 | 9 | #include "et_feeder_node.h" 10 | #include "protoio.hh" 11 | 12 | namespace Chakra { 13 | struct CompareNodes : public std::binary_function< 14 | std::shared_ptr<ETFeederNode>, 15 | std::shared_ptr<ETFeederNode>, 16 | bool> { 17 | bool operator()( 18 | const std::shared_ptr<ETFeederNode> lhs, 19 | const std::shared_ptr<ETFeederNode> rhs) const { 20 | return lhs->getChakraNode()->id() > rhs->getChakraNode()->id(); 21 | } 22 | }; 23 | 24 | class ETFeeder { 25 | public: 26 | ETFeeder(std::string filename); 27 | ~ETFeeder(); 28 | 29 | void addNode(std::shared_ptr<ETFeederNode> node); 30 | void removeNode(uint64_t node_id); 31 | bool hasNodesToIssue(); 32 | std::shared_ptr<ETFeederNode> getNextIssuableNode(); 33 | void pushBackIssuableNode(uint64_t node_id); 34 | std::shared_ptr<ETFeederNode> lookupNode(uint64_t node_id); 35 | void freeChildrenNodes(uint64_t node_id); 36 | void readGlobalMetadata(); 37 | std::shared_ptr<ETFeederNode> readNode(); 38 | void readNextWindow(); 39 | void resolveDep(); 40 | 41 | private: 42 | ProtoInputStream trace_; 43 | const uint32_t window_size_; 44 | bool et_complete_; 45 | 46 | std::unordered_map<uint64_t, std::shared_ptr<ETFeederNode>> dep_graph_{}; 47 | std::unordered_set<uint64_t> dep_free_node_id_set_{}; 48 | std::priority_queue< 49 | std::shared_ptr<ETFeederNode>, 50 | std::vector<std::shared_ptr<ETFeederNode>>, 51 | CompareNodes> 52 | dep_free_node_queue_{}; 53 | std::unordered_set<std::shared_ptr<ETFeederNode>> dep_unresolved_node_set_{}; 54 | }; 55 | 56 | } // namespace Chakra -------------------------------------------------------------------------------- /.github/workflows/feeder_tests.yml: -------------------------------------------------------------------------------- 1 | name: Feeder tests 2 | 3 | on: [ push, pull_request ] 4 | 5 | jobs: 6 | feeder-tests: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - name: Checkout Code 11 | uses: actions/checkout@v4 12 | - name: Install protoc 13 | run: | 14 | sudo apt update 15 | sudo apt install protobuf-compiler libprotobuf-dev 16 | - name: Install Google Test Framework 17 | run: | 18 | sudo apt update 19 | sudo apt install libgtest-dev 20 | - name: Extract trace for feeder tests 21 | run: tar -xvf tests/data/feeder_tests_trace.tar.gz 22 | - name: Build 23 | run: | 24 | SCRIPT_DIR=.
25 | BUILD_DIR="${SCRIPT_DIR:?}"/build 26 | CHAKRA_ET_DIR="${SCRIPT_DIR:?}"/schema/protobuf 27 | protoc et_def.proto \ 28 | --proto_path="${CHAKRA_ET_DIR:?}" \ 29 | --cpp_out="${CHAKRA_ET_DIR:?}" 30 | g++ -Wall -I src/third_party/utils -I schema/protobuf -I src/feeder -c schema/protobuf/et_def.pb.cc -o schema/protobuf/et_def.pb.o 31 | g++ -Wall -I src/third_party/utils -I schema/protobuf -I src/feeder -c src/feeder/et_feeder.cpp -o src/feeder/et_feeder.o 32 | g++ -Wall -I src/third_party/utils -I schema/protobuf -I src/feeder -c src/feeder/et_feeder_node.cpp -o src/feeder/et_feeder_node.o 33 | g++ -Wall -I src/third_party/utils -I schema/protobuf -I src/feeder -c src/third_party/utils/protoio.cc -o src/third_party/utils/protoio.o 34 | g++ -Wall -I src/third_party/utils -I schema/protobuf -I src/feeder -c tests/feeder/tests.cpp -o tests/feeder/tests.o 35 | g++ -Wall -I src/third_party/utils -I schema/protobuf -I src/feeder -o feeder_tests schema/protobuf/et_def.pb.o src/feeder/et_feeder.o src/feeder/et_feeder_node.o src/third_party/utils/protoio.o tests/feeder/tests.o -lgtest -lgtest_main -lprotobuf -lpthread 36 | - name: Run tests 37 | run: ./feeder_tests -------------------------------------------------------------------------------- /tests/trace_link/test_chakra_device_trace_loader.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from chakra.src.trace_link.chakra_device_trace_loader import ChakraDeviceTraceLoader 3 | from chakra.src.trace_link.kineto_operator import KinetoOperator 4 | 5 | 6 | @pytest.fixture 7 | def trace_loader(): 8 | return ChakraDeviceTraceLoader() 9 | 10 | 11 | @pytest.mark.parametrize( 12 | "kineto_ops, expected_exclusive_durs", 13 | [ 14 | ( 15 | [ 16 | {"ts": 100, "dur": 10, "inclusive_dur": 10}, 17 | {"ts": 105, "dur": 3, "inclusive_dur": 3}, 18 | {"ts": 108, "dur": 1, "inclusive_dur": 1}, 19 | ], 20 | [6, 3, 1], # Expected exclusive durations 21 | ), 22 | ( 23 | [ 24 | {"ts": 100, "dur": 20, "inclusive_dur": 20}, 25 | {"ts": 105, "dur": 5, "inclusive_dur": 5}, 26 | {"ts": 110, "dur": 5, "inclusive_dur": 5}, 27 | ], 28 | [10, 5, 5], # Expected exclusive durations 29 | ), 30 | ], 31 | ) 32 | def test_calculate_exclusive_dur(trace_loader, kineto_ops, expected_exclusive_durs): 33 | kineto_tid_cpu_ops_map = {1: [KinetoOperator(op) for op in kineto_ops]} 34 | trace_loader.calculate_exclusive_dur(kineto_tid_cpu_ops_map) 35 | 36 | for i, op in enumerate(kineto_tid_cpu_ops_map[1]): 37 | assert op.exclusive_dur == expected_exclusive_durs[i] 38 | 39 | 40 | @pytest.mark.parametrize( 41 | "intervals, expected_result", 42 | [ 43 | ([(1, 3), (2, 6), (8, 10), (15, 18)], [(1, 6), (8, 10), (15, 18)]), 44 | ([(1, 4), (4, 5)], [(1, 5)]), 45 | ([], []), 46 | ([(1, 2), (2, 3), (3, 4)], [(1, 4)]), 47 | ([(1, 5), (2, 6), (6, 8), (7, 9)], [(1, 9)]), 48 | ], 49 | ) 50 | def test_merge_overlapping_intervals(intervals, expected_result): 51 | result = ChakraDeviceTraceLoader.merge_overlapping_intervals(intervals) 52 | assert result == expected_result 53 | -------------------------------------------------------------------------------- /.github/workflows/cla.yml: -------------------------------------------------------------------------------- 1 | name: "cla-bot" 2 | on: 3 | issue_comment: 4 | types: [created] 5 | pull_request_target: 6 | types: [opened,closed,synchronize] 7 | 8 | jobs: 9 | cla-check: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: "MLCommons CLA bot check" 13 | if: (github.event.comment.body == 'recheck') || 
github.event_name == 'pull_request_target' 14 | # Alpha Release 15 | uses: mlcommons/cla-bot@master 16 | env: 17 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 18 | # the below token should have repo scope and must be manually added by you in the repository's secret 19 | PERSONAL_ACCESS_TOKEN : ${{ secrets.MLCOMMONS_BOT_CLA_TOKEN }} 20 | with: 21 | path-to-signatures: 'cla-bot/v1/cla.json' 22 | # branch should not be protected 23 | branch: 'main' 24 | allowlist: user1,bot* 25 | remote-organization-name: mlcommons 26 | remote-repository-name: systems 27 | 28 | #below are the optional inputs - If the optional inputs are not given, then default values will be taken 29 | #remote-organization-name: enter the remote organization name where the signatures should be stored (Default is storing the signatures in the same repository) 30 | #remote-repository-name: enter the remote repository name where the signatures should be stored (Default is storing the signatures in the same repository) 31 | #create-file-commit-message: 'For example: Creating file for storing CLA Signatures' 32 | #signed-commit-message: 'For example: $contributorName has signed the CLA in #$pullRequestNo' 33 | #custom-notsigned-prcomment: 'pull request comment with Introductory message to ask new contributors to sign' 34 | #custom-pr-sign-comment: 'The signature to be committed in order to sign the CLA' 35 | #custom-allsigned-prcomment: 'pull request comment when all contributors has signed, defaults to **CLA Assistant Lite bot** All Contributors have signed the CLA.' 36 | -------------------------------------------------------------------------------- /tests/trace_link/test_unique_id_assigner.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from src.trace_link.unique_id_assigner import UniqueIdAssigner 4 | 5 | 6 | @pytest.fixture 7 | def assigner(): 8 | """Fixture to create a new UniqueIdAssigner instance for each test.""" 9 | return UniqueIdAssigner() 10 | 11 | 12 | def test_assign_or_retrieve_id_new(assigner): 13 | """ 14 | Test that a new unique ID is correctly assigned to a new original ID. 15 | """ 16 | first_id = assigner.assign_or_retrieve_id(10) 17 | assert first_id == 0 # Expect the first assigned ID to be 0 18 | 19 | 20 | def test_assign_or_retrieve_id_existing(assigner): 21 | """ 22 | Test that the same original ID retrieves the same unique ID upon subsequent calls. 23 | """ 24 | first_id = assigner.assign_or_retrieve_id(10) 25 | second_id = assigner.assign_or_retrieve_id(10) 26 | assert second_id == first_id # Ensure it retrieves the same ID 27 | 28 | 29 | def test_assign_or_retrieve_id_distinct(assigner): 30 | """ 31 | Test that different original IDs receive different unique IDs. 32 | """ 33 | first_id = assigner.assign_or_retrieve_id(10) 34 | second_id = assigner.assign_or_retrieve_id(20) 35 | assert second_id != first_id 36 | assert second_id == 1 # This should be the next unique ID 37 | 38 | 39 | def test_generate_new_id_sequence(assigner): 40 | """ 41 | Test that generate_new_id consistently returns incrementing IDs. 42 | """ 43 | ids = [assigner.generate_new_id() for _ in range(5)] 44 | expected_ids = list(range(5)) 45 | assert ids == expected_ids 46 | 47 | 48 | def test_lookup_new_id_assigned(assigner): 49 | """ 50 | Test lookup of new IDs, ensuring assigned IDs return the correct new ID. 
51 | """ 52 | original_id = 30 53 | new_id = assigner.assign_or_retrieve_id(original_id) 54 | assert assigner.lookup_new_id(original_id) == new_id 55 | 56 | 57 | def test_lookup_new_id_unassigned(assigner): 58 | """ 59 | Test lookup for an unassigned ID returns the original ID. 60 | """ 61 | unassigned_id = 40 62 | assert assigner.lookup_new_id(unassigned_id) == unassigned_id 63 | -------------------------------------------------------------------------------- /tests/converter/test_pytorch_tensor.py: -------------------------------------------------------------------------------- 1 | from src.converter.pytorch_tensor import PyTorchTensor, list_to_pytorch_tensor 2 | 3 | 4 | def test_pytorch_tensor_initialization(): 5 | """Test initialization of PyTorchTensor object.""" 6 | tensor_data = [1, 2, 3, 4, 5, 6] 7 | tensor = PyTorchTensor(tensor_data) 8 | assert tensor.tensor_data == tensor_data 9 | 10 | 11 | def test_pytorch_tensor_is_valid(): 12 | """Test the is_valid method of PyTorchTensor.""" 13 | valid_data = [1, 2, 3, 4, 5, 6] 14 | invalid_data_1 = [1, 2, 3, 4, 5] # Less than 6 elements 15 | invalid_data_2 = [1, 2, 3, 4, 5, 6, 7] # More than 6 elements 16 | invalid_data_3 = [1, 2, 3, 4, 5, "a"] # Non-integer element 17 | 18 | valid_tensor = PyTorchTensor(valid_data) 19 | invalid_tensor_1 = PyTorchTensor(invalid_data_1) 20 | invalid_tensor_2 = PyTorchTensor(invalid_data_2) 21 | invalid_tensor_3 = PyTorchTensor(invalid_data_3) 22 | 23 | assert valid_tensor.is_valid() is True 24 | assert invalid_tensor_1.is_valid() is False 25 | assert invalid_tensor_2.is_valid() is False 26 | assert invalid_tensor_3.is_valid() is False 27 | 28 | 29 | def test_pytorch_tensor_properties(): 30 | """Test property methods of PyTorchTensor.""" 31 | tensor_data = [1, 2, 3, 4, 5, 6] 32 | tensor = PyTorchTensor(tensor_data) 33 | 34 | assert tensor.tensor_id == 1 35 | assert tensor.storage_id == 2 36 | assert tensor.offset == 3 37 | assert tensor.num_elem == 4 38 | assert tensor.elem_bytes == 5 39 | 40 | 41 | def test_pytorch_tensor_has_valid_storage_id(): 42 | """Test has_valid_storage_id method of PyTorchTensor.""" 43 | valid_storage_id_data = [1, 2, 3, 4, 5, 6] 44 | invalid_storage_id_data = [1, 0, 3, 4, 5, 6] # storage_id = 0 45 | 46 | valid_tensor = PyTorchTensor(valid_storage_id_data) 47 | invalid_tensor = PyTorchTensor(invalid_storage_id_data) 48 | 49 | assert valid_tensor.has_valid_storage_id() is True 50 | assert invalid_tensor.has_valid_storage_id() is False 51 | 52 | 53 | def test_list_to_pytorch_tensor(): 54 | """Test list_to_pytorch_tensor function.""" 55 | tensor_data = [1, 2, 3, 4, 5, 6] 56 | tensor = list_to_pytorch_tensor(tensor_data) 57 | 58 | assert isinstance(tensor, PyTorchTensor) 59 | assert tensor.tensor_data == tensor_data 60 | -------------------------------------------------------------------------------- /src/trace_link/chakra_host_trace_loader.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from typing import List 4 | 5 | from et_replay.execution_trace import Node as PyTorchOperator 6 | from et_replay.utils import load_execution_trace_file 7 | 8 | # Increase the recursion limit for deep Chakra host execution traces. 9 | sys.setrecursionlimit(10**6) 10 | 11 | 12 | class ChakraHostTraceLoader: 13 | """Loads Chakra host traces.""" 14 | 15 | def load(self, chakra_host_trace_file: str) -> List[PyTorchOperator]: 16 | """ 17 | Load and process the Chakra Host Execution Trace. 
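Example (an illustrative sketch; the trace file path is hypothetical): `host_ops = ChakraHostTraceLoader().load("chakra_host_et_rank0.json")` returns the host-side operators sorted by ID.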
18 | 19 | Args: 20 | chakra_host_trace_file (str): Path to the PyTorch execution trace file. 21 | 22 | Returns: 23 | List[PyTorchOperator]: List of PyTorch operators. 24 | """ 25 | logging.debug(f"Starting to load Chakra host execution trace from file: {chakra_host_trace_file}.") 26 | chakra_host_trace = load_execution_trace_file(chakra_host_trace_file) 27 | 28 | root_node = chakra_host_trace.get_nodes()[1] # Root node is usually 1-based 29 | chakra_host_ops = self.extract_chakra_host_ops(root_node) 30 | logging.debug(f"Extracted {len(chakra_host_ops)} operators from Chakra host execution trace.") 31 | logging.debug("Chakra host execution trace has been loaded and processed successfully.") 32 | 33 | return chakra_host_ops 34 | 35 | def extract_chakra_host_ops(self, node: PyTorchOperator) -> List[PyTorchOperator]: 36 | """ 37 | Extract and sort nodes from the PyTorch execution trace recursively. 38 | 39 | This method traverses the execution trace starting from the provided node, extracting all the operator nodes 40 | recursively, and then returns them sorted by their identifiers. 41 | 42 | Args: 43 | node (PyTorchOperator): Starting node for extraction. 44 | 45 | Returns: 46 | List[PyTorchOperator]: Sorted list of extracted PyTorchOperator nodes. 47 | """ 48 | nodes = [] 49 | 50 | def traverse(node: PyTorchOperator): 51 | nodes.append(node) 52 | for child in node.children: 53 | traverse(child) 54 | 55 | traverse(node) 56 | logging.debug(f"Traversed {len(nodes)} nodes from root node ID: {node.id}") 57 | return sorted(nodes, key=lambda x: x.id) 58 | -------------------------------------------------------------------------------- /src/trace_link/trace_link.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | 4 | from .trace_linker import TraceLinker 5 | 6 | 7 | def main() -> None: 8 | parser = argparse.ArgumentParser( 9 | description=( 10 | "This tool links Chakra host execution traces with Chakra device traces. Chakra host execution " 11 | "traces include host-side (CPU) operators only, missing GPU operators. While these traces show " 12 | "dependencies between operators, they lack operator duration. Chakra device traces include " 13 | "device-side (GPU) operators in an unstructured timeline without explicit dependencies. This tool " 14 | "adds duration information to CPU operators in Chakra host traces and encodes GPU operators into the " 15 | "final Chakra host + device trace in JSON format. The trace linker also identifies key dependencies, " 16 | "such as inter-thread and synchronization dependencies. 
For more information, see the guide at https://" 17 | "github.com/mlcommons/chakra/wiki/Chakra-Execution-Trace-Collection-%E2%80%90-A-Comprehensive-Guide-on-" 18 | "Merging-PyTorch-and-Kineto-Traces" 19 | ) 20 | ) 21 | parser.add_argument("--rank", type=int, required=True, help="Rank for the input traces") 22 | parser.add_argument( 23 | "--chakra-host-trace", 24 | type=str, 25 | required=True, 26 | help="Path to the Chakra host execution trace (formerly called PyTorch execution traces)", 27 | ) 28 | parser.add_argument( 29 | "--chakra-device-trace", 30 | type=str, 31 | required=True, 32 | help="Path to the Chakra device execution trace (also known as Kineto traces)", 33 | ) 34 | parser.add_argument( 35 | "--output-file", 36 | type=str, 37 | required=True, 38 | help="Path for the output Chakra host + device trace in the JSON format", 39 | ) 40 | parser.add_argument("--log-level", default="INFO", type=str, help="Log output verbosity level") 41 | 42 | args = parser.parse_args() 43 | 44 | logging.basicConfig(level=args.log_level.upper()) 45 | 46 | linker = TraceLinker() 47 | linker.link(args.rank, args.chakra_host_trace, args.chakra_device_trace, args.output_file) 48 | 49 | logging.info(f"Linking process successful. Output file is available at {args.output_file}.") 50 | logging.info("Please run the chakra_converter for further postprocessing.") 51 | 52 | 53 | if __name__ == "__main__": 54 | main() 55 | -------------------------------------------------------------------------------- /tests/visualizer/test_visualizer.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tempfile 3 | from unittest.mock import patch 4 | 5 | from chakra.src.visualizer.visualizer import escape_label, main 6 | 7 | 8 | def test_escape_label() -> None: 9 | """ 10 | Tests the escape_label function. 11 | """ 12 | assert escape_label("a{b}c") == "a\\{b\\}c" 13 | assert escape_label("a(b)c") == "a\\(b\\)c" 14 | assert escape_label("a<b>c") == "a\\<b\\>c" 15 | assert escape_label("a[b]c") == "a\\[b\\]c" 16 | assert escape_label("a|b&c-d") == "a\\|b\\&c\\-d" 17 | 18 | 19 | @patch("chakra.src.visualizer.visualizer.open_file_rd") 20 | @patch("chakra.src.visualizer.visualizer.decode_message") 21 | @patch("chakra.src.visualizer.visualizer.graphviz.Digraph") 22 | def test_main_pdf(mock_graphviz_digraph, mock_decode_message, mock_open_file_rd) -> None: 23 | """ 24 | Tests the main function for PDF output. 25 | """ 26 | with tempfile.NamedTemporaryFile(suffix=".pdf") as temp_output: 27 | args = argparse.Namespace(input_filename="input_file", output_filename=temp_output.name) 28 | mock_node = mock_open_file_rd.return_value 29 | mock_global_metadata = mock_open_file_rd.return_value 30 | 31 | mock_decode_message.side_effect = [mock_global_metadata, mock_node, False] 32 | 33 | with patch("argparse.ArgumentParser.parse_args", return_value=args): 34 | main() 35 | 36 | mock_open_file_rd.assert_called_with("input_file") 37 | mock_decode_message.assert_called() 38 | mock_graphviz_digraph.return_value.render.assert_called() 39 | 40 | 41 | @patch("chakra.src.visualizer.visualizer.open_file_rd") 42 | @patch("chakra.src.visualizer.visualizer.decode_message") 43 | @patch("chakra.src.visualizer.visualizer.nx.write_graphml") 44 | def test_main_graphml(mock_write_graphml, mock_decode_message, mock_open_file_rd) -> None: 45 | """ 46 | Tests the main function for GraphML output.
47 | """ 48 | with tempfile.NamedTemporaryFile(suffix=".graphml") as temp_output: 49 | args = argparse.Namespace(input_filename="input_file", output_filename=temp_output.name) 50 | mock_node = mock_open_file_rd.return_value 51 | mock_global_metadata = mock_open_file_rd.return_value 52 | 53 | mock_decode_message.side_effect = [mock_global_metadata, mock_node, False] 54 | 55 | with patch("argparse.ArgumentParser.parse_args", return_value=args): 56 | main() 57 | 58 | mock_open_file_rd.assert_called_with("input_file") 59 | mock_decode_message.assert_called() 60 | mock_write_graphml.assert_called() 61 | -------------------------------------------------------------------------------- /src/trace_link/unique_id_assigner.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | 3 | 4 | class UniqueIdAssigner: 5 | """ 6 | Assigns unique IDs to items, ensuring each item gets a distinct ID. 7 | 8 | This class is used to maintain a consistent and unique mapping of original identifiers to new unique identifiers. 9 | It's particularly useful in scenarios where the uniqueness of IDs across different entities or iterations needs to 10 | be preserved. 11 | 12 | Attributes 13 | next_id (int): The next unique ID to be assigned. 14 | original_to_new_ids (Dict[int, int]): A mapping from original IDs to their corresponding new unique IDs. This 15 | helps in retrieving already assigned unique IDs and ensures the same original ID always maps to the same 16 | unique ID. 17 | """ 18 | 19 | def __init__(self) -> None: 20 | """Initialize the UniqueIdAssigner with a starting ID of 0.""" 21 | self.next_id: int = 0 22 | self.original_to_new_ids: Dict[int, int] = {} 23 | 24 | def assign_or_retrieve_id(self, original_id: int) -> int: 25 | """ 26 | Assign a new unique ID to the given original ID if it doesn't have one already. 27 | 28 | Args: 29 | original_id (int): The original ID for which a unique ID is needed. 30 | 31 | Returns: 32 | int: A unique ID corresponding to the original ID. 33 | """ 34 | if original_id not in self.original_to_new_ids: 35 | self.original_to_new_ids[original_id] = self.next_id 36 | self.next_id += 1 37 | 38 | return self.original_to_new_ids[original_id] 39 | 40 | def generate_new_id(self) -> int: 41 | """ 42 | Generate a new unique ID without needing an original ID. 43 | 44 | This is useful for cases where new entities are created that do not have an existing identifier. 45 | 46 | Returns 47 | int: A new unique ID. 48 | """ 49 | unique_id = self.next_id 50 | self.next_id += 1 51 | return unique_id 52 | 53 | def lookup_new_id(self, original_id: int) -> int: 54 | """ 55 | Retrieve the new unique ID for a given original ID, if it has been assigned. 56 | 57 | This method is useful for checking if a unique ID has already been assigned to an original ID and retrieving it. 58 | 59 | Args: 60 | original_id (int): The original ID to look up. 61 | 62 | Returns: 63 | int: The new unique ID if it has been assigned, otherwise returns the original ID. 
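Example (illustrative): given assigner = UniqueIdAssigner(), uid = assigner.assign_or_retrieve_id(7) assigns a new unique ID, assigner.lookup_new_id(7) == uid afterwards, and assigner.lookup_new_id(99) == 99 while 99 is still unassigned.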
64 | """ 65 | return self.original_to_new_ids.get(original_id, original_id) 66 | -------------------------------------------------------------------------------- /src/feeder/wrapper_node.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "et_feeder.h" 4 | #include "et_feeder_node.h" 5 | #include "json_node.h" 6 | 7 | using json = nlohmann::json; 8 | 9 | enum format { Protobuf, JSON }; 10 | 11 | // WrapperNode class wraps protobuf and JSON 12 | class WrapperNode { 13 | private: 14 | enum format format_type_; 15 | Chakra::ETFeeder* et_feeder_; 16 | std::shared_ptr<Chakra::ETFeederNode> node_{nullptr}; 17 | std::ifstream jsonfile_; 18 | json data_; 19 | JSONNode json_node_; 20 | int64_t node_idx_ = -1; 21 | std::queue<std::shared_ptr<Chakra::ETFeederNode>> push_back_queue_proto; 22 | std::queue<JSONNode> push_back_queue_json; 23 | std::unordered_map<uint64_t, JSONNode> dep_graph_json{}; 24 | std::unordered_set<uint64_t> dep_free_node_id_set_json{}; 25 | std::priority_queue< 26 | JSONNode, // type of stored elements 27 | std::vector<JSONNode>, // underlying container to store elements 28 | CompareJSONNodesGT> // compare type providing a strict weak ordering 29 | dep_free_node_queue_json{}; 30 | std::unordered_set<std::shared_ptr<JSONNode>> 31 | dep_unresolved_node_set_json{}; 32 | int window_size_json; 33 | bool json_et_complete_; 34 | 35 | public: 36 | WrapperNode(); 37 | WrapperNode(const WrapperNode& t); 38 | WrapperNode(std::string filename); 39 | ~WrapperNode(); 40 | void releaseMemory(); 41 | void createWrapper(std::string filename); 42 | std::shared_ptr<Chakra::ETFeederNode> getProtobufNode(); 43 | JSONNode getJSONNode(); 44 | void addNode(JSONNode node); 45 | void addNode(std::shared_ptr<Chakra::ETFeederNode> node); 46 | void removeNode(uint64_t node_id); 47 | void readNextWindow(); 48 | JSONNode readNode(uint64_t node_id); 49 | void resolveDep(); 50 | void pushBackIssuableNode(uint64_t node_id); 51 | void freeChildrenNodes(uint64_t node_id); 52 | bool isValidNode(); 53 | void push_to_queue(); 54 | bool is_queue_empty(); 55 | void queue_front(); 56 | void pop_from_queue(); 57 | void getNextIssuableNode(); 58 | uint64_t getNodeID(); 59 | std::string getNodeName(); 60 | int getNodeType(); 61 | bool isCPUOp(); 62 | uint64_t getRuntime(); 63 | uint64_t getNumOps(); 64 | uint64_t getTensorSize(); 65 | int64_t getCommType(); 66 | uint32_t getCommPriority(); 67 | uint64_t getCommSize(); 68 | uint32_t getCommSrc(); 69 | uint32_t getCommDst(); 70 | uint32_t getCommTag(); 71 | bool hasNodesToIssue(); 72 | void lookupNode(uint64_t node_id); 73 | void getChildren( 74 | std::vector<std::shared_ptr<Chakra::ETFeederNode>>& childrenNodes); 75 | void getChildren(std::vector<JSONNode>& childrenNodes); 76 | int64_t findNodeIndexJSON(uint64_t node_id); 77 | }; -------------------------------------------------------------------------------- /src/feeder/et_feeder_node.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <memory> 4 | #include <string> 5 | #include <unordered_set> 6 | #include <vector> 7 | 8 | #include "et_def.pb.h" 9 | 10 | namespace Chakra { 11 | 12 | class ETFeederNode { 13 | public: 14 | ETFeederNode(std::shared_ptr<ChakraProtoMsg::Node> node); 15 | std::shared_ptr<ChakraProtoMsg::Node> getChakraNode(); 16 | void addChild(std::shared_ptr<ETFeederNode> node); 17 | std::vector<std::shared_ptr<ETFeederNode>> getChildren(); 18 | void addDepUnresolvedParentID(uint64_t node_id); 19 | std::vector<uint64_t> getDepUnresolvedParentIDs(); 20 | void setDepUnresolvedParentIDs( 21 | std::vector<uint64_t> const& dep_unresolved_parent_ids); 22 | 23 | const ChakraProtoMsg::AttributeProto& get_other_attr( 24 | const std::string& attr_name) const; 25 | bool has_other_attr(const std::string& attr_name) const; 26 | 27 | uint64_t id(); 28 | std::string name(); 29 | bool is_cpu_op(); 30 | ChakraProtoMsg::NodeType type(); 31 | uint64_t runtime(); 32 | uint64_t num_ops(); 33 | uint32_t tensor_loc(); 34 | uint64_t tensor_size(); 35 | ChakraProtoMsg::CollectiveCommType comm_type(); 36 | uint32_t comm_priority(); 37 | uint64_t comm_size(); 38 | uint32_t comm_src(); 39 | uint32_t comm_dst(); 40 | uint32_t comm_tag(); 41 | std::string pg_name(); 42 | std::string get_inputs_values() const; 43 | std::string get_inputs_shapes() const; 44 | std::string get_inputs_types() const; 45 | std::string get_outputs_values() const; 46 | std::string get_outputs_shapes() const; 47 | std::string get_outputs_types() const; 48 | 49 | private: 50 | void assign_attr_val( 51 | std::shared_ptr<ChakraProtoMsg::Node> node, 52 | int i, 53 | void* member); 54 | 55 | std::shared_ptr<ChakraProtoMsg::Node> node_{nullptr}; 56 | std::unordered_set<std::shared_ptr<ETFeederNode>> children_set_{}; 57 | std::vector<std::shared_ptr<ETFeederNode>> children_vec_{}; 58 | std::vector<uint64_t> dep_unresolved_parent_ids_{}; 59 | std::unordered_map<std::string, ChakraProtoMsg::AttributeProto> 60 | other_attrs_{}; 61 | 62 | uint64_t id_; 63 | std::string name_; 64 | bool is_cpu_op_; 65 | uint64_t runtime_; 66 | uint64_t num_ops_; 67 | uint32_t tensor_loc_; 68 | uint64_t tensor_size_; 69 | ChakraProtoMsg::CollectiveCommType comm_type_; 70 | uint32_t comm_priority_; 71 | uint64_t comm_size_; 72 | uint32_t comm_src_; 73 | uint32_t comm_dst_; 74 | uint32_t comm_tag_; 75 | std::string pg_name_; 76 | std::string inputs_values_; 77 | std::string inputs_shapes_; 78 | std::string inputs_types_; 79 | std::string outputs_values_; 80 | std::string outputs_shapes_; 81 | std::string outputs_types_; 82 | }; 83 | 84 | } // namespace Chakra 85 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | --- 2 | AccessModifierOffset: -1 3 | AlignAfterOpenBracket: AlwaysBreak 4 | AlignConsecutiveAssignments: false 5 | AlignConsecutiveDeclarations: false 6 | AlignEscapedNewlinesLeft: true 7 | AlignOperands: false 8 | AlignTrailingComments: false 9 | AllowAllParametersOfDeclarationOnNextLine: false 10 | AllowShortBlocksOnASingleLine: false 11 | AllowShortCaseLabelsOnASingleLine: false 12 | AllowShortFunctionsOnASingleLine: Empty 13 | AllowShortIfStatementsOnASingleLine: false 14 | AllowShortLoopsOnASingleLine: false 15 | AlwaysBreakAfterReturnType: None 16 | AlwaysBreakBeforeMultilineStrings: true 17 | AlwaysBreakTemplateDeclarations: true 18 | BinPackArguments: false 19 | BinPackParameters: false 20 | BraceWrapping: 21 | AfterClass: false 22 | AfterControlStatement: false 23 | AfterEnum: false 24 | AfterFunction: false 25 | AfterNamespace: false 26 | AfterObjCDeclaration: false 27 | AfterStruct: false 28 | AfterUnion: false 29 | BeforeCatch: false 30 | BeforeElse: false 31 | IndentBraces: false 32 | BreakBeforeBinaryOperators: None 33 | BreakBeforeBraces: Attach 34 | BreakBeforeTernaryOperators: true 35 | BreakConstructorInitializersBeforeComma: false 36 | BreakAfterJavaFieldAnnotations: false 37 | BreakStringLiterals: false 38 | ColumnLimit: 80 39 | CommentPragmas: '^ IWYU pragma:' 40 | CompactNamespaces: false 41 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 42 | ConstructorInitializerIndentWidth: 4 43 | ContinuationIndentWidth: 4 44 | Cpp11BracedListStyle: true 45 | DerivePointerAlignment: false 46 | DisableFormat: false 47 | ForEachMacros: [ FOR_EACH_RANGE, FOR_EACH, ] 48 | IncludeCategories: 49 | - Regex: '^<.*\.h(pp)?>' 50 | Priority: 1 51 | - Regex: '^<.*' 52 | Priority: 2 53 | - Regex: '.*' 54 | Priority: 3 55 |
IndentCaseLabels: true 56 | IndentWidth: 2 57 | IndentWrappedFunctionNames: false 58 | KeepEmptyLinesAtTheStartOfBlocks: false 59 | MacroBlockBegin: '' 60 | MacroBlockEnd: '' 61 | MaxEmptyLinesToKeep: 1 62 | NamespaceIndentation: None 63 | ObjCBlockIndentWidth: 2 64 | ObjCSpaceAfterProperty: false 65 | ObjCSpaceBeforeProtocolList: false 66 | PenaltyBreakBeforeFirstCallParameter: 1 67 | PenaltyBreakComment: 300 68 | PenaltyBreakFirstLessLess: 120 69 | PenaltyBreakString: 1000 70 | PenaltyExcessCharacter: 1000000 71 | PenaltyReturnTypeOnItsOwnLine: 2000000 72 | PointerAlignment: Left 73 | ReflowComments: true 74 | SortIncludes: true 75 | SpaceAfterCStyleCast: false 76 | SpaceBeforeAssignmentOperators: true 77 | SpaceBeforeParens: ControlStatements 78 | SpaceInEmptyParentheses: false 79 | SpacesBeforeTrailingComments: 1 80 | SpacesInAngles: false 81 | SpacesInContainerLiterals: true 82 | SpacesInCStyleCastParentheses: false 83 | SpacesInParentheses: false 84 | SpacesInSquareBrackets: false 85 | Standard: Cpp11 86 | TabWidth: 8 87 | UseTab: Never 88 | ... 89 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ "setuptools>=61", 3 | "wheel", 4 | "setuptools-grpc", 5 | "grpcio-tools"] 6 | build-backend = "setuptools.build_meta" 7 | 8 | [project] 9 | name = "chakra" 10 | requires-python = ">=3.7" 11 | version = "0.0.4" 12 | readme = "README.md" 13 | license = {file = "LICENSE.md"} 14 | authors = [ 15 | {name = "MLCommons", email = "chakra@mlcommons.org"}, 16 | ] 17 | dependencies = [ 18 | "protobuf", 19 | "graphviz", 20 | "networkx", 21 | "pydot", 22 | "HolisticTraceAnalysis @ git+https://github.com/facebookresearch/HolisticTraceAnalysis.git@d731cc2e2249976c97129d409a83bd53d93051f6" 23 | ] 24 | 25 | [project.urls] 26 | Homepage = "https://github.com/mlcommons/chakra" 27 | Documentation = "https://github.com/mlcommons/chakra/README.md" 28 | Repository = "https://github.com/mlcommons/chakra.git" 29 | 30 | [tool.setuptools.package-dir] 31 | "chakra.schema.protobuf" = "schema/protobuf" 32 | "chakra.src.converter" = "src/converter" 33 | "chakra.src.generator" = "src/generator" 34 | "chakra.src.jsonizer" = "src/jsonizer" 35 | "chakra.src.third_party" = "src/third_party" 36 | "chakra.src.timeline_visualizer" = "src/timeline_visualizer" 37 | "chakra.src.trace_link" = "src/trace_link" 38 | "chakra.src.visualizer" = "src/visualizer" 39 | 40 | [tool.setuptools.package-data] 41 | "chakra.schema.protobuf" = ["et_def.proto"] 42 | 43 | [project.scripts] 44 | chakra_converter = "chakra.src.converter.converter:main" 45 | chakra_generator = "chakra.src.generator.generator:main" 46 | chakra_jsonizer = "chakra.src.jsonizer.jsonizer:main" 47 | chakra_timeline_visualizer = "chakra.src.timeline_visualizer.timeline_visualizer:main" 48 | chakra_trace_link = "chakra.src.trace_link.trace_link:main" 49 | chakra_visualizer = "chakra.src.visualizer.visualizer:main" 50 | 51 | [tool.ruff] 52 | target-version = "py39" 53 | line-length = 120 54 | exclude = [ 55 | "src/converter/text_converter.py", 56 | "src/third_party/utils/protolib.py", 57 | ] 58 | 59 | [tool.ruff.lint] 60 | select = ["I", "B", "E", "D", "F", "SIM", "W", "C90", "EXE"] 61 | ignore = [ 62 | "D407", # Missing dashed underline after section 63 | "D203", # conflicts with D211 64 | "D212", # conflicts with D213 65 | "D413", # Missing blank line after last section 66 | 67 | # TODO: Remove these once we have 
docstrings 68 | "D100", # Missing docstring in public module 69 | "D102", # Missing docstring in public method 70 | "D103", # Missing docstring in public function 71 | "D104", # Missing docstring in public package 72 | "D107", # Missing docstring in `__init__` 73 | ] 74 | 75 | [tool.ruff.lint.per-file-ignores] 76 | "**/tests/*" = ["D"] 77 | 78 | [tool.ruff.format] 79 | indent-style = "space" 80 | 81 | [tool.pyright] 82 | typeCheckingMode = "basic" 83 | exclude = [ 84 | "**/__pycache__", 85 | "**/build/", 86 | "setup.py", 87 | "src/third_party/utils/protolib.py" 88 | ] 89 | reportMissingImports = false 90 | reportAttributeAccessIssue = false 91 | 92 | [tool.vulture] 93 | ignore_names = ["mock_process_thread"] 94 | min_confidence = 100 95 | -------------------------------------------------------------------------------- /src/visualizer/visualizer.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import re 3 | 4 | import graphviz 5 | import networkx as nx 6 | 7 | from ...schema.protobuf.et_def_pb2 import GlobalMetadata, Node 8 | from ..third_party.utils.protolib import decodeMessage as decode_message 9 | from ..third_party.utils.protolib import openFileRd as open_file_rd 10 | 11 | 12 | def escape_label(label: str) -> str: 13 | """ 14 | Escapes special characters in labels for graph rendering. 15 | 16 | Args: 17 | label (str): The original label string. 18 | 19 | Returns: 20 | str: The escaped label string. 21 | """ 22 | # Define special characters to escape 23 | special_chars = "{}()<>\\[\\]|&-" 24 | # Escape special characters 25 | return re.sub(f"([{special_chars}])", r"\\\1", label) 26 | 27 | 28 | def main() -> None: 29 | """Generate an output graph file in the specified format (PDF, DOT, or GraphML).""" 30 | parser = argparse.ArgumentParser(description="Execution Trace Visualizer") 31 | parser.add_argument("--input_filename", type=str, required=True, help="Input Chakra execution trace filename") 32 | parser.add_argument( 33 | "--output_filename", 34 | type=str, 35 | required=True, 36 | help=( 37 | "Output graph filename. Supported extensions are pdf, dot, and graphml. " 38 | "Recommend using graphml for large graphs for rendering speed." 
39 | ), 40 | ) 41 | args = parser.parse_args() 42 | 43 | et = open_file_rd(args.input_filename) 44 | node = Node() 45 | gm = GlobalMetadata() 46 | 47 | # Determine the file type to be created based on the output filename 48 | if args.output_filename.endswith((".pdf", ".dot")): 49 | f = graphviz.Digraph() 50 | decode_message(et, gm) 51 | while decode_message(et, node): 52 | escaped_label = escape_label(node.name) 53 | f.node(name=f"{node.id}", label=escaped_label, id=str(node.id), shape="record") 54 | 55 | # Handling data dependencies 56 | for data_dep_id in node.data_deps: 57 | f.edge(str(data_dep_id), str(node.id), arrowhead="normal") # using "normal" arrow for data_deps 58 | 59 | # Handling control dependencies 60 | for ctrl_dep_id in node.ctrl_deps: 61 | f.edge(str(ctrl_dep_id), str(node.id), arrowhead="tee") # using "tee" arrow for ctrl_deps 62 | 63 | if args.output_filename.endswith(".pdf"): 64 | f.render(args.output_filename.replace(".pdf", ""), format="pdf", cleanup=True) 65 | else: # ends with ".dot" 66 | f.render(args.output_filename.replace(".dot", ""), format="dot", cleanup=True) 67 | elif args.output_filename.endswith(".graphml"): 68 | G = nx.DiGraph() 69 | decode_message(et, gm) 70 | while decode_message(et, node): 71 | G.add_node(node.id, label=node.name) 72 | 73 | # Handling data dependencies 74 | for data_dep_id in node.data_deps: 75 | G.add_edge(data_dep_id, node.id, dependency="data") 76 | 77 | # Handling control dependencies 78 | for ctrl_dep_id in node.ctrl_deps: 79 | G.add_edge(ctrl_dep_id, node.id, dependency="control") 80 | 81 | nx.write_graphml(G, args.output_filename) 82 | else: 83 | print("Unknown output file extension. Must be one of pdf, dot, graphml.") 84 | 85 | et.close() 86 | 87 | 88 | if __name__ == "__main__": 89 | main() 90 | -------------------------------------------------------------------------------- /src/converter/pytorch_tensor.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | 4 | class PyTorchTensor: 5 | """ 6 | Represents a tensor with its associated properties. 7 | 8 | Attributes: 9 | tensor_data (List[int]): Data of the tensor including tensor_id, storage_id, offset, number of elements, and 10 | size of each element in bytes. 11 | 12 | Note: 13 | For more details on the tensor data structure, refer to: 14 | https://github.com/pytorch/pytorch/blob/7cd48df2dae7e2194438b162968c47d1f05bf20e/torch/csrc/profiler/ 15 | standalone/execution_trace_observer.cpp#L400 16 | """ 17 | 18 | def __init__(self, tensor_data: List[int]) -> None: 19 | """ 20 | Initialize a PyTorchTensor object with the provided tensor data. 21 | 22 | Args: 23 | tensor_data (List[int]): Data of the tensor including tensor_id, storage_id, offset, number of elements, 24 | and size of each element in bytes. 25 | """ 26 | self.tensor_data = tensor_data 27 | 28 | def is_valid(self) -> bool: 29 | """ 30 | Check if the tensor data is valid. 31 | 32 | Returns 33 | bool: True if tensor_data is a list of exactly six integers, False otherwise. 34 | """ 35 | return ( 36 | isinstance(self.tensor_data, list) 37 | and len(self.tensor_data) == 6 38 | and all(isinstance(item, int) for item in self.tensor_data) 39 | ) 40 | 41 | @property 42 | def tensor_id(self) -> int: 43 | """ 44 | Return the tensor ID. 45 | 46 | Returns 47 | int: Tensor ID. 48 | """ 49 | return self.tensor_data[0] 50 | 51 | @property 52 | def storage_id(self) -> int: 53 | """ 54 | Return the storage ID. 55 | 56 | Returns 57 | int: Storage ID. 
58 | """ 59 | return self.tensor_data[1] 60 | 61 | @property 62 | def offset(self) -> int: 63 | """ 64 | Return the offset. 65 | 66 | Returns 67 | int: Offset value. 68 | """ 69 | return self.tensor_data[2] 70 | 71 | @property 72 | def num_elem(self) -> int: 73 | """ 74 | Return the number of elements in the tensor. 75 | 76 | Returns 77 | int: Number of elements. 78 | """ 79 | return self.tensor_data[3] 80 | 81 | @property 82 | def elem_bytes(self) -> int: 83 | """ 84 | Return the size of each element in bytes. 85 | 86 | Returns 87 | int: Size of each element in bytes. 88 | """ 89 | return self.tensor_data[4] 90 | 91 | def has_valid_storage_id(self) -> bool: 92 | """ 93 | Check if the tensor has a valid storage ID. 94 | 95 | Returns 96 | bool: True if the storage ID is greater than 0, False otherwise. 97 | """ 98 | return self.storage_id > 0 99 | 100 | 101 | def list_to_pytorch_tensor(tensor_list: List[int]) -> PyTorchTensor: 102 | """ 103 | Convert a list representation of a tensor into a PyTorchTensor object. 104 | 105 | Args: 106 | tensor_list (List[int]): Data representing a tensor, including tensor_id, storage_id, offset, num_elem, 107 | elem_bytes. 108 | 109 | Returns: 110 | PyTorchTensor: The PyTorchTensor object created from the data. 111 | """ 112 | return PyTorchTensor(tensor_list) 113 | -------------------------------------------------------------------------------- /schema/protobuf/et_def.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package ChakraProtoMsg; 4 | 5 | message AttributeProto { 6 | string name = 1; 7 | string doc_string = 2; 8 | 9 | oneof value { 10 | double double_val = 3; 11 | DoubleList double_list = 4; 12 | float float_val = 5; 13 | FloatList float_list = 6; 14 | int32 int32_val = 7; 15 | Int32List int32_list = 8; 16 | int64 int64_val = 9; 17 | Int64List int64_list = 10; 18 | uint32 uint32_val = 11; 19 | Uint32List uint32_list = 12; 20 | uint64 uint64_val = 13; 21 | Uint64List uint64_list = 14; 22 | sint32 sint32_val = 15; 23 | Sint32List sint32_list = 16; 24 | sint64 sint64_val = 17; 25 | Sint64List sint64_list = 18; 26 | fixed32 fixed32_val = 19; 27 | Fixed32List fixed32_list = 20; 28 | fixed64 fixed64_val = 21; 29 | Fixed64List fixed64_list = 22; 30 | sfixed32 sfixed32_val = 23; 31 | Sfixed32List sfixed32_list = 24; 32 | sfixed64 sfixed64_val = 25; 33 | Sfixed64List sfixed64_list = 26; 34 | bool bool_val = 27; 35 | BoolList bool_list = 28; 36 | string string_val = 29; 37 | StringList string_list = 30; 38 | bytes bytes_val = 31; 39 | BytesList bytes_list = 32; 40 | } 41 | } 42 | 43 | message DoubleList { 44 | repeated double values = 1; 45 | } 46 | 47 | message FloatList { 48 | repeated float values = 1; 49 | } 50 | 51 | message Int32List { 52 | repeated int32 values = 1; 53 | } 54 | 55 | message Int64List { 56 | repeated int64 values = 1; 57 | } 58 | 59 | message Uint32List { 60 | repeated uint32 values = 1; 61 | } 62 | 63 | message Uint64List { 64 | repeated uint64 values = 1; 65 | } 66 | 67 | message Sint32List { 68 | repeated sint32 values = 1; 69 | } 70 | 71 | message Sint64List { 72 | repeated sint64 values = 1; 73 | } 74 | 75 | message Fixed32List { 76 | repeated fixed32 values = 1; 77 | } 78 | 79 | message Fixed64List { 80 | repeated fixed64 values = 1; 81 | } 82 | 83 | message Sfixed32List { 84 | repeated sfixed32 values = 1; 85 | } 86 | 87 | message Sfixed64List { 88 | repeated sfixed64 values = 1; 89 | } 90 | 91 | message BoolList { 92 | repeated bool values = 1; 93 | } 94 | 95 
95 | message StringList {
96 |   repeated string values = 1;
97 | }
98 | 
99 | message BytesList {
100 |   repeated bytes values = 1;
101 | }
102 | 
103 | message GlobalMetadata {
104 |   string version = 1;
105 |   repeated AttributeProto attr = 2;
106 | }
107 | 
108 | enum NodeType {
109 |   INVALID_NODE = 0;
110 |   METADATA_NODE = 1;
111 |   MEM_LOAD_NODE = 2;
112 |   MEM_STORE_NODE = 3;
113 |   COMP_NODE = 4;
114 |   COMM_SEND_NODE = 5;
115 |   COMM_RECV_NODE = 6;
116 |   COMM_COLL_NODE = 7;
117 | }
118 | 
119 | enum CollectiveCommType {
120 |   ALL_REDUCE = 0;
121 |   REDUCE = 1;
122 |   ALL_GATHER = 2;
123 |   GATHER = 3;
124 |   SCATTER = 4;
125 |   BROADCAST = 5;
126 |   ALL_TO_ALL = 6;
127 |   REDUCE_SCATTER = 7;
128 |   REDUCE_SCATTER_BLOCK = 8;
129 |   BARRIER = 9;
130 | }
131 | 
132 | message Node {
133 |   uint64 id = 1;
134 |   string name = 2;
135 |   NodeType type = 3;
136 | 
137 |   // Control and data dependencies
138 |   repeated uint64 ctrl_deps = 4;
139 |   repeated uint64 data_deps = 5;
140 | 
141 |   // Timing information
142 |   uint64 start_time_micros = 6;
143 |   uint64 duration_micros = 7;
144 | 
145 |   IOInfo inputs = 8;
146 |   IOInfo outputs = 9;
147 |   repeated AttributeProto attr = 10;
148 | }
149 | 
150 | message IOInfo {
151 |   string values = 1;
152 |   string shapes = 2;
153 |   string types = 3;
154 | }
155 | 
156 | message Tensor {
157 |   uint64 tensor_id = 1; // A unique ID for the TensorImpl object.
158 |   uint64 storage_id = 2; // A unique ID for the underlying storage object.
159 |   uint64 offset = 3; // Offset to the storage memory.
160 |   uint64 num_elem = 4; // Number of elements in the storage.
161 |   uint64 elem_bytes = 5; // Number of bytes per element.
162 |   string device = 6; // Tensor object device location.
163 | }
164 | 
-------------------------------------------------------------------------------- /tests/feeder/tests.cpp: --------------------------------------------------------------------------------
1 | #include <gtest/gtest.h>
2 | #include "et_feeder.h"
3 | 
4 | class ETFeederTest : public ::testing::Test {
5 |  protected:
6 |   ETFeederTest() {}
7 |   virtual ~ETFeederTest() {}
8 | 
9 |   void SetUp(const std::string& filename) {
10 |     trace = new Chakra::ETFeeder(filename);
11 |   }
12 | 
13 |   virtual void TearDown() {
14 |     delete trace;
15 |   }
16 | 
17 |   Chakra::ETFeeder* trace;
18 | };
19 | 
20 | TEST_F(ETFeederTest, ConstructorNodeIDTest) {
21 |   SetUp("tests/data/chakra.0.et");
22 |   std::shared_ptr<Chakra::ETFeederNode> node = trace->getNextIssuableNode();
23 |   uint64_t firstNodeID = node->id();
24 |   ASSERT_EQ(firstNodeID, 216);
25 | 
26 |   node = trace->getNextIssuableNode();
27 |   uint64_t secondNodeID = node->id();
28 |   ASSERT_EQ(secondNodeID, 432);
29 | }
30 | 
31 | TEST_F(ETFeederTest, ConstructorNodeValuesTest) {
32 |   SetUp("tests/data/chakra.0.et");
33 |   std::shared_ptr<Chakra::ETFeederNode> node = trace->getNextIssuableNode();
34 |   ChakraProtoMsg::NodeType firstNodeType = node->type();
35 |   ASSERT_EQ(firstNodeType, ChakraProtoMsg::COMP_NODE);
36 |   ASSERT_TRUE(node->is_cpu_op());
37 | 
38 |   std::string attr = "rf_id";
39 |   ChakraProtoMsg::AttributeProto rf_id = node->get_other_attr(attr);
40 |   ASSERT_EQ(rf_id.int64_val(), 2);
41 | 
42 |   node = trace->getNextIssuableNode();
43 |   uint64_t secondNodeType = node->type();
44 |   ASSERT_EQ(secondNodeType, ChakraProtoMsg::COMM_COLL_NODE);
45 |   ASSERT_TRUE(node->is_cpu_op());
46 | 
47 |   rf_id = node->get_other_attr(attr);
48 |   ASSERT_EQ(rf_id.int64_val(), 110);
49 | }
50 | 
51 | TEST_F(ETFeederTest, ConstructorETFeederTest) {
52 |   SetUp("tests/data/chakra.0.et");
53 |   std::shared_ptr<Chakra::ETFeederNode> node = trace->getNextIssuableNode();
54 |   std::vector<std::shared_ptr<Chakra::ETFeederNode>>
children =
55 |       node->getChildren();
56 |   ASSERT_EQ(children[0]->id(), 217);
57 |   ASSERT_EQ(children[1]->id(), 430);
58 |   ASSERT_EQ(children[2]->id(), 435);
59 | }
60 | 
61 | TEST_F(ETFeederTest, RemoveTest) {
62 |   SetUp("tests/data/chakra.0.et");
63 |   std::shared_ptr<Chakra::ETFeederNode> node = trace->lookupNode(216);
64 |   ASSERT_EQ(node->id(), 216);
65 |   trace->removeNode(216);
66 |   freopen("/dev/null", "w", stderr);
67 |   try {
68 |     node = trace->lookupNode(216);
69 |     ASSERT_TRUE(false) << "node should be removed \n";
70 |   } catch (const std::exception& e) {
71 |     // this is the desired behaviour
72 |   }
73 |   freopen("/dev/tty", "w", stderr);
74 | }
75 | 
76 | TEST_F(ETFeederTest, RemoveAndGetNextTest) {
77 |   SetUp("tests/data/chakra.0.et");
78 |   std::shared_ptr<Chakra::ETFeederNode> node = trace->lookupNode(216);
79 |   ASSERT_EQ(node->id(), 216);
80 |   trace->removeNode(216);
81 |   node = trace->getNextIssuableNode();
82 |   ASSERT_EQ(node->id(), 216);
83 | }
84 | 
85 | TEST_F(ETFeederTest, FreeChildrenTest) {
86 |   SetUp("tests/data/chakra.0.et");
87 |   std::shared_ptr<Chakra::ETFeederNode> node = trace->lookupNode(216);
88 |   ASSERT_EQ(node->id(), 216);
89 |   trace->freeChildrenNodes(216);
90 |   node = trace->getNextIssuableNode();
91 |   ASSERT_EQ(node->id(), 216);
92 |   node = trace->getNextIssuableNode();
93 |   ASSERT_EQ(node->id(), 217);
94 | }
95 | 
96 | TEST_F(ETFeederTest, HasNodesToIssueTest) {
97 |   SetUp("tests/data/chakra.0.et");
98 |   std::shared_ptr<Chakra::ETFeederNode> node = trace->getNextIssuableNode();
99 |   ASSERT_EQ(node->id(), 216);
100 |   ASSERT_TRUE(trace->hasNodesToIssue());
101 |   trace->removeNode(5);
102 |   ASSERT_TRUE(trace->hasNodesToIssue());
103 | }
104 | 
105 | TEST_F(ETFeederTest, PushBackIssuableNodeTest) {
106 |   SetUp("tests/data/chakra.0.et");
107 |   std::shared_ptr<Chakra::ETFeederNode> node;
108 |   trace->pushBackIssuableNode(217);
109 |   node = trace->getNextIssuableNode();
110 |   ASSERT_EQ(node->id(), 216);
111 |   node = trace->getNextIssuableNode();
112 |   ASSERT_EQ(node->id(), 217);
113 | }
114 | 
115 | TEST_F(ETFeederTest, AddNodeTest) {
116 |   SetUp("tests/data/chakra.0.et");
117 |   std::shared_ptr<Chakra::ETFeederNode> node;
118 |   node = trace->lookupNode(216);
119 |   trace->removeNode(216);
120 |   trace->addNode(node);
121 |   std::shared_ptr<Chakra::ETFeederNode> node2;
122 |   node2 = trace->lookupNode(216);
123 |   ASSERT_EQ(node2->id(), 216);
124 | }
125 | 
126 | TEST_F(ETFeederTest, NodeGetChildrenTest) {
127 |   SetUp("tests/data/chakra.0.et");
128 |   std::shared_ptr<Chakra::ETFeederNode> node;
129 |   node = trace->lookupNode(216);
130 |   std::vector<std::shared_ptr<Chakra::ETFeederNode>> children =
131 |       node->getChildren();
132 |   ASSERT_EQ(children[0]->id(), 217);
133 |   ASSERT_EQ(children[2]->id(), 435);
134 | }
135 | 
136 | int main(int argc, char** argv) {
137 |   ::testing::InitGoogleTest(&argc, argv);
138 |   return RUN_ALL_TESTS();
139 | }
-------------------------------------------------------------------------------- /.github/workflows/codeql.yml: --------------------------------------------------------------------------------
1 | # For most projects, this workflow file will not need changing; you simply need
2 | # to commit it to your repository.
3 | #
4 | # You may wish to alter this file to override the set of languages analyzed,
5 | # or to provide custom queries or build logic.
6 | #
7 | # ******** NOTE ********
8 | # We have attempted to detect the languages in your repository. Please check
9 | # the `language` matrix defined below to confirm you have the correct set of
10 | # supported CodeQL languages.
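# Note on the matrix below: c-cpp is analyzed with build-mode "manual"
# because the feeder sources only compile after protoc has generated
# et_def.pb.cc/.h from schema/protobuf/et_def.proto; the manual build step
# near the end of this workflow runs that code generation before g++.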
11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ "main" ] 17 | pull_request: 18 | branches: [ "main" ] 19 | # schedule: 20 | # - cron: '36 19 * * 3' 21 | 22 | jobs: 23 | analyze: 24 | name: Analyze (${{ matrix.language }}) 25 | # Runner size impacts CodeQL analysis time. To learn more, please see: 26 | # - https://gh.io/recommended-hardware-resources-for-running-codeql 27 | # - https://gh.io/supported-runners-and-hardware-resources 28 | # - https://gh.io/using-larger-runners (GitHub.com only) 29 | # Consider using larger runners or machines with greater resources for possible analysis time improvements. 30 | runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} 31 | timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} 32 | permissions: 33 | # required for all workflows 34 | security-events: write 35 | 36 | # required to fetch internal or private CodeQL packs 37 | packages: read 38 | 39 | # only required for workflows in private repositories 40 | actions: read 41 | contents: read 42 | 43 | strategy: 44 | fail-fast: false 45 | matrix: 46 | include: 47 | - language: c-cpp 48 | build-mode: manual 49 | - language: python 50 | build-mode: none 51 | # CodeQL supports the following values keywords for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' 52 | # Use `c-cpp` to analyze code written in C, C++ or both 53 | # Use 'java-kotlin' to analyze code written in Java, Kotlin or both 54 | # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both 55 | # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, 56 | # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. 57 | # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how 58 | # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages 59 | steps: 60 | - name: Checkout repository 61 | uses: actions/checkout@v4 62 | 63 | # Initializes the CodeQL tools for scanning. 64 | - name: Initialize CodeQL 65 | uses: github/codeql-action/init@v3 66 | with: 67 | languages: ${{ matrix.language }} 68 | build-mode: ${{ matrix.build-mode }} 69 | queries: security-extended 70 | # If you wish to specify custom queries, you can do so here or in a config file. 71 | # By default, queries listed here will override any specified in a config file. 72 | # Prefix the list here with "+" to use these queries and those in the config file. 73 | 74 | # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 75 | # queries: security-extended,security-and-quality 76 | 77 | # If the analyze step fails for one of the languages you are analyzing with 78 | # "We were unable to automatically build your code", modify the matrix above 79 | # to set the build mode to "manual" for that language. Then modify this step 80 | # to build your code. 81 | # ℹ️ Command-line programs to run using the OS shell. 
82 |       # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
83 |       - if: matrix.build-mode == 'manual'
84 |         run: |
85 |           sudo apt update
86 |           sudo apt install protobuf-compiler libprotobuf-dev
87 |           SCRIPT_DIR=.
88 |           BUILD_DIR="${SCRIPT_DIR:?}"/build
89 |           CHAKRA_ET_DIR="${SCRIPT_DIR:?}"/schema/protobuf
90 |           protoc et_def.proto \
91 |             --proto_path="${CHAKRA_ET_DIR:?}" \
92 |             --cpp_out="${CHAKRA_ET_DIR:?}"
93 |           g++ -shared -fPIC -Wall src/feeder/et_feeder.cpp src/feeder/et_feeder_node.cpp src/third_party/utils/protoio.cc schema/protobuf/et_def.pb.cc -o libfeeder.so -lprotobuf -I . -I src/feeder -I src/third_party/utils -I schema/protobuf
94 | 
95 |       - name: Perform CodeQL Analysis
96 |         uses: github/codeql-action/analyze@v3
97 |         with:
98 |           category: "/language:${{matrix.language}}"
99 | 
-------------------------------------------------------------------------------- /src/feeder/json_node.h: --------------------------------------------------------------------------------
1 | #pragma once
2 | 
3 | #include <cstdint>
4 | #include <functional>
5 | #include <iostream>
6 | #include <nlohmann/json.hpp>
7 | #include <set>
8 | #include <string>
9 | #include <vector>
10 | 
11 | using json = nlohmann::json;
12 | 
13 | enum NodeType : int {
14 |   INVALID_NODE = 0,
15 |   METADATA_NODE = 1,
16 |   MEM_LOAD_NODE = 2,
17 |   MEM_STORE_NODE = 3,
18 |   COMP_NODE = 4,
19 |   COMM_SEND_NODE = 5,
20 |   COMM_RECV_NODE = 6,
21 |   COMM_COLL_NODE = 7
22 | };
23 | 
24 | class JSONNode {
25 |  private:
26 |   uint64_t node_id;
27 |   std::string node_name;
28 |   int node_type;
29 |   bool is_cpu_op;
30 |   uint64_t runtime;
31 |   uint64_t num_ops;
32 |   uint64_t tensor_size;
33 |   int64_t comm_type;
34 |   uint32_t comm_priority;
35 |   uint64_t comm_size;
36 |   uint32_t comm_src;
37 |   uint32_t comm_dst;
38 |   uint32_t comm_tag;
39 | 
40 |  public:
41 |   std::vector<uint64_t> data_deps{};
42 |   std::vector<uint64_t> dep_unresolved_parent_ids_json{};
43 |   std::vector<JSONNode> children_vec_json{};
44 | 
45 |   // Compare function for set
46 |   struct CompareJSONNodesLT {
47 |     bool operator()(const JSONNode& a, const JSONNode& b) const {
48 |       return a.node_id < b.node_id;
49 |     }
50 |   };
51 |   std::set<JSONNode, CompareJSONNodesLT> children_set_json{};
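  // Bookkeeping sketch: children_vec_json preserves the order in which
  // children are attached and is what getChildren() returns, while
  // children_set_json exists only so addChild() can cheaply skip a child
  // that dependency resolution tries to attach twice.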
52 | 
53 |   JSONNode();
54 |   JSONNode(const JSONNode& t);
55 |   JSONNode(json data, uint64_t id);
56 |   uint64_t id() const;
57 |   std::string name() const;
58 |   int type() const;
59 |   bool isCPUOp() const;
60 |   uint64_t getRuntime() const;
61 |   uint64_t getNumOps() const;
62 |   uint64_t getTensorSize() const;
63 |   int64_t getCommType() const;
64 |   uint32_t getCommPriority() const;
65 |   uint64_t getCommSize() const;
66 |   uint32_t getCommSrc() const;
67 |   uint32_t getCommDst() const;
68 |   uint32_t getCommTag() const;
69 |   void addDepUnresolvedParentID(uint64_t node_id);
70 |   std::vector<uint64_t> getDepUnresolvedParentIDs();
71 |   void setDepUnresolvedParentIDs(
72 |       std::vector<uint64_t> const& dep_unresolved_parent_ids);
73 |   void addChild(JSONNode node);
74 |   std::vector<JSONNode> getChildren();
75 | 
76 |   // Define the == operator for comparison
77 |   bool operator==(const JSONNode& other) const {
78 |     return node_id == other.node_id && node_name == other.node_name &&
79 |         node_type == other.node_type && is_cpu_op == other.is_cpu_op &&
80 |         runtime == other.runtime && num_ops == other.num_ops &&
81 |         tensor_size == other.tensor_size && comm_type == other.comm_type &&
82 |         comm_priority == other.comm_priority && comm_size == other.comm_size &&
83 |         comm_src == other.comm_src && comm_dst == other.comm_dst &&
84 |         comm_tag == other.comm_tag && data_deps == other.data_deps &&
85 |         dep_unresolved_parent_ids_json ==
86 |         other.dep_unresolved_parent_ids_json &&
87 |         children_vec_json == other.children_vec_json &&
88 |         children_set_json == other.children_set_json;
89 |   }
90 | 
91 |   // Overload the assignment operator
92 |   JSONNode& operator=(const JSONNode& other) {
93 |     if (this != &other) {
94 |       // Copy all member variables
95 |       node_id = other.node_id;
96 |       node_name = other.node_name;
97 |       node_type = other.node_type;
98 |       is_cpu_op = other.is_cpu_op;
99 |       runtime = other.runtime;
100 |       num_ops = other.num_ops;
101 |       tensor_size = other.tensor_size;
102 |       comm_type = other.comm_type;
103 |       comm_priority = other.comm_priority;
104 |       comm_size = other.comm_size;
105 |       comm_src = other.comm_src;
106 |       comm_dst = other.comm_dst;
107 |       comm_tag = other.comm_tag;
108 |       data_deps = other.data_deps;
109 |       dep_unresolved_parent_ids_json = other.dep_unresolved_parent_ids_json;
110 |       children_vec_json = other.children_vec_json;
111 |       children_set_json = other.children_set_json;
112 |     }
113 |     return *this;
114 |   }
115 | };
116 | 
117 | // Define a custom hash function for unordered set
118 | namespace std {
119 | template <>
120 | struct hash<JSONNode> {
121 |   std::size_t operator()(const JSONNode& node) const {
122 |     std::size_t h1 = std::hash<uint64_t>()(node.id());
123 |     std::size_t h2 = std::hash<std::string>()(node.name());
124 |     std::size_t h3 = std::hash<int>()(node.type());
125 |     std::size_t h4 = std::hash<bool>()(node.isCPUOp());
126 |     std::size_t h5 = std::hash<uint64_t>()(node.getRuntime());
127 | 
128 |     // A prime number for bit manipulation
129 |     const std::size_t prime = 31;
130 | 
131 |     // Combine the hash of the current member with the hashes of the previous
132 |     // members
133 |     std::size_t hash = h1;
134 |     hash = hash * prime + h2;
135 |     hash = hash * prime + h3;
136 |     hash = hash * prime + h4;
137 |     hash = hash * prime + h5;
138 | 
139 |     return hash;
140 |   }
141 | };
142 | } // namespace std
143 | 
144 | // Compare function for JSON node for priority queue
145 | struct CompareJSONNodesGT
146 |     : public std::binary_function<JSONNode, JSONNode, bool> {
147 |   bool operator()(const JSONNode lhs, const JSONNode rhs) const {
148 |     return lhs.id() > rhs.id();
149 |   }
150 | };
-------------------------------------------------------------------------------- /src/converter/converter.py: --------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 | 
4 | from .pytorch_converter import PyTorchConverter
5 | from .text_converter import TextConverter
6 | 
7 | 
8 | def setup_logging(log_filename: str) -> None:
9 |     """Set up logging to file and stream handlers."""
10 |     formatter = logging.Formatter("%(levelname)s [%(asctime)s] %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p")
11 | 
12 |     file_handler = logging.FileHandler(log_filename, mode="w")
13 |     file_handler.setLevel(logging.DEBUG)
14 |     file_handler.setFormatter(formatter)
15 | 
16 |     # Console output: INFO and above print to the terminal, while the file
17 |     # handler above captures full DEBUG detail.
18 |     stream_handler = logging.StreamHandler()
19 |     stream_handler.setLevel(logging.INFO)
20 |     stream_handler.setFormatter(formatter)
21 | 
22 |     logging.basicConfig(level=logging.DEBUG, handlers=[file_handler, stream_handler])
23 | 
24 | 
25 | 
26 | 
27 | def convert_text(args: argparse.Namespace) -> None:
28 |     """Convert text input trace to Chakra execution trace."""
29 |     converter = TextConverter(args.input, args.output, args.num_npus, args.num_passes)
30 |     converter.convert()
31 | 
32 | 
33 | def convert_pytorch(args: argparse.Namespace) -> None:
34 |     """Convert PyTorch input trace to Chakra
execution trace.""" 35 | converter = PyTorchConverter() 36 | converter.convert(args.input, args.output, args.simulate) 37 | 38 | 39 | def main() -> None: 40 | """Convert to Chakra execution trace in the protobuf format.""" 41 | parser = argparse.ArgumentParser( 42 | description=( 43 | "Chakra execution trace converter for simulators. This converter is designed for any downstream " 44 | "simulators that take Chakra execution traces in the protobuf format. This converter takes an input file " 45 | "in another format and generates a Chakra execution trace output in the protobuf format." 46 | ) 47 | ) 48 | 49 | parser.add_argument("--log-filename", type=str, default="debug.log", help="Log filename") 50 | 51 | subparsers = parser.add_subparsers(title="subcommands", description="Valid subcommands", help="Input type") 52 | 53 | pytorch_parser = subparsers.add_parser( 54 | "PyTorch", 55 | help="Convert Chakra host + device execution trace in JSON to Chakra host + device execution trace in the " 56 | "Chakra schema with protobuf format", 57 | ) 58 | pytorch_parser.add_argument( 59 | "--input", type=str, required=True, help="Input Chakra host + device traces in the JSON format" 60 | ) 61 | pytorch_parser.add_argument( 62 | "--output", type=str, required=True, help="Output Chakra host + device traces in the protobuf format" 63 | ) 64 | pytorch_parser.add_argument( 65 | "--simulate", 66 | action="store_true", 67 | help=( 68 | "Enable simulation of operators after the conversion for validation and debugging purposes. This option " 69 | "allows simulation of traces without running them through a simulator. Users can validate the converter " 70 | "or simulator against actual measured values using tools like chrome://tracing or https://perfetto.dev/. " 71 | "Read the duration of the timeline and compare the total execution time against the final simulation time " 72 | "of a trace. Disabled by default because it takes a long time." 73 | ), 74 | ) 75 | pytorch_parser.set_defaults(func=convert_pytorch) 76 | 77 | text_parser = subparsers.add_parser( 78 | "Text", help="Convert text-based model description to Chakra schema-based traces in the protobuf format" 79 | ) 80 | text_parser.add_argument( 81 | "--input", 82 | type=str, 83 | required=True, 84 | help=( 85 | "Input file in the text format that describes a model. This follows the text format used in ASTRA-sim: " 86 | "https://github.com/astra-sim/astra-sim" 87 | ), 88 | ) 89 | text_parser.add_argument( 90 | "--output", type=str, required=True, help="Output Chakra execution trace filename in the protobuf format" 91 | ) 92 | text_parser.add_argument( 93 | "--num-npus", 94 | type=int, 95 | required=True, 96 | help="Number of NPUs in a system. Determines the number of traces the converter generates", 97 | ) 98 | text_parser.add_argument( 99 | "--num-passes", 100 | type=int, 101 | required=True, 102 | help=( 103 | "Number of loops when generating traces based on the text input file. Increasing the number of passes " 104 | "increases the number of training iterations for a given text input." 105 | ), 106 | ) 107 | text_parser.set_defaults(func=convert_text) 108 | 109 | args = parser.parse_args() 110 | 111 | if "func" in args: 112 | setup_logging(args.log_filename) 113 | args.func(args) 114 | logging.info(f"Conversion successful. 
Output file is available at {args.output}.")
115 |     else:
116 |         parser.print_help()
117 | 
118 | 
119 | if __name__ == "__main__":
120 |     main()
121 | 
-------------------------------------------------------------------------------- /src/feeder/json_node.cpp: --------------------------------------------------------------------------------
1 | #include "json_node.h"
2 | 
3 | // JSONNode default constructor
4 | JSONNode::JSONNode() {}
5 | 
6 | // JSONNode copy constructor
7 | JSONNode::JSONNode(const JSONNode& t) {
8 |   node_id = t.node_id;
9 |   node_name = t.node_name;
10 |   node_type = t.node_type;
11 |   is_cpu_op = t.is_cpu_op;
12 |   runtime = t.runtime;
13 |   data_deps = t.data_deps;
14 |   dep_unresolved_parent_ids_json = t.dep_unresolved_parent_ids_json;
15 |   children_vec_json = t.children_vec_json;
16 |   children_set_json = t.children_set_json;
17 |   // The comm_* fields below are only meaningful for communication nodes:
18 |   if (node_type == NodeType::COMM_SEND_NODE ||
19 |       node_type == NodeType::COMM_RECV_NODE ||
20 |       node_type == NodeType::COMM_COLL_NODE) {
21 |     tensor_size = t.tensor_size;
22 |     comm_type = t.comm_type;
23 |     comm_priority = t.comm_priority;
24 |     comm_size = t.comm_size;
25 |     comm_src = t.comm_src;
26 |     comm_dst = t.comm_dst;
27 |     comm_tag = t.comm_tag;
28 |   }
29 | }
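// Parsing note for the constructor below: each field is read inside its own
// try/catch so a trace that omits an optional key (e.g. "runtime" or the
// comm_* fields) still loads, while missing required keys are reported on
// stderr. A minimal node in the input JSON looks roughly like this (field
// values here are illustrative only):
//   {"Id": 216, "Name": "conv", "NodeType": 4, "is_cpu_op": true,
//    "runtime": 12, "data_deps": []}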
30 | 
31 | // JSONNode constructor
32 | JSONNode::JSONNode(json data, uint64_t id) {
33 |   try {
34 |     node_id = data["workload_graph"][id]["Id"];
35 |   } catch (...) {
36 |     std::cerr << "node_id not specified in ET" << std::endl;
37 |   }
38 |   try {
39 |     node_name = data["workload_graph"][id]["Name"];
40 |   } catch (...) {
41 |     std::cerr << "node_name not specified in ET" << std::endl;
42 |   }
43 |   try {
44 |     node_type = data["workload_graph"][id]["NodeType"];
45 |   } catch (...) {
46 |     std::cerr << "node_type not specified in ET" << std::endl;
47 |   }
48 |   try {
49 |     is_cpu_op = data["workload_graph"][id]["is_cpu_op"];
50 |   } catch (...) {
51 |     std::cerr << "is_cpu_op not specified in ET" << std::endl;
52 |   }
53 |   try {
54 |     runtime = data["workload_graph"][id]["runtime"];
55 |   } catch (...) {
56 |   }
57 |   try {
58 |     data_deps =
59 |         data["workload_graph"][id]["data_deps"].get<std::vector<uint64_t>>();
60 |   } catch (...) {
61 |     std::cerr << "data deps not specified in ET" << std::endl;
62 |   }
63 | 
64 |   if (node_type == NodeType::COMM_SEND_NODE ||
65 |       node_type == NodeType::COMM_RECV_NODE ||
66 |       node_type == NodeType::COMM_COLL_NODE) {
67 |     try {
68 |       tensor_size = data["workload_graph"][id]["tensor_size"];
69 |     } catch (...) {
70 |     }
71 |     try {
72 |       comm_type = data["workload_graph"][id]["comm_type"];
73 |     } catch (...) {
74 |     }
75 |     try {
76 |       comm_priority = data["workload_graph"][id]["comm_priority"];
77 |     } catch (...) {
78 |       comm_priority = 0; // Protobuf defaults to 0
79 |     }
80 |     try {
81 |       comm_size = data["workload_graph"][id]["comm_size"];
82 |     } catch (...) {
83 |     }
84 |     try {
85 |       comm_src = data["workload_graph"][id]["comm_src"];
86 |     } catch (...) {
87 |     }
88 |     try {
89 |       comm_dst = data["workload_graph"][id]["comm_dst"];
90 |     } catch (...) {
91 |     }
92 |     try {
93 |       comm_tag = data["workload_graph"][id]["comm_tag"];
94 |     } catch (...) {
95 |     }
96 |   }
97 | }
98 | 
99 | // Node id
100 | uint64_t JSONNode::id() const {
101 |   return node_id;
102 | }
103 | 
104 | // Node name
105 | std::string JSONNode::name() const {
106 |   return node_name;
107 | }
108 | 
109 | // Node type
110 | int JSONNode::type() const {
111 |   return node_type;
112 | }
113 | 
114 | // Check if CPU OP
115 | bool JSONNode::isCPUOp() const {
116 |   return is_cpu_op;
117 | }
118 | 
119 | // Runtime
120 | uint64_t JSONNode::getRuntime() const {
121 |   return runtime;
122 | }
123 | 
124 | // Num ops
125 | uint64_t JSONNode::getNumOps() const {
126 |   return num_ops;
127 | }
128 | 
129 | // Tensor size
130 | uint64_t JSONNode::getTensorSize() const {
131 |   return tensor_size;
132 | }
133 | 
134 | // Comm type
135 | int64_t JSONNode::getCommType() const {
136 |   return comm_type;
137 | }
138 | 
139 | // Comm priority
140 | uint32_t JSONNode::getCommPriority() const {
141 |   return comm_priority;
142 | }
143 | 
144 | // Comm size
145 | uint64_t JSONNode::getCommSize() const {
146 |   return comm_size;
147 | }
148 | 
149 | // Comm src
150 | uint32_t JSONNode::getCommSrc() const {
151 |   return comm_src;
152 | }
153 | 
154 | // Comm dst
155 | uint32_t JSONNode::getCommDst() const {
156 |   return comm_dst;
157 | }
158 | 
159 | // Comm tag
160 | uint32_t JSONNode::getCommTag() const {
161 |   return comm_tag;
162 | }
163 | 
164 | // Dependency unresolved parent IDs
165 | void JSONNode::addDepUnresolvedParentID(uint64_t node_id) {
166 |   dep_unresolved_parent_ids_json.emplace_back(node_id);
167 | }
168 | 
169 | // Get dependency unresolved parent IDs
170 | std::vector<uint64_t> JSONNode::getDepUnresolvedParentIDs() {
171 |   return dep_unresolved_parent_ids_json;
172 | }
173 | 
174 | // Set dependency unresolved parent IDs
175 | void JSONNode::setDepUnresolvedParentIDs(
176 |     std::vector<uint64_t> const& dep_unresolved_parent_ids) {
177 |   dep_unresolved_parent_ids_json = dep_unresolved_parent_ids;
178 | }
179 | 
180 | // Add child
181 | void JSONNode::addChild(JSONNode node) {
182 |   // Avoid adding the same child node multiple times
183 |   // addChild is called multiple times to resolve dependencies
184 |   if (children_set_json.find(node) != children_set_json.end()) {
185 |     return;
186 |   }
187 |   children_vec_json.emplace_back(node);
188 |   children_set_json.emplace(node);
189 | }
190 | 
191 | // Get children vector
192 | std::vector<JSONNode> JSONNode::getChildren() {
193 |   return children_vec_json;
194 | }
-------------------------------------------------------------------------------- /tests/feeder/wrapper_tests.cpp: --------------------------------------------------------------------------------
1 | #include <gtest/gtest.h>
2 | #include "WrapperNode.h"
3 | 
4 | class WrapperNodeTest : public ::testing::Test {
5 |  protected:
6 |   WrapperNodeTest() {}
7 |   virtual ~WrapperNodeTest() {}
8 | 
9 |   void SetUp(const std::string& filename) {
10 |     node.createWrapper(filename);
11 |   }
12 | 
13 |   virtual void TearDown() {
14 |     node.releaseMemory();
15 |   }
16 | 
17 |   WrapperNode node;
18 | };
19 | 
20 | TEST_F(WrapperNodeTest, ConstructorNodeIDTest) {
21 |   // tests/data/small_chakra.0.json is a pruned dataset for quick tests
22 |   // tests/data/chakra.0.json is the full dataset, which is also available
23 |   SetUp("tests/data/small_chakra.0.json");
24 |   node.getNextIssuableNode();
25 |   uint64_t firstNodeID = node.getNodeID();
26 |   ASSERT_EQ(firstNodeID, 216);
27 | 
28 |   node.getNextIssuableNode();
29 |   uint64_t secondNodeID = node.getNodeID();
30 |   ASSERT_EQ(secondNodeID, 432);
31 | }
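// The WrapperNode under test abstracts over the two feeder back ends: traces
// ending in ".et" are served by the protobuf-based ETFeeder, while ".json"
// traces go through the JSONNode path. That is why several tests below
// branch on the file extension before inspecting children.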
SetUp("tests/data/small_chakra.0.json"); 35 | node.getNextIssuableNode(); 36 | uint64_t firstNodeType = node.getNodeType(); 37 | ASSERT_EQ(firstNodeType, ChakraProtoMsg::COMP_NODE); 38 | ASSERT_TRUE(node.isCPUOp()); 39 | 40 | node.getNextIssuableNode(); 41 | uint64_t secondNodeType = node.getNodeType(); 42 | ASSERT_EQ(secondNodeType, ChakraProtoMsg::COMM_COLL_NODE); 43 | ASSERT_TRUE(node.isCPUOp()); 44 | } 45 | 46 | TEST_F(WrapperNodeTest, ConstructorWrapperNodeTest) { 47 | std::string filename = "tests/data/small_chakra.0.json"; 48 | std::string ext = filename.substr(filename.find_last_of(".") + 1); 49 | SetUp(filename); 50 | node.getNextIssuableNode(); 51 | if (ext == "et") { 52 | std::vector> children; 53 | node.getChildren(children); 54 | ASSERT_EQ(children[0]->id(), 217); 55 | ASSERT_EQ(children[1]->id(), 430); 56 | ASSERT_EQ(children[2]->id(), 435); 57 | } else if (ext == "json") { 58 | std::vector children; 59 | node.getChildren(children); 60 | ASSERT_EQ(children[0].id(), 217); 61 | ASSERT_EQ(children[1].id(), 430); 62 | ASSERT_EQ(children[2].id(), 435); 63 | } 64 | } 65 | 66 | TEST_F(WrapperNodeTest, RemoveTest) { 67 | SetUp("tests/data/small_chakra.0.json"); 68 | node.lookupNode(216); 69 | ASSERT_EQ(node.getNodeID(), 216); 70 | node.removeNode(216); 71 | freopen("/dev/null", "w", stderr); 72 | try { 73 | node.lookupNode(216); 74 | ASSERT_TRUE(false) << "node should be removed \n"; 75 | } catch (const std::exception& e) { 76 | // this is the desired behaviour 77 | } 78 | freopen("/dev/tty", "w", stderr); 79 | } 80 | 81 | TEST_F(WrapperNodeTest, RemoveAndGetNextTest) { 82 | SetUp("tests/data/small_chakra.0.json"); 83 | node.lookupNode(216); 84 | ASSERT_EQ(node.getNodeID(), 216); 85 | node.removeNode(216); 86 | node.getNextIssuableNode(); 87 | ASSERT_EQ(node.getNodeID(), 216); 88 | } 89 | 90 | TEST_F(WrapperNodeTest, FreeChildrenTest) { 91 | SetUp("tests/data/small_chakra.0.json"); 92 | node.lookupNode(216); 93 | ASSERT_EQ(node.getNodeID(), 216); 94 | node.freeChildrenNodes(216); 95 | node.getNextIssuableNode(); 96 | ASSERT_EQ(node.getNodeID(), 216); 97 | node.getNextIssuableNode(); 98 | ASSERT_EQ(node.getNodeID(), 217); 99 | } 100 | 101 | TEST_F(WrapperNodeTest, HasNodesToIssueTest) { 102 | SetUp("tests/data/small_chakra.0.json"); 103 | node.getNextIssuableNode(); 104 | ASSERT_EQ(node.getNodeID(), 216); 105 | ASSERT_TRUE(node.hasNodesToIssue()); 106 | node.removeNode(5); 107 | ASSERT_TRUE(node.hasNodesToIssue()); 108 | } 109 | 110 | TEST_F(WrapperNodeTest, PushBackIssuableNodeTest) { 111 | SetUp("tests/data/small_chakra.0.json"); 112 | node.pushBackIssuableNode(217); 113 | node.getNextIssuableNode(); 114 | ASSERT_EQ(node.getNodeID(), 216); 115 | node.getNextIssuableNode(); 116 | ASSERT_EQ(node.getNodeID(), 217); 117 | } 118 | 119 | TEST_F(WrapperNodeTest, AddNodeTest) { 120 | std::string filename = "tests/data/small_chakra.0.json"; 121 | std::string ext = filename.substr(filename.find_last_of(".") + 1); 122 | SetUp(filename); 123 | if (ext == "et") { 124 | std::shared_ptr pnode1; 125 | node.lookupNode(216); 126 | pnode1 = node.getProtobufNode(); 127 | node.removeNode(216); 128 | node.addNode(pnode1); 129 | std::shared_ptr pnode2; 130 | node.lookupNode(216); 131 | pnode2 = node.getProtobufNode(); 132 | ASSERT_EQ(pnode2->id(), 216); 133 | } else if (ext == "json") { 134 | JSON jnode1; 135 | node.lookupNode(216); 136 | jnode1 = node.getJSONNode(); 137 | node.removeNode(216); 138 | node.addNode(jnode1); 139 | JSONNode jnode2; 140 | node.lookupNode(216); 141 | jnode2 = 
118 | 
119 | TEST_F(WrapperNodeTest, AddNodeTest) {
120 |   std::string filename = "tests/data/small_chakra.0.json";
121 |   std::string ext = filename.substr(filename.find_last_of(".") + 1);
122 |   SetUp(filename);
123 |   if (ext == "et") {
124 |     std::shared_ptr<Chakra::ETFeederNode> pnode1;
125 |     node.lookupNode(216);
126 |     pnode1 = node.getProtobufNode();
127 |     node.removeNode(216);
128 |     node.addNode(pnode1);
129 |     std::shared_ptr<Chakra::ETFeederNode> pnode2;
130 |     node.lookupNode(216);
131 |     pnode2 = node.getProtobufNode();
132 |     ASSERT_EQ(pnode2->id(), 216);
133 |   } else if (ext == "json") {
134 |     JSONNode jnode1;
135 |     node.lookupNode(216);
136 |     jnode1 = node.getJSONNode();
137 |     node.removeNode(216);
138 |     node.addNode(jnode1);
139 |     JSONNode jnode2;
140 |     node.lookupNode(216);
141 |     jnode2 = node.getJSONNode();
142 |     ASSERT_EQ(jnode2.id(), 216);
143 |   }
144 | }
145 | 
146 | TEST_F(WrapperNodeTest, NodeGetChildrenTest) {
147 |   std::string filename = "tests/data/small_chakra.0.json";
148 |   std::string ext = filename.substr(filename.find_last_of(".") + 1);
149 |   SetUp(filename);
150 |   node.lookupNode(216);
151 |   if (ext == "et") {
152 |     std::vector<std::shared_ptr<Chakra::ETFeederNode>> children;
153 |     node.getChildren(children);
154 |     ASSERT_EQ(children[0]->id(), 217);
155 |     ASSERT_EQ(children[2]->id(), 435);
156 |   } else if (ext == "json") {
157 |     std::vector<JSONNode> children;
158 |     node.getChildren(children);
159 |     ASSERT_EQ(children[0].id(), 217);
160 |     ASSERT_EQ(children[2].id(), 435);
161 |   }
162 | }
163 | 
164 | int main(int argc, char** argv) {
165 |   ::testing::InitGoogleTest(&argc, argv);
166 |   return RUN_ALL_TESTS();
167 | }
168 | 
-------------------------------------------------------------------------------- /src/feeder/et_feeder.cpp: --------------------------------------------------------------------------------
1 | #include "et_feeder.h"
2 | 
3 | #include <iostream>
4 | 
5 | using namespace std;
6 | using namespace Chakra;
7 | 
8 | ETFeeder::ETFeeder(string filename)
9 |     : trace_(filename), window_size_(4096 * 256), et_complete_(false) {
10 |   if (!trace_.is_open()) { // Assuming a method to check if file is open
11 |     throw std::runtime_error("Failed to open trace file: " + filename);
12 |   }
13 | 
14 |   try {
15 |     readGlobalMetadata();
16 |     readNextWindow();
17 |   } catch (const std::exception& e) {
18 |     cerr << "Error in constructor: " << e.what() << endl;
19 |     throw; // Rethrow the exception for caller to handle
20 |   }
21 | }
22 | 
23 | ETFeeder::~ETFeeder() {}
24 | 
25 | void ETFeeder::addNode(shared_ptr<ETFeederNode> node) {
26 |   dep_graph_[node->getChakraNode()->id()] = node;
27 | }
28 | 
29 | void ETFeeder::removeNode(uint64_t node_id) {
30 |   dep_graph_.erase(node_id);
31 | 
32 |   if (!et_complete_ && (dep_free_node_queue_.size() < window_size_)) {
33 |     readNextWindow();
34 |   }
35 | }
36 | 
37 | bool ETFeeder::hasNodesToIssue() {
38 |   return !(dep_graph_.empty() && dep_free_node_queue_.empty());
39 | }
40 | 
41 | shared_ptr<ETFeederNode> ETFeeder::getNextIssuableNode() {
42 |   if (dep_free_node_queue_.size() != 0) {
43 |     shared_ptr<ETFeederNode> node = dep_free_node_queue_.top();
44 |     dep_free_node_id_set_.erase(node->getChakraNode()->id());
45 |     dep_free_node_queue_.pop();
46 |     return node;
47 |   } else {
48 |     return nullptr;
49 |   }
50 | }
51 | 
52 | void ETFeeder::pushBackIssuableNode(uint64_t node_id) {
53 |   shared_ptr<ETFeederNode> node = dep_graph_[node_id];
54 |   dep_free_node_id_set_.emplace(node_id);
55 |   dep_free_node_queue_.emplace(node);
56 | }
57 | 
58 | shared_ptr<ETFeederNode> ETFeeder::lookupNode(uint64_t node_id) {
59 |   try {
60 |     return dep_graph_.at(node_id);
61 |   } catch (const std::out_of_range& e) {
62 |     std::cerr << "looking for node_id=" << node_id
63 |               << " in dep graph, however, not loaded yet" << std::endl;
64 |     throw(e);
65 |   }
66 | }
67 | 
68 | void ETFeeder::freeChildrenNodes(uint64_t node_id) {
69 |   shared_ptr<ETFeederNode> node = dep_graph_[node_id];
70 |   for (auto child : node->getChildren()) {
71 |     auto child_chakra = child->getChakraNode();
72 |     for (auto it = child_chakra->mutable_data_deps()->begin();
73 |          it != child_chakra->mutable_data_deps()->end();
74 |          ++it) {
75 |       if (*it == node_id) {
76 |         child_chakra->mutable_data_deps()->erase(it);
77 |         break;
78 |       }
79 |     }
80 |     if (child_chakra->data_deps().size() == 0) {
81 |       dep_free_node_id_set_.emplace(child_chakra->id());
82 |       dep_free_node_queue_.emplace(child);
83 |     }
84 |   }
85 | }
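// Windowing sketch: instead of loading the whole trace up front, the feeder
// decodes nodes in batches via readNextWindow() (defined below); removeNode()
// above refills the window whenever the dependency-free queue drops below
// window_size_ and the trace has not been fully consumed.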
86 | 
87 | void ETFeeder::readGlobalMetadata() {
88 |   if (!trace_.is_open()) {
89 |     throw runtime_error(
90 |         "Trace file closed unexpectedly during reading global metadata.");
91 |   }
92 |   shared_ptr<ChakraProtoMsg::GlobalMetadata> pkt_msg =
93 |       make_shared<ChakraProtoMsg::GlobalMetadata>();
94 |   trace_.read(*pkt_msg);
95 | }
96 | 
97 | shared_ptr<ETFeederNode> ETFeeder::readNode() {
98 |   shared_ptr<ChakraProtoMsg::Node> pkt_msg =
99 |       make_shared<ChakraProtoMsg::Node>();
100 |   if (!trace_.read(*pkt_msg)) {
101 |     return nullptr;
102 |   }
103 |   shared_ptr<ETFeederNode> node = make_shared<ETFeederNode>(pkt_msg);
104 | 
105 |   bool dep_unresolved = false;
106 |   for (int i = 0; i < pkt_msg->data_deps_size(); ++i) {
107 |     auto parent_node = dep_graph_.find(pkt_msg->data_deps(i));
108 |     if (parent_node != dep_graph_.end()) {
109 |       parent_node->second->addChild(node);
110 |     } else {
111 |       dep_unresolved = true;
112 |       node->addDepUnresolvedParentID(pkt_msg->data_deps(i));
113 |     }
114 |   }
115 | 
116 |   if (dep_unresolved) {
117 |     dep_unresolved_node_set_.emplace(node);
118 |   }
119 | 
120 |   return node;
121 | }
122 | 
123 | void ETFeeder::resolveDep() {
124 |   for (auto it = dep_unresolved_node_set_.begin();
125 |        it != dep_unresolved_node_set_.end();) {
126 |     shared_ptr<ETFeederNode> node = *it;
127 |     vector<uint64_t> dep_unresolved_parent_ids =
128 |         node->getDepUnresolvedParentIDs();
129 |     for (auto inner_it = dep_unresolved_parent_ids.begin();
130 |          inner_it != dep_unresolved_parent_ids.end();) {
131 |       auto parent_node = dep_graph_.find(*inner_it);
132 |       if (parent_node != dep_graph_.end()) {
133 |         parent_node->second->addChild(node);
134 |         inner_it = dep_unresolved_parent_ids.erase(inner_it);
135 |       } else {
136 |         ++inner_it;
137 |       }
138 |     }
139 |     if (dep_unresolved_parent_ids.size() == 0) {
140 |       it = dep_unresolved_node_set_.erase(it);
141 |     } else {
142 |       node->setDepUnresolvedParentIDs(dep_unresolved_parent_ids);
143 |       ++it;
144 |     }
145 |   }
146 | }
147 | 
148 | void ETFeeder::readNextWindow() {
149 |   if (!trace_.is_open()) {
150 |     throw runtime_error(
151 |         "Trace file closed unexpectedly during reading next window.");
152 |   }
153 |   uint32_t num_read = 0;
154 |   do {
155 |     shared_ptr<ETFeederNode> new_node = readNode();
156 |     if (new_node == nullptr) {
157 |       et_complete_ = true;
158 |       break;
159 |     }
160 | 
161 |     addNode(new_node);
162 |     ++num_read;
163 | 
164 |     resolveDep();
165 |   } while ((num_read < window_size_) || (dep_unresolved_node_set_.size() != 0));
166 | 
167 |   for (auto node_id_node : dep_graph_) {
168 |     uint64_t node_id = node_id_node.first;
169 |     shared_ptr<ETFeederNode> node = node_id_node.second;
170 |     if ((dep_free_node_id_set_.count(node_id) == 0) &&
171 |         (node->getChakraNode()->data_deps().size() == 0)) {
172 |       dep_free_node_id_set_.emplace(node_id);
173 |       dep_free_node_queue_.emplace(node);
174 |     }
175 |   }
176 | }
-------------------------------------------------------------------------------- /tests/converter/test_pytorch_node.py: --------------------------------------------------------------------------------
1 | import json
2 | import tarfile
3 | from pathlib import Path
4 | from typing import Any, Dict
5 | 
6 | import pytest
7 | from chakra.src.converter.pytorch_node import PyTorchNode
8 | 
9 | 
10 | @pytest.fixture
11 | def extract_tar_gz_file(tmp_path: Path) -> Path:
12 |     """
13 |     Fixture to extract a tar.gz file to a temporary directory.
14 | 
15 |     Args:
16 |         tmp_path (Path): Temporary directory path provided by pytest.
17 | 
18 |     Returns:
19 |         Path: Path to the extracted directory.
20 | """ 21 | tar_gz_file = Path("tests/data/1.0.2-chakra.0.0.4.tgz") 22 | extracted_dir = tmp_path / "extracted" 23 | extracted_dir.mkdir() 24 | 25 | with tarfile.open(tar_gz_file, "r:gz") as tar: 26 | tar.extractall(path=extracted_dir) 27 | 28 | return extracted_dir 29 | 30 | 31 | def load_pytorch_execution_traces(file_path: str) -> Dict[str, Any]: 32 | """ 33 | Loads PyTorch execution traces from a file. 34 | 35 | Args: 36 | file_path (str): Path to the PyTorch execution trace file. 37 | 38 | Returns: 39 | Dict[str, Any]: Parsed PyTorch execution trace data. 40 | """ 41 | with open(file_path, "r") as pytorch_et: 42 | return json.load(pytorch_et) 43 | 44 | 45 | def test_pytorch_node_parsing(extract_tar_gz_file: Path) -> None: 46 | """ 47 | Test to check if PyTorchNode can parse nodes properly from the extracted data. 48 | 49 | Args: 50 | extract_tar_gz_file (Path): Path to the extracted directory containing 51 | the PyTorch execution trace file. 52 | """ 53 | pytorch_trace_file = extract_tar_gz_file / "1.0.2-chakra.0.0.4.json" 54 | pytorch_et_data = load_pytorch_execution_traces(str(pytorch_trace_file)) 55 | 56 | pytorch_schema = pytorch_et_data["schema"] 57 | pytorch_nodes = pytorch_et_data["nodes"] 58 | 59 | for node_data in pytorch_nodes: 60 | node = PyTorchNode(pytorch_schema, node_data) 61 | assert node is not None # Check if node is instantiated properly 62 | 63 | 64 | @pytest.fixture 65 | def sample_node_data_1_0_2_chakra_0_0_4() -> Dict: 66 | return { 67 | "id": 1, 68 | "name": "node1", 69 | "ctrl_deps": None, 70 | "inputs": {"values": "values", "shapes": "shapes", "types": "types"}, 71 | "outputs": {"values": "values", "shapes": "shapes", "types": "types"}, 72 | "attrs": [ 73 | {"name": "rf_id", "type": "uint64", "value": 0}, 74 | {"name": "fw_parent", "type": "uint64", "value": 0}, 75 | {"name": "seq_id", "type": "int64", "value": -1}, 76 | {"name": "scope", "type": "uint64", "value": 7}, 77 | {"name": "tid", "type": "uint64", "value": 1}, 78 | {"name": "fw_tid", "type": "uint64", "value": 0}, 79 | {"name": "op_schema", "type": "string", "value": ""}, 80 | ], 81 | "exclusive_dur": 50, 82 | } 83 | 84 | 85 | @pytest.fixture 86 | def sample_node_data_1_0_3_chakra_0_0_4() -> Dict: 87 | return { 88 | "id": 2, 89 | "name": "node2", 90 | "ctrl_deps": 1, 91 | "inputs": {"values": [], "shapes": [], "types": []}, 92 | "outputs": {"values": [], "shapes": [], "types": []}, 93 | "attrs": [ 94 | {"name": "rf_id", "type": "uint64", "value": 2}, 95 | {"name": "fw_parent", "type": "uint64", "value": 0}, 96 | {"name": "seq_id", "type": "int64", "value": -1}, 97 | {"name": "scope", "type": "uint64", "value": 7}, 98 | {"name": "tid", "type": "uint64", "value": 1}, 99 | {"name": "fw_tid", "type": "uint64", "value": 0}, 100 | {"name": "op_schema", "type": "string", "value": ""}, 101 | ], 102 | "exclusive_dur": 30, 103 | } 104 | 105 | 106 | @pytest.fixture 107 | def sample_node_data_unsupported_schema() -> Dict: 108 | return { 109 | "id": 4, 110 | "name": "## process_group:init ##", 111 | "ctrl_deps": 3, 112 | "inputs": { 113 | "values": [], 114 | "shapes": [[]], 115 | "types": ["String"], 116 | }, 117 | "outputs": {"values": [], "shapes": [], "types": []}, 118 | "attrs": [ 119 | {"name": "rf_id", "type": "uint64", "value": 2}, 120 | {"name": "fw_parent", "type": "uint64", "value": 0}, 121 | {"name": "seq_id", "type": "int64", "value": -1}, 122 | {"name": "scope", "type": "uint64", "value": 7}, 123 | {"name": "tid", "type": "uint64", "value": 1}, 124 | {"name": "fw_tid", "type": "uint64", "value": 
0}, 125 | {"name": "op_schema", "type": "string", "value": ""}, 126 | ], 127 | "exclusive_dur": 40, 128 | } 129 | 130 | 131 | def test_pytorch_node_parsing_1_0_2_chakra_0_0_4(sample_node_data_1_0_2_chakra_0_0_4) -> None: 132 | schema = "1.0.2-chakra.0.0.4" 133 | node = PyTorchNode(schema, sample_node_data_1_0_2_chakra_0_0_4) 134 | assert node is not None 135 | assert node.schema == schema 136 | assert isinstance(node.id, int) 137 | assert isinstance(node.name, str) 138 | assert node.exclusive_dur == 50 139 | 140 | 141 | def test_pytorch_node_parsing_1_0_3_chakra_0_0_4(sample_node_data_1_0_3_chakra_0_0_4) -> None: 142 | schema = "1.0.3-chakra.0.0.4" 143 | node = PyTorchNode(schema, sample_node_data_1_0_3_chakra_0_0_4) 144 | assert node is not None 145 | assert node.schema == schema 146 | assert isinstance(node.id, int) 147 | assert isinstance(node.name, str) 148 | assert node.exclusive_dur == 30 149 | 150 | 151 | def test_pytorch_node_unsupported_schema(sample_node_data_unsupported_schema) -> None: 152 | schema = "9999.9999.9999-chakra.0.0.4" 153 | with pytest.raises(ValueError, match=f"Unsupported schema version '{schema}'"): 154 | PyTorchNode(schema, sample_node_data_unsupported_schema) 155 | -------------------------------------------------------------------------------- /USER_GUIDE.md: -------------------------------------------------------------------------------- 1 | # Chakra User Guide 2 | ## Installation 3 | ### Step 1: Set up a Virtual Environment 4 | It's advisable to create a virtual environment using Python 3.10.2. 5 | 6 | ```bash 7 | # Create a virtual environment 8 | $ python3 -m venv chakra_env 9 | 10 | # Activate the virtual environment 11 | $ source chakra_env/bin/activate 12 | ``` 13 | 14 | ### Step 2: Install Chakra 15 | With the virtual environment activated, install the Chakra package using pip. 16 | 17 | ```bash 18 | # Install package from source 19 | $ pip install . 20 | 21 | # Install latest from GitHub 22 | $ pip install https://github.com/mlcommons/chakra/archive/refs/heads/main.zip 23 | 24 | # Install specific revision from GitHub 25 | $ pip install https://github.com/mlcommons/chakra/archive/ae7c671db702eb1384015bb2618dc753eed787f2.zip 26 | ``` 27 | 28 | ### Step 3: Install PARAM 29 | Installing PARAM is necessary for Chakra to function properly as it imports essential components from it. 30 | 31 | ```bash 32 | $ git clone git@github.com:facebookresearch/param.git 33 | $ cd param/et_replay 34 | $ git checkout 7b19f586dd8b267333114992833a0d7e0d601630 35 | $ pip install . 36 | ``` 37 | 38 | ### Step 4: Install Holistic Trace Analysis 39 | Installing Holistic Trace Analysis is necessary for Trace link. 40 | 41 | ```bash 42 | $ git clone https://github.com/facebookresearch/HolisticTraceAnalysis.git 43 | $ cd HolisticTraceAnalysis 44 | $ git checkout d731cc2e2249976c97129d409a83bd53d93051f6 45 | $ git submodule update --init 46 | $ pip install -r requirements.txt 47 | $ pip install -e . 48 | ``` 49 | 50 | ### Step 5: Uninstalling Chakra 51 | To uninstall Chakra, use the following command within the virtual environment. 52 | 53 | ```bash 54 | $ pip uninstall chakra 55 | ``` 56 | 57 | ## Tools Overview 58 | ### Execution Trace Link (chakra_trace_link) 59 | Merge Chakra host execution trace and Chakra device execution trace to encode GPU operators into the output execution trace. 
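For instance, a concrete invocation for rank 0 might look like the sketch below; the file names are illustrative, and the general form of the command follows.

```bash
$ chakra_trace_link \
    --chakra-host-trace host_trace_0.json \
    --chakra-device-trace device_trace_0.json \
    --rank 0 \
    --output-file chakra_host_device_trace_0.json
```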
60 | ```bash
61 | $ chakra_trace_link \
62 |     --chakra-host-trace /path/to/chakra_host_trace \
63 |     --chakra-device-trace /path/to/chakra_device_trace \
64 |     --rank [RANK] \
65 |     --output-file /path/to/chakra_host_device_trace.json
66 | ```
67 | 
68 | ### Execution Trace Converter (chakra_converter)
69 | Converts the execution traces from `chakra_trace_link` into traces in the protobuf format. It is responsible for identifying and encoding dependencies for simulation as well. The converter is designed for any downstream simulators that take Chakra execution traces in the protobuf format. It takes an input file in another format and generates a Chakra execution trace output in the protobuf format.
70 | ```bash
71 | $ chakra_converter PyTorch \
72 |     --input /path/to/chakra_host_device_trace.json \
73 |     --output /path/to/chakra_trace \
74 |     [--simulate]
75 | ```
76 | * --input: Path to the input file containing the merged Chakra host and device traces in JSON format.
77 | * --output: Path to the output file where the converted Chakra trace will be saved in protobuf format.
78 | * --simulate: (Optional) Enable simulation of operators after the conversion for validation and debugging purposes. This option allows simulation of traces without running them through a simulator. Users can validate the converter or simulator against actual measured values using tools like chrome://tracing or https://perfetto.dev/. Read the duration of the timeline and compare the total execution time against the final simulation time of a trace. Disabled by default because it takes a long time.
79 | 
80 | ### Execution Trace Feeder (et_feeder)
81 | The Execution Trace Feeder (et_feeder) is a C++ library designed to feed Chakra traces into any compatible C++ simulator. This library specifically provides dependency-free nodes to a simulator, which must import the feeder as a library. Currently, ASTRA-sim is the only simulator that supports this trace feeder. Below are the commands to run execution traces on ASTRA-sim:
82 | ```bash
83 | $ git clone --recurse-submodules git@github.com:astra-sim/astra-sim.git
84 | $ cd astra-sim
85 | $ git checkout Chakra
86 | $ git submodule update --init --recursive
87 | $ cd extern/graph_frontend/chakra/
88 | $ git checkout main
89 | $ cd -
90 | $ ./build/astra_analytical/build.sh -c
91 | 
92 | $ cd extern/graph_frontend/chakra/
93 | $ python -m chakra.et_generator.et_generator \
94 |     --num_npus <num_npus>
95 | 
96 | $ cd -
97 | $ ./run.sh
98 | ```
99 | 
100 | ### Execution Trace Visualizer (chakra_visualizer)
101 | This tool visualizes execution traces in various formats. Here is an example command:
102 | 
103 | ```bash
104 | $ chakra_visualizer \
105 |     --input_filename /path/to/chakra_et \
106 |     --output_filename /path/to/output.[graphml|pdf|dot]
107 | ```
108 | 
109 | ### Execution Trace Jsonizer (chakra_jsonizer)
110 | Provides a readable JSON format of execution traces:
111 | 
112 | ```bash
113 | $ chakra_jsonizer \
114 |     --input_filename /path/to/chakra_et \
115 |     --output_filename /path/to/output_json
116 | ```
117 | 
118 | ### Timeline Visualizer (chakra_timeline_visualizer)
119 | Visualizes the execution timeline of traces. This tool serves as a reference implementation for visualizing the simulation of Chakra traces. After simulating Chakra traces, you can visualize the timeline of operator executions; to do so, update the simulator to log when each operator is issued and when its callback completes.
Below is the format needed, where each field after the leading event type is a `key=value` pair and the value is what the visualizer reads:
120 | ```csv
121 | issue,npu_id=<npu_id>,curr_cycle=<curr_cycle>,node_id=<node_id>,node_name=<node_name>
122 | callback,npu_id=<npu_id>,curr_cycle=<curr_cycle>,node_id=<node_id>,node_name=<node_name>
123 | ...
124 | ```
125 | 
126 | You can visualize the timeline with the command below. Note that `--npu_frequency` takes an integer in MHz.
127 | ```bash
128 | $ chakra_timeline_visualizer \
129 |     --input_filename /path/to/input.csv \
130 |     --output_filename /path/to/output.json \
131 |     --num_npus 4 \
132 |     --npu_frequency 1500
133 | ```
134 | 
135 | When you open the output file with `chrome://tracing`, you will see an execution timeline like the one below.
136 | ![](doc/timeline_visualizer.png)
137 | 
-------------------------------------------------------------------------------- /src/feeder/et_feeder_node.cpp: --------------------------------------------------------------------------------
1 | #include "et_feeder_node.h"
2 | 
3 | using namespace std;
4 | using namespace Chakra;
5 | 
6 | ETFeederNode::ETFeederNode(std::shared_ptr<ChakraProtoMsg::Node> node) {
7 |   this->node_ = node;
8 |   this->id_ = node->id();
9 |   this->name_ = node->name();
10 |   this->runtime_ = node->duration_micros();
11 |   this->is_cpu_op_ = 0;
12 |   this->num_ops_ = 0;
13 | 
14 |   if (node->has_inputs()) {
15 |     this->inputs_values_ = static_cast<string>(node->inputs().values());
16 |     this->inputs_shapes_ = static_cast<string>(node->inputs().shapes());
17 |     this->inputs_types_ = static_cast<string>(node->inputs().types());
18 |   }
19 | 
20 |   if (node->has_outputs()) {
21 |     this->outputs_values_ = static_cast<string>(node->outputs().values());
22 |     this->outputs_shapes_ = static_cast<string>(node->outputs().shapes());
23 |     this->outputs_types_ = static_cast<string>(node->outputs().types());
24 |   }
25 | 
26 |   for (const auto& attr : node->attr()) {
27 |     const string& attr_name = attr.name();
28 | 
29 |     if (attr_name == "is_cpu_op") {
30 |       this->is_cpu_op_ = static_cast<bool>(attr.bool_val());
31 |     } else if (attr_name == "num_ops") {
32 |       this->num_ops_ = static_cast<uint64_t>(attr.int64_val());
33 |     } else if (attr_name == "tensor_size") {
34 |       this->tensor_size_ = attr.uint64_val();
35 |     } else if (attr_name == "comm_type") {
36 |       this->comm_type_ =
37 |           static_cast<ChakraProtoMsg::CollectiveCommType>(attr.int64_val());
38 |     } else if (attr_name == "comm_priority") {
39 |       this->comm_priority_ = static_cast<uint32_t>(attr.int32_val());
40 |     } else if (attr_name == "comm_size") {
41 |       this->comm_size_ = static_cast<uint64_t>(attr.int64_val());
42 |     } else if (attr_name == "comm_src") {
43 |       this->comm_src_ = static_cast<uint32_t>(attr.int32_val());
44 |     } else if (attr_name == "comm_dst") {
45 |       this->comm_dst_ = static_cast<uint32_t>(attr.int32_val());
46 |     } else if (attr_name == "comm_tag") {
47 |       this->comm_tag_ = static_cast<uint32_t>(attr.int32_val());
48 |     } else if (attr_name == "pg_name") {
49 |       this->pg_name_ = static_cast<string>(attr.string_val());
50 |     } else {
51 |       this->other_attrs_.emplace(attr_name, attr);
52 |     }
53 |   }
54 | }
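// Attribute promotion: the constructor above lifts well-known attributes
// (is_cpu_op, num_ops, tensor_size, the comm_* family, and pg_name) out of
// the generic AttributeProto list into typed fields; anything else lands in
// other_attrs_, where get_other_attr()/has_other_attr() below retrieve it
// by name.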
55 | 
56 | shared_ptr<ChakraProtoMsg::Node> ETFeederNode::getChakraNode() {
57 |   return node_;
58 | }
59 | 
60 | void ETFeederNode::addChild(shared_ptr<ETFeederNode> node) {
61 |   // Avoid adding the same child node multiple times
62 |   // addChild is called multiple times to resolve dependencies
63 |   if (children_set_.find(node) != children_set_.end()) {
64 |     return;
65 |   }
66 |   children_vec_.emplace_back(node);
67 |   children_set_.emplace(node);
68 | }
69 | 
70 | vector<shared_ptr<ETFeederNode>> ETFeederNode::getChildren() {
71 |   return children_vec_;
72 | }
73 | 
74 | void ETFeederNode::addDepUnresolvedParentID(uint64_t node_id) {
75 |   dep_unresolved_parent_ids_.emplace_back(node_id);
76 | }
77 | 
78 | vector<uint64_t> ETFeederNode::getDepUnresolvedParentIDs() {
79 |   return dep_unresolved_parent_ids_;
80 | }
81 | 
82 | void ETFeederNode::setDepUnresolvedParentIDs(
83 |     vector<uint64_t> const& dep_unresolved_parent_ids) {
84 |   dep_unresolved_parent_ids_ = dep_unresolved_parent_ids;
85 | }
86 | 
87 | const ChakraProtoMsg::AttributeProto& ETFeederNode::get_other_attr(
88 |     const string& attr_name) const {
89 |   if (this->has_other_attr(attr_name))
90 |     return this->other_attrs_.at(attr_name);
91 |   throw std::runtime_error(
92 |       "Asked for attr \"" + attr_name + "\" from node " +
93 |       std::to_string(this->id_) + ", which does not exist");
94 | }
95 | 
96 | bool ETFeederNode::has_other_attr(const string& attr_name) const {
97 |   const auto& item = this->other_attrs_.find(attr_name);
98 |   return item != this->other_attrs_.end();
99 | }
100 | 
101 | uint64_t ETFeederNode::id() {
102 |   return id_;
103 | }
104 | 
105 | string ETFeederNode::name() {
106 |   return name_;
107 | }
108 | 
109 | bool ETFeederNode::is_cpu_op() {
110 |   return is_cpu_op_;
111 | }
112 | 
113 | ChakraProtoMsg::NodeType ETFeederNode::type() {
114 |   return node_->type();
115 | }
116 | 
117 | uint64_t ETFeederNode::runtime() {
118 |   return runtime_;
119 | }
120 | 
121 | uint64_t ETFeederNode::num_ops() {
122 |   return num_ops_;
123 | }
124 | 
125 | uint32_t ETFeederNode::tensor_loc() {
126 |   return tensor_loc_;
127 | }
128 | 
129 | uint64_t ETFeederNode::tensor_size() {
130 |   return tensor_size_;
131 | }
132 | 
133 | ChakraProtoMsg::CollectiveCommType ETFeederNode::comm_type() {
134 |   return comm_type_;
135 | }
136 | 
137 | uint32_t ETFeederNode::comm_priority() {
138 |   return comm_priority_;
139 | }
140 | 
141 | uint64_t ETFeederNode::comm_size() {
142 |   return comm_size_;
143 | }
144 | 
145 | uint32_t ETFeederNode::comm_src() {
146 |   return comm_src_;
147 | }
148 | 
149 | uint32_t ETFeederNode::comm_dst() {
150 |   return comm_dst_;
151 | }
152 | 
153 | uint32_t ETFeederNode::comm_tag() {
154 |   return comm_tag_;
155 | }
156 | 
157 | string ETFeederNode::pg_name() {
158 |   return pg_name_;
159 | }
160 | 
161 | string ETFeederNode::get_inputs_values() const {
162 |   if (node_->has_inputs()) {
163 |     return inputs_values_;
164 |   }
165 |   return "";
166 | }
167 | 
168 | string ETFeederNode::get_inputs_shapes() const {
169 |   if (node_->has_inputs()) {
170 |     return inputs_shapes_;
171 |   }
172 |   return "";
173 | }
174 | 
175 | string ETFeederNode::get_inputs_types() const {
176 |   if (node_->has_inputs()) {
177 |     return inputs_types_;
178 |   }
179 |   return "";
180 | }
181 | 
182 | string ETFeederNode::get_outputs_values() const {
183 |   if (node_->has_outputs()) {
184 |     return outputs_values_;
185 |   }
186 |   return "";
187 | }
188 | 
189 | string ETFeederNode::get_outputs_shapes() const {
190 |   if (node_->has_outputs()) {
191 |     return outputs_shapes_;
192 |   }
193 |   return "";
194 | }
195 | 
196 | string ETFeederNode::get_outputs_types() const {
197 |   if (node_->has_outputs()) {
198 |     return outputs_types_;
199 |   }
200 |   return "";
201 | }
202 | 
-------------------------------------------------------------------------------- /src/timeline_visualizer/timeline_visualizer.py: --------------------------------------------------------------------------------
1 | import argparse
2 | import json
3 | import logging
4 | import sys
5 | from enum import IntEnum
6 | from logging import FileHandler
7 | from typing import Any, Dict, List, Tuple
8 | 
9 | 
10 | class TID(IntEnum):
11 |     """
12 |     Enum representing the types of TID (Thread ID) used for classifying different nodes in a trace.
13 | 
14 |     Attributes:
15 |         LOCAL_MEMORY (int): Represents local memory nodes.
16 |         REMOTE_MEMORY (int): Represents remote memory nodes.
17 |         COMP (int): Represents compute nodes.
18 |         COMM (int): Represents communication nodes.
19 |     """
20 | 
21 |     LOCAL_MEMORY = 1
22 |     REMOTE_MEMORY = 2
23 |     COMP = 3
24 |     COMM = 4
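# Rendering note: in the Chrome trace emitted below, each NPU id becomes a
# "pid" and these TID values become the rows within it, so local-memory,
# remote-memory, compute, and communication events land on separate lanes.
# Timing sketch: with npu_frequency given in MHz, cycles / npu_frequency is
# microseconds; e.g. at 1500 MHz, an operator issued at cycle 3_000_000
# starts at 2.0 ms on the timeline.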
17 | COMP (int): Represents compute nodes. 18 | COMM (int): Represents communication nodes. 19 | """ 20 | 21 | LOCAL_MEMORY = 1 22 | REMOTE_MEMORY = 2 23 | COMP = 3 24 | COMM = 4 25 | 26 | 27 | def get_logger(log_filename: str) -> logging.Logger: 28 | formatter = logging.Formatter("%(levelname)s [%(asctime)s] %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p") 29 | 30 | file_handler = FileHandler(log_filename, mode="w") 31 | file_handler.setLevel(logging.DEBUG) 32 | file_handler.setFormatter(formatter) 33 | 34 | stream_handler = logging.StreamHandler() 35 | stream_handler.setLevel(logging.WARNING) 36 | stream_handler.setFormatter(formatter) 37 | 38 | logger = logging.getLogger(__file__) 39 | logger.setLevel(logging.DEBUG) 40 | logger.addHandler(file_handler) 41 | logger.addHandler(stream_handler) 42 | 43 | return logger 44 | 45 | 46 | def is_local_mem_node(node_name: str) -> bool: 47 | return ( 48 | ("MEM_LOAD_NODE" in node_name) 49 | and ("LOCAL_MEMORY" in node_name) 50 | or ("MEM_STORE_NODE" in node_name) 51 | and ("LOCAL_MEMORY" in node_name) 52 | ) 53 | 54 | 55 | def is_remote_mem_node(node_name: str) -> bool: 56 | return ( 57 | ("MEM_LOAD_NODE" in node_name) 58 | and ("REMOTE_MEMORY" in node_name) 59 | or ("MEM_STORE_NODE" in node_name) 60 | and ("REMOTE_MEMORY" in node_name) 61 | ) 62 | 63 | 64 | def is_comp_node(node_name: str) -> bool: 65 | return "COMP_NODE" in node_name 66 | 67 | 68 | def is_comm_node(node_name: str) -> bool: 69 | return ("COMM_SEND_NODE" in node_name) or ("COMM_RECV_NODE" in node_name) or ("COMM_COLL_NODE" in node_name) 70 | 71 | 72 | def get_tid(node_name: str) -> TID: 73 | if is_local_mem_node(node_name): 74 | return TID.LOCAL_MEMORY 75 | elif is_remote_mem_node(node_name): 76 | return TID.REMOTE_MEMORY 77 | elif is_comp_node(node_name): 78 | return TID.COMP 79 | elif is_comm_node(node_name): 80 | return TID.COMM 81 | else: 82 | raise ValueError(f"Node type cannot be identified from {node_name}") 83 | 84 | 85 | def parse_event(line: str) -> Tuple[str, int, int, int, str]: 86 | try: 87 | cols = line.strip().split(",") 88 | trace_type = cols[0] 89 | npu_id = int(cols[1].split("=")[1]) 90 | curr_cycle = int(cols[2].split("=")[1]) 91 | node_id = int(cols[3].split("=")[1]) 92 | node_name = cols[4].split("=")[1] 93 | return (trace_type, npu_id, curr_cycle, node_id, node_name) 94 | except Exception as e: 95 | raise ValueError(f'Cannot parse the following event -- "{line}": {e}') from e 96 | 97 | 98 | def get_trace_events(input_filename: str, num_npus: int, npu_frequency: int) -> List[Dict[str, Any]]: 99 | trace_dict = {i: {} for i in range(num_npus)} 100 | trace_events = [] 101 | 102 | with open(input_filename, "r") as f: 103 | for line in f: 104 | if ("issue" in line) or ("callback" in line): 105 | (trace_type, npu_id, curr_cycle, node_id, node_name) = parse_event(line) 106 | 107 | if trace_type == "issue": 108 | trace_dict[npu_id].update({node_id: [node_name, curr_cycle]}) 109 | elif trace_type == "callback": 110 | node_name = trace_dict[npu_id][node_id][0] 111 | tid = get_tid(node_name) 112 | issued_cycle = trace_dict[npu_id][node_id][1] 113 | issued_ms = (issued_cycle / npu_frequency) / 1_000 114 | duration_in_cycles = curr_cycle - issued_cycle 115 | duration_in_ms = duration_in_cycles / (npu_frequency * 1_000) 116 | 117 | trace_events.append( 118 | { 119 | "pid": npu_id, 120 | "tid": tid, 121 | "ts": issued_ms, 122 | "dur": duration_in_ms, 123 | "ph": "X", 124 | "name": node_name, 125 | "args": {"ms": duration_in_ms}, 126 | } 127 | ) 128 | 129 | del 
trace_dict[npu_id][node_id] 130 | else: 131 | raise ValueError(f"Unsupported trace_type, {trace_type}") 132 | 133 | return trace_events 134 | 135 | 136 | def write_trace_events(output_filename: str, num_npus: int, trace_events: List[Dict[str, Any]]) -> None: 137 | output_dict = {"meta_user": "aras", "traceEvents": trace_events, "meta_cpu_count": num_npus} 138 | with open(output_filename, "w") as f: 139 | json.dump(output_dict, f) 140 | 141 | 142 | def main() -> None: 143 | parser = argparse.ArgumentParser(description="Timeline Visualizer") 144 | parser.add_argument("--input_filename", type=str, default=None, required=True, help="Input timeline filename") 145 | parser.add_argument("--output_filename", type=str, default=None, required=True, help="Output trace filename") 146 | parser.add_argument("--num_npus", type=int, default=None, required=True, help="Number of NPUs in a system") 147 | parser.add_argument("--npu_frequency", type=int, default=None, required=True, help="NPU frequency in MHz") 148 | parser.add_argument("--log_filename", type=str, default="debug.log", help="Log filename") 149 | args = parser.parse_args() 150 | 151 | logger = get_logger(args.log_filename) 152 | logger.debug(" ".join(sys.argv)) 153 | 154 | try: 155 | trace_events = get_trace_events(args.input_filename, args.num_npus, args.npu_frequency) 156 | write_trace_events(args.output_filename, args.num_npus, trace_events) 157 | except Exception as e: 158 | logger.error(str(e)) 159 | sys.exit(1) 160 | 161 | 162 | if __name__ == "__main__": 163 | main() 164 | -------------------------------------------------------------------------------- /src/third_party/utils/protoio.hh: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2012 ARM Limited 3 | * All rights reserved 4 | * 5 | * The license below extends only to copyright in the software and shall 6 | * not be construed as granting a license to any other intellectual 7 | * property including but not limited to intellectual property relating 8 | * to a hardware implementation of the functionality of the software 9 | * licensed hereunder. You may use the software subject to the license 10 | * terms below provided that you ensure that this notice is replicated 11 | * unmodified and in its entirety in all distributions of the software, 12 | * modified or unmodified, in source code or in binary form. 13 | * 14 | * Redistribution and use in source and binary forms, with or without 15 | * modification, are permitted provided that the following conditions are 16 | * met: redistributions of source code must retain the above copyright 17 | * notice, this list of conditions and the following disclaimer; 18 | * redistributions in binary form must reproduce the above copyright 19 | * notice, this list of conditions and the following disclaimer in the 20 | * documentation and/or other materials provided with the distribution; 21 | * neither the name of the copyright holders nor the names of its 22 | * contributors may be used to endorse or promote products derived from 23 | * this software without specific prior written permission. 24 | * 25 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 29 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 31 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 32 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 33 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 34 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 35 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 | */ 37 | 38 | /** 39 | * @file 40 | * Declaration of a wrapper for protobuf output streams and input streams. 41 | */ 42 | 43 | #ifndef __PROTO_PROTOIO_HH__ 44 | #define __PROTO_PROTOIO_HH__ 45 | 46 | #include <google/protobuf/io/coded_stream.h> 47 | #include <google/protobuf/io/gzip_stream.h> 48 | #include <google/protobuf/io/zero_copy_stream_impl.h> 49 | #include <google/protobuf/message.h> 50 | 51 | #include <fstream> 52 | 53 | /** 54 | * A ProtoStream provides the shared functionality of the input and 55 | * output streams. At the moment this is limited to the magic number. 56 | */ 57 | class ProtoStream { 58 | protected: 59 | /// Use the ASCII characters gem5 as our magic number 60 | static const uint32_t magicNumber = 0x356d6567; 61 | 62 | /** 63 | * Create a ProtoStream. 64 | */ 65 | ProtoStream() {} 66 | 67 | private: 68 | /** 69 | * Hide the copy constructor and assignment operator. 70 | * @{ 71 | */ 72 | ProtoStream(const ProtoStream&); 73 | ProtoStream& operator=(const ProtoStream&); 74 | /** @} */ 75 | }; 76 | 77 | /** 78 | * A ProtoOutputStream wraps a coded stream, potentially with 79 | * compression, based on looking at the file name. Writing to the 80 | * stream is done to enable interaction with the file on a per-message 81 | * basis to avoid having to deal with huge data structures. The latter 82 | * is made possible by encoding the length of each message in the 83 | * stream. 84 | */ 85 | class ProtoOutputStream : public ProtoStream { 86 | public: 87 | /** 88 | * Create an output stream for a given file name. If the filename 89 | * ends with .gz then the file will be compressed accordingly. 90 | * 91 | * @param filename Path to the file to create or truncate 92 | */ 93 | ProtoOutputStream(const std::string& filename); 94 | 95 | /** 96 | * Destruct the output stream, and also flush and close the 97 | * underlying file streams and coded streams. 98 | */ 99 | ~ProtoOutputStream(); 100 | 101 | /** 102 | * Write a message to the stream, prepending it with the message 103 | * size. 104 | * 105 | * @param msg Message to write to the stream 106 | */ 107 | void write(const google::protobuf::Message& msg); 108 | 109 | private: 110 | /// Underlying file output stream 111 | std::ofstream fileStream; 112 | 113 | /// Zero Copy stream wrapping the STL output stream 114 | google::protobuf::io::OstreamOutputStream* wrappedFileStream; 115 | 116 | /// Optional Gzip stream to wrap the Zero Copy stream 117 | google::protobuf::io::GzipOutputStream* gzipStream; 118 | 119 | /// Top-level zero-copy stream, either with compression or not 120 | google::protobuf::io::ZeroCopyOutputStream* zeroCopyStream; 121 | }; 122 | 123 | /** 124 | * A ProtoInputStream wraps a coded stream, potentially with 125 | * decompression, based on looking at the file name. Reading from the 126 | * stream is done on a per-message basis to avoid having to deal with 127 | * huge data structures. The latter assumes the length of each message 128 | * is encoded in the stream when it is written.
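 *
 * A minimal usage sketch (illustrative only; "trace.0.et" is a made-up
 * file name, and ChakraProtoMsg::Node is assumed to be the trace message
 * generated from schema/protobuf/et_def.proto):
 *
 *   ProtoInputStream istream("trace.0.et");
 *   ChakraProtoMsg::Node node;
 *   while (istream.read(node)) {
 *     // handle one length-prefixed message at a time
 *   }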
129 | */ 130 | class ProtoInputStream : public ProtoStream { 131 | public: 132 | /** 133 | * Create an input stream for a given file name. If the filename 134 | * ends with .gz then the file will be decompressed accordingly. 135 | * 136 | * @param filename Path to the file to read from 137 | */ 138 | ProtoInputStream(const std::string& filename); 139 | 140 | /** 141 | * Destruct the input stream, and also close the underlying file 142 | * streams and coded streams. 143 | */ 144 | ~ProtoInputStream(); 145 | 146 | bool is_open(); 147 | 148 | /** 149 | * Read a message from the stream. 150 | * 151 | * @param msg Message read from the stream 152 | * @return True if a message was read, false if reading fails 153 | */ 154 | bool read(google::protobuf::Message& msg); 155 | 156 | /** 157 | * Reset the input stream and seek to the beginning of the file. 158 | */ 159 | void reset(); 160 | 161 | private: 162 | /** 163 | * Create the internal streams that are wrapping the input file. 164 | */ 165 | void createStreams(); 166 | 167 | /** 168 | * Destroy the internal streams that are wrapping the input file. 169 | */ 170 | void destroyStreams(); 171 | 172 | /// Underlying file input stream 173 | std::ifstream fileStream; 174 | 175 | /// Hold on to the file name for debug messages 176 | const std::string fileName; 177 | 178 | /// Boolean flag to remember whether we use gzip or not 179 | bool useGzip; 180 | 181 | /// Zero Copy stream wrapping the STL input stream 182 | google::protobuf::io::IstreamInputStream* wrappedFileStream; 183 | 184 | /// Optional Gzip stream to wrap the Zero Copy stream 185 | google::protobuf::io::GzipInputStream* gzipStream; 186 | 187 | /// Top-level zero-copy stream, either with compression or not 188 | google::protobuf::io::ZeroCopyInputStream* zeroCopyStream; 189 | }; 190 | 191 | #endif //__PROTO_PROTOIO_HH__ 192 | -------------------------------------------------------------------------------- /src/third_party/utils/protolib.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright (c) 2013 ARM Limited 4 | # All rights reserved 5 | # 6 | # The license below extends only to copyright in the software and shall 7 | # not be construed as granting a license to any other intellectual 8 | # property including but not limited to intellectual property relating 9 | # to a hardware implementation of the functionality of the software 10 | # licensed hereunder. You may use the software subject to the license 11 | # terms below provided that you ensure that this notice is replicated 12 | # unmodified and in its entirety in all distributions of the software, 13 | # modified or unmodified, in source code or in binary form. 14 | # 15 | # Redistribution and use in source and binary forms, with or without 16 | # modification, are permitted provided that the following conditions are 17 | # met: redistributions of source code must retain the above copyright 18 | # notice, this list of conditions and the following disclaimer; 19 | # redistributions in binary form must reproduce the above copyright 20 | # notice, this list of conditions and the following disclaimer in the 21 | # documentation and/or other materials provided with the distribution; 22 | # neither the name of the copyright holders nor the names of its 23 | # contributors may be used to endorse or promote products derived from 24 | # this software without specific prior written permission.
25 | # 26 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 27 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 28 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 29 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 30 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 31 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 32 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 33 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 34 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 35 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 36 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 37 | # 38 | # Copyright 2008 Google Inc. All rights reserved. 39 | # http://code.google.com/p/protobuf/ 40 | # 41 | # Redistribution and use in source and binary forms, with or without 42 | # modification, are permitted provided that the following conditions are 43 | # met: 44 | # 45 | # * Redistributions of source code must retain the above copyright 46 | # notice, this list of conditions and the following disclaimer. 47 | # * Redistributions in binary form must reproduce the above 48 | # copyright notice, this list of conditions and the following disclaimer 49 | # in the documentation and/or other materials provided with the 50 | # distribution. 51 | # * Neither the name of Google Inc. nor the names of its 52 | # contributors may be used to endorse or promote products derived from 53 | # this software without specific prior written permission. 54 | # 55 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 56 | # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 57 | # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 58 | # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 59 | # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 60 | # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 61 | # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 62 | # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 63 | # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 64 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 65 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 66 | 67 | # This file is a library of commonly used functions used when interfacing 68 | # with protobuf python messages. For eg, the decode scripts for different 69 | # types of proto objects can use the same function to decode a single message 70 | 71 | import gzip 72 | import struct 73 | 74 | 75 | def openFileRd(in_file): 76 | """ 77 | This opens the file passed as argument for reading using an appropriate 78 | function depending on if it is gzipped or not. It returns the file 79 | handle. 80 | """ 81 | try: 82 | # First see if this file is gzipped 83 | try: 84 | # Opening the file works even if it is not a gzip file 85 | proto_in = gzip.open(in_file, "rb") 86 | 87 | # Force a check of the magic number by seeking in the 88 | # file. If we do not do it here the error will occur when 89 | # reading the first message. 
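# Note: gzip.open() succeeds even on a file that is not gzipped; the
# gzip magic number is only validated on the first read or seek. The
# seek below forces that validation, so a plain (uncompressed) file
# raises IOError here and falls through to being reopened as a regular
# binary file in the handler below.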
90 | proto_in.seek(1) 91 | proto_in.seek(0) 92 | except IOError: 93 | proto_in = open(in_file, "rb") 94 | except IOError: 95 | print("Failed to open ", in_file, " for reading") 96 | exit(-1) 97 | return proto_in 98 | 99 | 100 | def _DecodeVarint32(in_file): 101 | """ 102 | The decoding of the Varint32 is copied from 103 | google.protobuf.internal.decoder and is only repeated here to 104 | avoid depending on the internal functions in the library. If the 105 | end of file is reached, return (0, 0). 106 | """ 107 | result = 0 108 | shift = 0 109 | pos = 0 110 | # Use a 32-bit mask 111 | mask = 0xFFFFFFFF 112 | while 1: 113 | c = in_file.read(1) 114 | if len(c) == 0: 115 | return (0, 0) 116 | b = struct.unpack("<B", c)[0] 117 | result |= (b & 0x7F) << shift 118 | pos += 1 119 | if not (b & 0x80): 120 | if result > 0x7FFFFFFFFFFFFFFF: 121 | result -= 1 << 64 122 | result |= ~mask 123 | else: 124 | result &= mask 125 | return (result, pos) 126 | shift += 7 127 | if shift >= 64: 128 | raise IOError("Too many bytes when decoding varint.") 129 | 130 | 131 | def decodeMessage(in_file, message): 132 | """ 133 | Attempt to read a message from the file and decode it. Return 134 | False if no message could be read. 135 | """ 136 | try: 137 | size, pos = _DecodeVarint32(in_file) 138 | if size == 0: 139 | return False 140 | buf = in_file.read(size) 141 | message.ParseFromString(buf) 142 | return True 143 | except IOError: 144 | return False 145 | 146 | 147 | def _EncodeVarint32(out_file, value): 148 | """ 149 | The encoding of the Varint32 is copied from 150 | google.protobuf.internal.encoder and is only repeated here to 151 | avoid depending on the internal functions in the library. 152 | """ 153 | bits = value & 0x7F 154 | value >>= 7 155 | while value: 156 | out_file.write(struct.pack("<B", bits | 0x80)) 157 | bits = value & 0x7F 158 | value >>= 7 159 | out_file.write(struct.pack("<B", bits)) 160 | 161 | 162 | def encodeMessage(out_file, message): 163 | """ 164 | Encoded a message with the length prepended as a 32-bit varint. 165 | """ 166 | out = message.SerializeToString() 167 | _EncodeVarint32(out_file, len(out)) 168 | out_file.write(out) 169 | -------------------------------------------------------------------------------- /src/trace_link/kineto_operator.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, List, Optional 2 | 3 | from et_replay.execution_trace import Node as PyTorchOperator 4 | 5 | 6 | class KinetoOperator: 32 | def __init__(self, kineto_op: Dict[str, Any]) -> None: 33 | """ 34 | Initialize a new instance of the KinetoOperator class. 35 | 36 | Args: 37 | kineto_op (Dict[str, Any]): The dictionary representing the 38 | operator data. 39 | """ 40 | self.id: Optional[int] = kineto_op.get("id") 41 | self.category: str = kineto_op.get("cat", "") 42 | self.name: str = kineto_op.get("name", "") 43 | self.phase: Optional[str] = kineto_op.get("ph") 44 | self.inclusive_dur: int = kineto_op.get("dur", 0) 45 | self.exclusive_dur: int = kineto_op.get("dur", 0) 46 | self.timestamp: int = kineto_op.get("ts", 0) 47 | self.external_id: int = int(kineto_op.get("args", {}).get("External id", -1)) 48 | self.ev_idx: int = int(kineto_op.get("args", {}).get("Ev Idx", -1)) 49 | self.tid: int = kineto_op.get("tid", 0) 50 | self.host_op: Optional[PyTorchOperator] = None 51 | self.parent_host_op_id: Optional[int] = None 52 | self.inter_thread_dep: Optional[int] = None 53 | self.sync_dep: List[KinetoOperator] = [] 54 | self.stream: Optional[int] = kineto_op.get("args", {}).get("stream", None) 55 | self.rf_id: Optional[int] = kineto_op.get("args", {}).get("Record function id", None) 56 | self.correlation: int = kineto_op.get("args", {}).get("correlation", -1) 57 | self.pg_name: Optional[str] = kineto_op.get("args", {}).get("Process Group Name", None) 58 | 59 | def __repr__(self) -> str: 60 | """ 61 | Represent the KinetoOperator as a string. 62 | 63 | Returns 64 | str: A string representation of the KinetoOperator.
65 | """ 66 | sync_dep_ids = [op.id for op in self.sync_dep] 67 | return ( 68 | f"KinetoOperator(id={self.id}, category={self.category}, name={self.name}, " 69 | f"phase={self.phase}, inclusive_dur={self.inclusive_dur}, " 70 | f"exclusive_dur={self.exclusive_dur}, timestamp={self.timestamp}, " 71 | f"external_id={self.external_id}, ev_idx={self.ev_idx}, tid={self.tid}, " 72 | f"parent_host_op_id={self.parent_host_op_id}, inter_thread_dep={self.inter_thread_dep}, " 73 | f"sync_dep={sync_dep_ids}, stream={self.stream}, rf_id={self.rf_id}, correlation={self.correlation})" 74 | ) 75 | 76 | def is_cpu_op(self) -> bool: 77 | """ 78 | Determine if the operator is simulatable based on its category and name. 79 | 80 | The categories 'cpu_op' and 'user_annotation' are considered CPU operators. 81 | Notably, 'user_annotation' operators often include the duration of CPU operator launch times. 82 | Ignoring the duration measured in 'user_annotation' can lead to inaccuracies in simulation. 83 | An exception to this is 'ProfilerStep', which should be completely ignored. 84 | Ideally, a more general rule should be developed to identify such exception nodes. 85 | 86 | Returns 87 | bool: True if the operator is simulatable, False otherwise. 88 | """ 89 | simulatable_categories = {"cpu_op", "user_annotation"} 90 | name_exceptions = {"ProfilerStep"} 91 | if self.category in simulatable_categories and all(exc not in self.name for exc in name_exceptions): 92 | return True 93 | return False 94 | 95 | def is_cuda_runtime_op(self) -> bool: 96 | """ 97 | Determine whether the operator is a CUDA runtime operator. 98 | 99 | Returns 100 | bool: True if it's a CUDA runtime operator, otherwise False. 101 | """ 102 | return self.category == "cuda_runtime" 103 | 104 | def is_cuda_driver_op(self) -> bool: 105 | """ 106 | Determine whether the operator is a CUDA driver operator. 107 | 108 | Returns 109 | bool: True if it's a CUDA driver operator, otherwise False. 110 | """ 111 | return self.category == "cuda_driver" 112 | 113 | def is_ac2g_op(self) -> bool: 114 | """ 115 | Check if the operator is categorized as 'ac2g', which stands for arrows from CPU to GPU. 116 | 117 | Excerpt from https://pytorch.org/docs/stable/torch.compiler_profiling_torch_compile.html 118 | ``` 119 | Every kernel on the GPU occurs after being launched by code running on the CPU. The profiler can draw 120 | connections (i.e. "flows") between the GPU and CPU events to show which CPU event launched a GPU kernel. 121 | This is particularly helpful because, with a few exceptions, GPU kernels are launched asynchronously. 122 | 123 | To view a flow connection, click on a GPU kernel and click "ac2g". 124 | ```` 125 | 126 | Returns 127 | bool: True if the operator is an 'ac2g' type, otherwise False. 128 | """ 129 | return self.category == "ac2g" 130 | 131 | def is_kernel_launch_op(self) -> bool: 132 | """ 133 | Determine whether the operator is a kernel-launching CUDA runtime operator. 134 | 135 | Returns 136 | bool: True if it's a launch operation, otherwise False. 
137 | """ 138 | cuda_launch_categories = self.is_cuda_runtime_op() or self.is_cuda_driver_op() 139 | cuda_launch_operations = { 140 | "cuLaunchKernel", 141 | "cuLaunchKernelEx", 142 | "cudaLaunchKernel", 143 | "cudaLaunchKernelExC", 144 | "cudaMemcpy", 145 | "cudaMemcpyAsync", 146 | "cudaMemcpyFromSymbol", 147 | "cudaMemcpyToSymbol", 148 | "cudaLaunchCooperativeKernel", 149 | } 150 | 151 | hip_launch_operations = { 152 | "hipLaunchKernel", 153 | "hipExtLaunchKernel", 154 | "hipExtModuleLaunchKernel", 155 | "hipModuleLaunchKernel", 156 | "hipMemcpyWithStream", 157 | "hipMemcpyAsync", 158 | } 159 | return cuda_launch_categories and (self.name in cuda_launch_operations or self.name in hip_launch_operations) 160 | 161 | def is_gpu_op(self) -> bool: 162 | """ 163 | Check if the operator is a GPU-side operator based on its category. 164 | 165 | Returns 166 | bool: True if it's a GPU-side operation, otherwise False. 167 | """ 168 | gpu_categories = {"kernel", "gpu_memcpy"} 169 | return self.category in gpu_categories 170 | 171 | def is_inter_gpu_comms_op(self) -> bool: 172 | """ 173 | Check if the operator is a inter-GPU communication operator based on its name. 174 | 175 | Both point-to-point send/receive primitives and collective communication primitives are considered. 176 | 177 | Returns 178 | bool: True if it's a inter-GPU communication, otherwise False. 179 | """ 180 | return "ncclDevKernel" in self.name 181 | -------------------------------------------------------------------------------- /tests/converter/test_pytorch_converter.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import Dict 3 | from unittest.mock import MagicMock, mock_open, patch 4 | 5 | import pytest 6 | from chakra.schema.protobuf.et_def_pb2 import ( 7 | ALL_GATHER, 8 | ALL_REDUCE, 9 | ALL_TO_ALL, 10 | BROADCAST, 11 | COMM_COLL_NODE, 12 | COMP_NODE, 13 | METADATA_NODE, 14 | REDUCE_SCATTER, 15 | ) 16 | from chakra.schema.protobuf.et_def_pb2 import Node as ChakraNode 17 | from chakra.src.converter.pytorch_converter import PyTorchConverter 18 | from chakra.src.converter.pytorch_node import PyTorchNode 19 | 20 | 21 | @pytest.fixture 22 | def sample_pytorch_data() -> Dict: 23 | return { 24 | "schema": "1.0.2-chakra.0.0.4", 25 | "pid": 1234, 26 | "time": "2023-01-01 12:00:00", 27 | "start_ts": 1000, 28 | "finish_ts": 2000, 29 | "nodes": [ 30 | { 31 | "id": 1, 32 | "name": "node1", 33 | "ctrl_deps": None, 34 | "exclusive_dur": 50, 35 | "inputs": {"values": "values", "shapes": "shapes", "types": "types"}, 36 | "outputs": {"values": "values", "shapes": "shapes", "types": "types"}, 37 | "attrs": [ 38 | {"name": "rf_id", "type": "uint64", "value": 0}, 39 | {"name": "fw_parent", "type": "uint64", "value": 0}, 40 | {"name": "seq_id", "type": "int64", "value": -1}, 41 | {"name": "scope", "type": "uint64", "value": 7}, 42 | {"name": "tid", "type": "uint64", "value": 1}, 43 | {"name": "fw_tid", "type": "uint64", "value": 0}, 44 | {"name": "op_schema", "type": "string", "value": ""}, 45 | ], 46 | }, 47 | { 48 | "id": 2, 49 | "name": "node2", 50 | "ctrl_deps": 1, 51 | "exclusive_dur": 30, 52 | "inputs": {"values": "values", "shapes": "shapes", "types": "types"}, 53 | "outputs": {"values": "values", "shapes": "shapes", "types": "types"}, 54 | "attrs": [ 55 | {"name": "rf_id", "type": "uint64", "value": 0}, 56 | {"name": "fw_parent", "type": "uint64", "value": 0}, 57 | {"name": "seq_id", "type": "int64", "value": -1}, 58 | {"name": "scope", "type": "uint64", "value": 7}, 59 | 
{"name": "tid", "type": "uint64", "value": 1}, 60 | {"name": "fw_tid", "type": "uint64", "value": 0}, 61 | {"name": "op_schema", "type": "string", "value": ""}, 62 | ], 63 | }, 64 | ], 65 | } 66 | 67 | 68 | @pytest.fixture 69 | def mock_chakra_node() -> ChakraNode: 70 | node = ChakraNode() 71 | node.id = 1 72 | node.name = "node1" 73 | node.type = COMP_NODE 74 | return node 75 | 76 | 77 | @patch("builtins.open", new_callable=mock_open) 78 | def test_load_json_execution_traces(mock_file: MagicMock, sample_pytorch_data: Dict) -> None: 79 | mock_file.return_value.read.return_value = json.dumps(sample_pytorch_data) 80 | converter = PyTorchConverter() 81 | data = converter.load_json_execution_traces("input.json") 82 | assert data == sample_pytorch_data 83 | mock_file.assert_called_once_with("input.json", "r") 84 | 85 | 86 | def test_parse_json_trace(sample_pytorch_data: Dict) -> None: 87 | converter = PyTorchConverter() 88 | json_metadata, json_node_map = converter.parse_json_trace(sample_pytorch_data) 89 | 90 | assert json_metadata["schema"] == "1.0.2-chakra.0.0.4" 91 | assert json_metadata["pid"] == 1234 92 | assert json_metadata["time"] == "2023-01-01 12:00:00" 93 | assert json_metadata["start_ts"] == 1000 94 | assert json_metadata["finish_ts"] == 2000 95 | assert len(json_node_map) == 2 96 | assert json_node_map[1].id == 1 97 | assert json_node_map[2].id == 2 98 | 99 | 100 | def create_sample_graph(parent_id: int = 0, expected_child_id: int = 0) -> Dict[int, PyTorchNode]: 101 | node1_data = { 102 | "id": 1, 103 | "name": "node1", 104 | "ctrl_deps": None, 105 | "inputs": {"values": ["val1"], "shapes": ["shape1"], "types": ["type1"]}, 106 | "outputs": {"values": ["val1"], "shapes": ["shape1"], "types": ["type1"]}, 107 | "attrs": [], 108 | } 109 | node2_data = { 110 | "id": 2, 111 | "name": "node2", 112 | "ctrl_deps": parent_id, 113 | "inputs": {"values": ["val2"], "shapes": ["shape2"], "types": ["type2"]}, 114 | "outputs": {"values": ["val2"], "shapes": ["shape2"], "types": ["type2"]}, 115 | "attrs": [], 116 | } 117 | node1 = PyTorchNode("1.0.2-chakra.0.0.4", node1_data) 118 | node2 = PyTorchNode("1.0.2-chakra.0.0.4", node2_data) 119 | return {1: node1, 2: node2} 120 | 121 | 122 | @pytest.mark.parametrize("parent_id, expected_child_id", [(1, 2), (None, None)]) 123 | def test_establish_parent_child_relationships(parent_id: int, expected_child_id: int) -> None: 124 | converter = PyTorchConverter() 125 | json_node_map = create_sample_graph(parent_id, expected_child_id) 126 | 127 | json_node_map = converter.establish_parent_child_relationships(json_node_map, []) 128 | 129 | if expected_child_id: 130 | assert json_node_map[parent_id].children[0].id == expected_child_id 131 | else: 132 | assert len(json_node_map[1].children) == 0 133 | 134 | 135 | def test_convert_json_to_protobuf_nodes(sample_pytorch_data: Dict) -> None: 136 | converter = PyTorchConverter() 137 | json_metadata, json_node_map = converter.parse_json_trace(sample_pytorch_data) 138 | json_node_map = converter.establish_parent_child_relationships(json_node_map, []) 139 | chakra_nodes = {} 140 | converter.convert_json_to_protobuf_nodes(json_node_map, chakra_nodes) 141 | assert len(chakra_nodes) == 2 142 | assert chakra_nodes[1].id == 1 143 | assert chakra_nodes[2].id == 2 144 | 145 | 146 | def test_convert_ctrl_dep_to_data_dep(sample_pytorch_data: Dict) -> None: 147 | converter = PyTorchConverter() 148 | json_metadata, json_node_map = converter.parse_json_trace(sample_pytorch_data) 149 | json_node_map = 
converter.establish_parent_child_relationships(json_node_map, []) 150 | chakra_nodes = {} 151 | converter.convert_json_to_protobuf_nodes(json_node_map, chakra_nodes) 152 | root_node = chakra_nodes[1] 153 | converter.convert_ctrl_dep_to_data_dep(json_node_map, chakra_nodes, root_node) 154 | assert root_node.data_deps == [] 155 | 156 | 157 | @patch("builtins.open", new_callable=mock_open) 158 | def test_write_chakra_et(mock_file: MagicMock, sample_pytorch_data: Dict) -> None: 159 | converter = PyTorchConverter() 160 | json_metadata, json_node_map = converter.parse_json_trace(sample_pytorch_data) 161 | json_node_map = converter.establish_parent_child_relationships(json_node_map, []) 162 | chakra_nodes = {} 163 | converter.convert_json_to_protobuf_nodes(json_node_map, chakra_nodes) 164 | converter.write_protobuf_execution_trace("output.et", json_metadata, chakra_nodes) 165 | assert mock_file().write.called 166 | 167 | 168 | @pytest.mark.parametrize( 169 | "pytorch_node_data, expected_type", 170 | [ 171 | ({"name": "process_group:init", "is_gpu_op": False, "is_metadata_op": True}, METADATA_NODE), 172 | ({"name": "ncclKernel", "is_gpu_op": True, "is_metadata_op": False}, COMM_COLL_NODE), 173 | ({"name": "ncclDevKernel", "is_gpu_op": True, "is_metadata_op": False}, COMM_COLL_NODE), 174 | ({"name": "c10d::all_reduce", "is_gpu_op": True, "is_metadata_op": False}, COMP_NODE), 175 | ({"name": "other_op", "is_gpu_op": False, "is_metadata_op": False}, COMP_NODE), 176 | ], 177 | ) 178 | def test_get_protobuf_node_type_from_json_node(pytorch_node_data: Dict, expected_type: int) -> None: 179 | # Create a mock PyTorchNode with the required attributes 180 | pytorch_node = MagicMock(spec=PyTorchNode) 181 | pytorch_node.name = pytorch_node_data["name"] 182 | pytorch_node.is_gpu_op = MagicMock(return_value=pytorch_node_data["is_gpu_op"]) 183 | pytorch_node.is_metadata_op = MagicMock(return_value=pytorch_node_data["is_metadata_op"]) 184 | 185 | # Create a mock json_node_map dictionary with actual PyTorchNode instances 186 | mock_pytorch_node_data = { 187 | "id": 0, 188 | "name": "mock_node", 189 | "ctrl_deps": None, 190 | "exclusive_dur": 0, 191 | "inputs": {"values": [], "shapes": [], "types": []}, 192 | "outputs": {"values": [], "shapes": [], "types": []}, 193 | "attrs": [], 194 | } 195 | mock_pytorch_node = PyTorchNode("1.0.2-chakra.0.0.4", mock_pytorch_node_data) 196 | json_node_map = {0: mock_pytorch_node, 1: pytorch_node} 197 | 198 | converter = PyTorchConverter() 199 | node_type = converter.get_protobuf_node_type_from_json_node(json_node_map, pytorch_node) 200 | assert node_type == expected_type 201 | 202 | 203 | @pytest.mark.parametrize( 204 | "name, expected_comm_type", 205 | [ 206 | ("allreduce", ALL_REDUCE), 207 | ("alltoall", ALL_TO_ALL), 208 | ("allgather", ALL_GATHER), 209 | ("reducescatter", REDUCE_SCATTER), 210 | ("broadcast", BROADCAST), 211 | ], 212 | ) 213 | def test_get_collective_comm_type(name: str, expected_comm_type: int) -> None: 214 | converter = PyTorchConverter() 215 | comm_type = converter.get_collective_comm_type(name) 216 | assert comm_type == expected_comm_type 217 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 
9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 
176 | 177 | END OF TERMS AND CONDITIONS 178 | -------------------------------------------------------------------------------- /src/converter/pytorch_node.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | from enum import Enum 3 | from typing import Any, Dict, List, Optional 4 | 5 | from .pytorch_tensor import PyTorchTensor 6 | 7 | 8 | class PyTorchNodeType(Enum): 9 | """ 10 | Enum representing the type of a PyTorch node in an execution trace. 11 | 12 | Attributes 13 | CPU_OP (int): Represents a CPU operation. 14 | GPU_OP (int): Represents a GPU operation. 15 | LABEL (int): Represents a non-operator node (e.g., labels). 16 | METADATA (int): Represents a metadata node (e.g., process group initialization). 17 | """ 18 | 19 | CPU_OP = 1 20 | GPU_OP = 2 21 | LABEL = 3 # Non-operator nodes 22 | METADATA = 4 # Metadata nodes 23 | 24 | 25 | class PyTorchNode: 26 | """ 27 | Represents a node in a PyTorch execution trace, initialized based on a schema version. 28 | 29 | Attributes 30 | schema (str): Schema version used for initialization. 31 | data_deps (List[PyTorchNode]): List of data-dependent parent nodes. 32 | children (List[PyTorchNode]): List of child nodes. 33 | gpu_children (List[PyTorchNode]): List of GPU-specific child nodes. 34 | record_param_comms_node (Optional[PyTorchNode]): Corresponding record_param_comms node. 35 | nccl_node (Optional[PyTorchNode]): Corresponding NCCL node. 36 | id (str): Identifier of the node. 37 | name (str): Name of the node. 38 | parent (Any): Parent of the node. 39 | inputs (Any): Inputs of the node. 40 | outputs (Any): Outputs of the node. 41 | inclusive_dur (Optional[float]): Inclusive duration of the node. 42 | exclusive_dur (float): Exclusive duration of the node. 43 | ts (Optional[float]): Timestamp of the node. 44 | inter_thread_dep (Any): Inter-thread dependency of the node. 45 | cat (Any): Category of the node. 46 | stream (int): Stream associated with the node. 47 | pg_name (str): Process Group name for the inter-GPU communication. 48 | """ 49 | 50 | SUPPORTED_VERSIONS = ["1.0.2-chakra.0.0.4", "1.0.3-chakra.0.0.4", "1.1.0-chakra.0.0.4", "1.1.1-chakra.0.0.4"] 51 | 52 | def __init__(self, schema: str, node_data: Dict[str, Any]) -> None: 53 | """ 54 | Initialize a PyTorchNode object using the node data and schema version provided. 55 | 56 | Args: 57 | schema (str): The schema version based on which the node will be initialized. 58 | node_data (Dict[str, Any]): Dictionary containing the data of the PyTorch node. 59 | """ 60 | self.schema = schema 61 | self.data_deps: List["PyTorchNode"] = [] 62 | self.children: List["PyTorchNode"] = [] 63 | self.gpu_children: List["PyTorchNode"] = [] 64 | self.record_param_comms_node: Optional["PyTorchNode"] = None 65 | self.nccl_node: Optional["PyTorchNode"] = None 66 | 67 | self.parse_data(node_data) 68 | 69 | def __repr__(self) -> str: 70 | """ 71 | Provide a string representation of the PyTorchNode. 72 | 73 | Returns 74 | str: String representation of the node. 75 | """ 76 | return ( 77 | f"PyTorchNode(id={self.id}, name={self.name}, op_type={self.get_op_type()}, timestamp={self.ts}, " 78 | f"inclusive_duration={self.inclusive_dur}, exclusive_duration={self.exclusive_dur})" 79 | ) 80 | 81 | def parse_data(self, node_data: Dict[str, Any]) -> None: 82 | """ 83 | Parse node data based on the provided schema version. 84 | 85 | Args: 86 | node_data (Dict[str, Any]): The node data to be parsed. 
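For illustration, a minimal node_data sketch in the "1.0.2-chakra.0.0.4" schema (field names mirror the fixtures in tests/converter/test_pytorch_converter.py; the values are made up):

    {"id": 1, "name": "node1", "ctrl_deps": None,
     "inputs": {"values": [], "shapes": [], "types": []},
     "outputs": {"values": [], "shapes": [], "types": []},
     "attrs": []}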
87 | """ 88 | if self.schema in self.SUPPORTED_VERSIONS: 89 | if self.schema in ["1.0.2-chakra.0.0.4", "1.0.3-chakra.0.0.4", "1.1.0-chakra.0.0.4", "1.1.1-chakra.0.0.4"]: 90 | self._parse_data_1_0_3_chakra_0_0_4(node_data) 91 | else: 92 | raise ValueError( 93 | f"Unsupported schema version '{self.schema}'. Please check if the schema version is in the list of " 94 | f"supported versions: {self.SUPPORTED_VERSIONS}. The schema version of the trace is not supported by " 95 | f"the converter. The schema version is determined by the PyTorch version used to collect Chakra host " 96 | f"execution traces. Please consider changing the PyTorch version you are using. For more details, you " 97 | f"can follow the git history of the relevant file: " 98 | f"https://github.com/pytorch/pytorch/blob/7cd48df2dae7e2194438b162968c47d1f05bf20e/torch/csrc/" 99 | f"profiler/standalone/execution_trace_observer.cpp#L308. Check which PyTorch versions generate Chakra " 100 | f"host traces that are supported by the converter." 101 | ) 102 | 103 | def _parse_data_1_0_3_chakra_0_0_4(self, node_data: Dict[str, Any]) -> None: 104 | self.id = node_data["id"] 105 | self.name = node_data["name"] 106 | self.parent = node_data["ctrl_deps"] 107 | self.inputs = node_data["inputs"] 108 | self.outputs = node_data["outputs"] 109 | self.inclusive_dur = node_data.get("inclusive_dur") 110 | self.exclusive_dur = node_data.get("exclusive_dur", 0) 111 | self.ts = node_data.get("ts") 112 | self.inter_thread_dep = node_data.get("inter_thread_dep") 113 | self.sync_dep = node_data.get("sync_dep") 114 | self.cat = node_data.get("cat") 115 | self.stream = node_data.get("stream", 0) 116 | # In Colletive comms nodes, pg_name is in node_data if exists. 117 | # In SendRecv nodes, pg_name is in the attrs if exists. 118 | # Otherwise, pg_name is not present. 119 | self.pg_name = node_data.get("pg_name", "") 120 | 121 | for attr in node_data.get("attrs", []): 122 | setattr(self, attr["name"], attr["value"]) 123 | 124 | def get_op_type(self) -> PyTorchNodeType: 125 | """ 126 | Determine the type of PyTorch operation. 127 | 128 | Returns 129 | PyTorchNodeType: The type of the PyTorch operation. 130 | """ 131 | if "process_group:init" in self.name: 132 | return PyTorchNodeType.METADATA 133 | elif self.is_gpu_op(): 134 | return PyTorchNodeType.GPU_OP 135 | elif hasattr(self, "op_schema") or hasattr(self, "outputs"): 136 | return PyTorchNodeType.CPU_OP 137 | else: 138 | return PyTorchNodeType.LABEL 139 | 140 | def is_metadata_op(self) -> bool: 141 | """ 142 | Check if the node is a METADATA operator. 143 | 144 | Returns 145 | bool: True if the node is a METADATA operator, False otherwise. 146 | """ 147 | return self.get_op_type() == PyTorchNodeType.METADATA 148 | 149 | def is_cpu_op(self) -> bool: 150 | """ 151 | Check if the node is a CPU operator. 152 | 153 | Returns 154 | bool: True if the node is a CPU operator, False otherwise. 155 | """ 156 | return self.get_op_type() == PyTorchNodeType.CPU_OP 157 | 158 | def is_gpu_op(self) -> bool: 159 | """ 160 | Check if the node is a GPU operator. 161 | 162 | Returns 163 | bool: True if the node is a GPU operator, False otherwise. 164 | """ 165 | return self.cat is not None 166 | 167 | def add_data_dep(self, parent_node: "PyTorchNode") -> None: 168 | """ 169 | Add a data-dependent parent node to this node. 170 | 171 | Args: 172 | parent_node (PyTorchNode): The parent node to be added. 
173 | """ 174 | self.data_deps.append(parent_node) 175 | 176 | def add_child(self, child_node: "PyTorchNode") -> None: 177 | """ 178 | Add a child node to this node. 179 | 180 | Args: 181 | child_node (PyTorchNode): The child node to be added. 182 | """ 183 | self.children.append(child_node) 184 | 185 | def add_gpu_child(self, gpu_child_node: "PyTorchNode") -> None: 186 | """ 187 | Add a child GPU node for this node. 188 | 189 | Args: 190 | gpu_child_node (Optional[PyTorchNode]): The child GPU node to be added. 191 | """ 192 | self.gpu_children.append(gpu_child_node) 193 | 194 | def is_record_param_comms_op(self) -> bool: 195 | """ 196 | Check if the node is a record_param_comms operator. 197 | 198 | Returns 199 | bool: True if the node is a record_param_comms operator, False otherwise. 200 | """ 201 | return "record_param_comms" in self.name 202 | 203 | def is_nccl_op(self) -> bool: 204 | """ 205 | Check if the node is a NCCL operator. 206 | 207 | Returns 208 | bool: True if the node is a NCCL operator, False otherwise. 209 | """ 210 | return "nccl:" in self.name 211 | 212 | @property 213 | def comm_size(self) -> int: 214 | """ 215 | Calculate the communication size for the given input types and shapes. 216 | 217 | Returns 218 | int: The calculated communication size. 219 | """ 220 | comm_size = 0 221 | for input_value, input_type in zip(self.inputs["values"], self.inputs["types"]): 222 | if "Tensor" in input_type: 223 | if input_type.startswith("GenericList[Tensor"): 224 | for inner_value in input_value: 225 | tensor = PyTorchTensor(inner_value) 226 | input_size = tensor.num_elem * tensor.elem_bytes 227 | comm_size += input_size 228 | else: 229 | tensor = PyTorchTensor(input_value) 230 | input_size = tensor.num_elem * tensor.elem_bytes 231 | comm_size += input_size 232 | return comm_size 233 | 234 | @staticmethod 235 | def get_data_type_size(data_type: str) -> int: 236 | """ 237 | Return the data type size of a given data type in string. 238 | 239 | Args: 240 | data_type (str): The data type as a string. 241 | 242 | Returns: 243 | int: The size of the data type in bytes. 244 | 245 | Raises: 246 | ValueError: If the data type is not supported. 247 | """ 248 | data_type_size_map = { 249 | "Tensor(float32)": 4, 250 | "Tensor(float)": 4, 251 | "Tensor(float64)": 8, 252 | "Tensor(double)": 8, 253 | "Tensor(float16)": 2, 254 | "Tensor(half)": 2, 255 | "Tensor(bfloat16)": 2, 256 | "Tensor(complex64)": 8, 257 | "Tensor(complex128)": 16, 258 | "Tensor(uint8)": 1, 259 | "Tensor(int8)": 1, 260 | "Tensor(int16)": 2, 261 | "Tensor(short)": 2, 262 | "Tensor(int32)": 4, 263 | "Tensor(int)": 4, 264 | "Tensor(int64)": 8, 265 | "Tensor(long)": 8, 266 | "Tensor(c10::Half)": 2, 267 | "Tensor(c10::BFloat16)": 2, 268 | "Tensor(unsigned char)": 1, 269 | "Tensor(long int)": 8, 270 | # TODO: Add more types 271 | } 272 | try: 273 | return data_type_size_map[data_type] 274 | except KeyError as e: 275 | traceback_str = traceback.format_exc() 276 | raise ValueError( 277 | f"Unsupported data type: {data_type}. The data_type_size_map dictionary is used for mapping the " 278 | f"number of bytes for a given tensor data type. This dictionary may be incomplete. Please update the " 279 | f"data_type_size_map or report this issue to the maintainer by creating an issue. 
Traceback:\n" 280 | f"{traceback_str}" 281 | ) from e 282 | -------------------------------------------------------------------------------- /src/trace_link/chakra_device_trace_loader.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import sys 3 | from concurrent.futures import ThreadPoolExecutor, as_completed 4 | from typing import Dict, List, Tuple 5 | 6 | from et_replay.utils import read_dictionary_from_json_file 7 | 8 | from .kineto_operator import KinetoOperator 9 | 10 | 11 | class ChakraDeviceTraceLoader: 12 | """Loads Chakra device traces.""" 13 | 14 | def load( 15 | self, chakra_device_trace: str 16 | ) -> Tuple[ 17 | List[KinetoOperator], 18 | Dict[int, List[KinetoOperator]], 19 | Dict[int, List[KinetoOperator]], 20 | Dict[int, KinetoOperator], 21 | List[KinetoOperator], 22 | Dict[int, KinetoOperator], 23 | Dict[int, KinetoOperator], 24 | int, 25 | int, 26 | Dict[int, Tuple[int, int]], 27 | Dict[int, KinetoOperator], 28 | List[KinetoOperator], 29 | List[int], 30 | Dict[int, KinetoOperator], 31 | ]: 32 | """ 33 | Load and process the Chakra device trace. 34 | 35 | Args: 36 | chakra_device_trace (str): Path to the Chakra device trace file. 37 | 38 | Returns: 39 | Tuple containing various data structures needed for linking traces. 40 | """ 41 | logging.debug(f"Starting to load Chakra device trace from file: {chakra_device_trace}.") 42 | chakra_trace_data = read_dictionary_from_json_file(chakra_device_trace) 43 | sorted_kineto_ops = sorted( 44 | [KinetoOperator(op) for op in chakra_trace_data["traceEvents"]], 45 | key=lambda op: op.timestamp, 46 | ) 47 | 48 | dev_data = self.construct_dev_data_structures(sorted_kineto_ops, chakra_device_trace) 49 | self.calculate_exclusive_dur(dev_data["kineto_tid_cpu_ops_map"]) 50 | 51 | dev_data["sorted_kineto_cpu_ops"] = sorted(dev_data["kineto_cpu_ops"], key=lambda op: op.timestamp) 52 | dev_data["sorted_kineto_cpu_op_ts"] = [op.timestamp for op in dev_data["sorted_kineto_cpu_ops"]] 53 | 54 | logging.debug( 55 | f"Processed Chakra device trace with {len(dev_data['kineto_cpu_ops'])} CPU ops, " 56 | f"{len(dev_data['kineto_id_cuda_launch_op_map'])} CPU launcher ops, " 57 | f"and {len(dev_data['kineto_gpu_ops'])} GPU ops." 58 | ) 59 | logging.debug("Chakra device trace has been loaded and processed successfully.") 60 | return ( 61 | dev_data["kineto_cpu_ops"], 62 | dev_data["kineto_tid_ops_map"], 63 | dev_data["kineto_tid_cpu_ops_map"], 64 | dev_data["kineto_correlation_cuda_runtime_map"], 65 | dev_data["kineto_gpu_ops"], 66 | dev_data["kineto_id_arrow_op_map"], 67 | dev_data["kineto_id_cuda_launch_op_map"], 68 | dev_data["kineto_process_start_time"], 69 | dev_data["kineto_process_end_time"], 70 | dev_data["kineto_thread_info"], 71 | dev_data["kineto_rf_id_to_kineto_op_map"], 72 | dev_data["sorted_kineto_cpu_ops"], 73 | dev_data["sorted_kineto_cpu_op_ts"], 74 | dev_data["kineto_external_id_to_kineto_op_map"], 75 | ) 76 | 77 | def construct_dev_data_structures(self, kineto_ops: List[KinetoOperator], trace_file: str) -> Dict: 78 | """ 79 | Construct necessary data structures required for trace linking from the provided Kineto operators. 80 | 81 | This method identifies process start time, end time, thread start time, and end time, and also categorizes 82 | operators into CPU, GPU, and other relevant groups. 83 | 84 | Args: 85 | kineto_ops (List[KinetoOperator]): List of Kineto operators to categorize. 86 | trace_file (str): Path to the trace file for logging purposes. 
87 | 
88 |         Returns:
89 |             Dict: Dictionary containing categorized operators and timing boundaries.
90 |         """
91 |         logging.debug("Categorizing Kineto operators and calculating timing boundaries.")
92 |         process_start_time = sys.maxsize
93 |         process_end_time = 0
94 |         thread_info = {}
95 | 
96 |         kineto_cpu_ops = []
97 |         kineto_tid_ops_map = {}
98 |         kineto_tid_cpu_ops_map = {}
99 |         kineto_correlation_cuda_runtime_map = {}
100 |         kineto_gpu_ops = []
101 |         kineto_id_arrow_op_map = {}
102 |         kineto_id_cuda_launch_op_map = {}
103 |         kineto_external_id_to_kineto_op_map = {}
104 | 
105 |         for op in kineto_ops:
106 |             kineto_tid_ops_map.setdefault(op.tid, []).append(op)
107 | 
108 |             if op.is_cpu_op():
109 |                 kineto_cpu_ops.append(op)
110 |                 kineto_tid_cpu_ops_map.setdefault(op.tid, []).append(op)
111 |                 logging.debug(f"Added CPU or user annotation op: {op.name}")
112 | 
113 |             elif op.is_kernel_launch_op():
114 |                 kineto_id_cuda_launch_op_map[op.external_id] = op
115 |                 if op.correlation in kineto_correlation_cuda_runtime_map:
116 |                     error_msg = (
117 |                         f"Duplicate correlation ID {op.correlation} found in kineto_correlation_cuda_runtime_map. "
118 |                         "The kineto_correlation_cuda_runtime_map links each GPU operator with the CPU operator that "
119 |                         "launched it. The correlation field works as the link, and this map stores a mapping between "
120 |                         "a correlation ID and its launcher operator. Each kernel launch operator should have a unique "
121 |                         "correlation ID for linking it to a GPU operator. Therefore, a duplicated correlation ID is "
122 |                         "not expected in the map. Please review the file manually to see if the operator has an "
123 |                         f"invalid correlation value in file: {trace_file}."
124 |                     )
125 |                     logging.error(error_msg)
126 |                     raise ValueError(error_msg)
127 |                 kineto_correlation_cuda_runtime_map[op.correlation] = op
128 |                 logging.debug(f"Added CPU launcher op: {op.name}")
129 | 
130 |             elif op.is_gpu_op():
131 |                 kineto_gpu_ops.append(op)
132 |                 logging.debug(f"Added GPU op: {op.name}")
133 | 
134 |             elif op.is_ac2g_op():  # arrow from CPU to GPU
135 |                 assert (op.phase == "s") or (op.phase == "f")
136 |                 if op.id is None:
137 |                     error_msg = (
138 |                         f"'id' field is None in Kineto operator: {op} in file: {trace_file}. This is unexpected as "
139 |                         "'id' should generally be populated for 'ac2g' operators. Please verify the validity of "
140 |                         "the Kineto trace and the operator data."
141 | ) 142 | logging.error(error_msg) 143 | raise KeyError(error_msg) 144 | 145 | kineto_id_arrow_op_map[op.id] = op 146 | 147 | # Update timing boundaries 148 | if op.tid is not None: 149 | process_start_time = min(process_start_time, op.timestamp) 150 | process_end_time = max(process_end_time, op.timestamp + op.inclusive_dur) 151 | thread_start_end = thread_info.setdefault(op.tid, [sys.maxsize, 0]) 152 | thread_start_end[0] = min(thread_start_end[0], op.timestamp) 153 | thread_start_end[1] = max(thread_start_end[1], op.timestamp + op.inclusive_dur) 154 | 155 | if op.external_id is not None: 156 | kineto_external_id_to_kineto_op_map[op.external_id] = op 157 | 158 | kineto_rf_id_to_kineto_op_map = {op.rf_id: op for op in kineto_cpu_ops if op.rf_id is not None} 159 | 160 | return { 161 | "kineto_cpu_ops": kineto_cpu_ops, 162 | "kineto_tid_ops_map": kineto_tid_ops_map, 163 | "kineto_tid_cpu_ops_map": kineto_tid_cpu_ops_map, 164 | "kineto_correlation_cuda_runtime_map": kineto_correlation_cuda_runtime_map, 165 | "kineto_gpu_ops": kineto_gpu_ops, 166 | "kineto_id_arrow_op_map": kineto_id_arrow_op_map, 167 | "kineto_id_cuda_launch_op_map": kineto_id_cuda_launch_op_map, 168 | "kineto_process_start_time": process_start_time, 169 | "kineto_process_end_time": process_end_time, 170 | "kineto_thread_info": thread_info, 171 | "kineto_rf_id_to_kineto_op_map": kineto_rf_id_to_kineto_op_map, 172 | "sorted_kineto_cpu_ops": [], 173 | "sorted_kineto_cpu_op_ts": [], 174 | "kineto_external_id_to_kineto_op_map": kineto_external_id_to_kineto_op_map, 175 | } 176 | 177 | def calculate_exclusive_dur(self, kineto_tid_cpu_ops_map: Dict[int, List[KinetoOperator]]) -> None: 178 | """ 179 | Calculate the exclusive duration of each operator in the Kineto traces in parallel. 180 | 181 | The exclusive duration is defined as the total duration of the operator minus any time spent in child operators, 182 | effectively representing the time spent exclusively in that operator. 183 | 184 | Args: 185 | kineto_tid_cpu_ops_map (Dict[int, List[KinetoOperator]]): Map of thread IDs to their corresponding Kineto 186 | operators. 
187 |         """
188 |         logging.debug("Calculating exclusive durations for Kineto operators in parallel.")
189 | 
190 |         def process_ops_for_thread(ops: List[KinetoOperator]) -> None:
191 |             logging.debug(f"Processing {len(ops)} operators in thread.")
192 |             sorted_ops = sorted(ops, key=lambda op: (op.timestamp, op.inclusive_dur))
193 |             for i, op in enumerate(sorted_ops):
194 |                 exclusive_dur = op.inclusive_dur
195 |                 overlapping_regions = []
196 | 
197 |                 # Identify overlapping regions with child operators
198 |                 for child_op in sorted_ops[i + 1 :]:
199 |                     if child_op.timestamp >= op.timestamp and (child_op.timestamp + child_op.inclusive_dur) <= (
200 |                         op.timestamp + op.inclusive_dur
201 |                     ):
202 |                         overlap_start = child_op.timestamp
203 |                         overlap_end = child_op.timestamp + child_op.inclusive_dur
204 |                         overlapping_regions.append((overlap_start, overlap_end))
205 |                     if (op.timestamp + op.inclusive_dur) < child_op.timestamp:
206 |                         break
207 | 
208 |                 # Merge overlapping regions and calculate exclusive duration
209 |                 merged_regions = self.merge_overlapping_intervals(overlapping_regions)
210 |                 for start, end in merged_regions:
211 |                     exclusive_dur -= end - start
212 | 
213 |                 # Ensure the calculated exclusive duration is not negative
214 |                 if exclusive_dur < 0:
215 |                     error_msg = (
216 |                         f"Exclusive duration calculation error for node '{op.name}' "
217 |                         f"(ts: {op.timestamp}, inclusive_dur: {op.inclusive_dur}, rf_id: {op.rf_id}): "
218 |                         f"Duration cannot be less than zero."
219 |                     )
220 |                     logging.error(error_msg)
221 |                     raise ValueError(error_msg)
222 | 
223 |                 op.exclusive_dur = exclusive_dur
224 |                 logging.debug(
225 |                     f"Node '{op.name}' (ts: {op.timestamp}, inclusive_dur: {op.inclusive_dur}, "
226 |                     f"rf_id: {op.rf_id}) exclusive duration: {op.exclusive_dur} microseconds."
227 |                 )
228 | 
229 |         with ThreadPoolExecutor() as executor:
230 |             futures = [executor.submit(process_ops_for_thread, ops) for ops in kineto_tid_cpu_ops_map.values()]
231 | 
232 |             for future in as_completed(futures):
233 |                 future.result()  # Wait for all threads to complete and handle any exceptions
234 | 
235 |         logging.debug("Exclusive durations for Kineto operators calculated successfully.")
236 | 
237 |     @staticmethod
238 |     def merge_overlapping_intervals(intervals: List[Tuple[int, int]]) -> List[Tuple[int, int]]:
239 |         """
240 |         Merge overlapping intervals into a minimal set of non-overlapping intervals.
241 | 
242 |         Args:
243 |             intervals (List[Tuple[int, int]]): List of intervals.
244 | 
245 |         Returns:
246 |             List[Tuple[int, int]]: List of merged intervals.
247 |         """
248 |         if not intervals:
249 |             return []
250 | 
251 |         # Sort intervals based on the start time
252 |         intervals.sort(key=lambda x: x[0])
253 |         merged = [intervals[0]]
254 | 
255 |         for current in intervals[1:]:
256 |             prev = merged[-1]
257 |             if current[0] <= prev[1]:
258 |                 # There is overlap, merge the current interval with the previous one
259 |                 merged[-1] = (prev[0], max(prev[1], current[1]))
260 |             else:
261 |                 # No overlap, add the current interval
262 |                 merged.append(current)
263 | 
264 |         return merged
265 | 
--------------------------------------------------------------------------------
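A worked example of the exclusive-duration computation above, with merge_overlapping_intervals reproduced standalone so the snippet runs on its own: an operator spanning [0, 100] whose children cover [10, 30], [20, 40], and [60, 70] retains 100 - (30 + 10) = 60 time units of exclusive duration.

    def merge_overlapping_intervals(intervals):
        # Same merging logic as in ChakraDeviceTraceLoader, reproduced standalone.
        if not intervals:
            return []
        intervals.sort(key=lambda x: x[0])
        merged = [intervals[0]]
        for current in intervals[1:]:
            prev = merged[-1]
            if current[0] <= prev[1]:
                merged[-1] = (prev[0], max(prev[1], current[1]))
            else:
                merged.append(current)
        return merged

    children = [(10, 30), (20, 40), (60, 70)]
    merged = merge_overlapping_intervals(children)
    exclusive = 100 - sum(end - start for start, end in merged)
    print(merged, exclusive)  # [(10, 40), (60, 70)] 60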
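For orientation, a minimal sketch of how kineto_correlation_cuda_runtime_map built in construct_dev_data_structures is consumed downstream: a GPU kernel and the CPU runtime call that launched it share a correlation ID, so the map recovers the launcher for any GPU operator. The SimpleNamespace objects here are hypothetical stand-ins for KinetoOperator records, for illustration only.

    from types import SimpleNamespace

    # Hypothetical stand-ins for KinetoOperator records (illustration only).
    launcher = SimpleNamespace(name="cudaLaunchKernel", correlation=42)
    gpu_kernel = SimpleNamespace(name="sgemm_kernel", correlation=42)

    correlation_map = {launcher.correlation: launcher}

    # Recover the CPU launcher for the GPU kernel via the shared correlation ID.
    parent = correlation_map.get(gpu_kernel.correlation)
    print(parent.name if parent else "unlinked")  # cudaLaunchKernel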
/src/generator/generator.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | 
3 | from ...schema.protobuf.et_def_pb2 import (
4 |     ALL_GATHER,
5 |     ALL_REDUCE,
6 |     ALL_TO_ALL,
7 |     BARRIER,
8 |     BROADCAST,
9 |     COMM_COLL_NODE,
10 |     COMM_RECV_NODE,
11 |     COMM_SEND_NODE,
12 |     COMP_NODE,
13 |     MEM_LOAD_NODE,
14 |     MEM_STORE_NODE,
15 |     METADATA_NODE,
16 |     REDUCE_SCATTER,
17 |     BoolList,
18 |     BytesList,
19 |     DoubleList,
20 |     Fixed32List,
21 |     Fixed64List,
22 |     FloatList,
23 |     GlobalMetadata,
24 |     Int32List,
25 |     Int64List,
26 |     Sfixed32List,
27 |     Sfixed64List,
28 |     Sint32List,
29 |     Sint64List,
30 |     StringList,
31 |     Uint32List,
32 |     Uint64List,
33 | )
34 | from ...schema.protobuf.et_def_pb2 import (
35 |     AttributeProto as ChakraAttr,
36 | )
37 | from ...schema.protobuf.et_def_pb2 import (
38 |     Node as ChakraNode,
39 | )
40 | from ...schema.protobuf.et_def_pb2 import (
41 |     NodeType as ChakraNodeType,
42 | )
43 | from ..third_party.utils.protolib import encodeMessage as encode_message
44 | 
45 | NODE_ID = 0
46 | 
47 | 
48 | def get_node(node_name: str, node_type: ChakraNodeType) -> ChakraNode:
49 |     """Generate a new ChakraNode with a unique ID."""
50 |     global NODE_ID
51 |     node = ChakraNode()
52 |     node.id = NODE_ID
53 |     node.name = node_name
54 |     node.type = node_type
55 |     NODE_ID += 1
56 |     return node
57 | 
58 | 
59 | def get_comm_type_attr(comm_type: int) -> ChakraAttr:
60 |     """Create a communication type attribute."""
61 |     return ChakraAttr(name="comm_type", int64_val=comm_type)
62 | 
63 | 
64 | def one_metadata_node_all_types(num_npus: int) -> None:
65 |     """Generate metadata nodes with all types of attributes."""
66 |     for npu_id in range(num_npus):
67 |         output_filename = f"one_metadata_node_all_types.{npu_id}.et"
68 |         with open(output_filename, "wb") as et:
69 |             encode_message(et, GlobalMetadata(version="0.0.4"))
70 | 
71 |             node = get_node("METADATA_NODE", METADATA_NODE)
72 |             node.attr.extend(
73 |                 [
74 |                     ChakraAttr(name="double", double_val=1.2345, doc_string="double"),
75 |                     ChakraAttr(name="double_list", double_list=DoubleList(values=[1.2345, 2.3456])),
76 |                     ChakraAttr(name="float", float_val=1.2345, doc_string="float"),
77 |                     ChakraAttr(name="float_list", float_list=FloatList(values=[1.2345, 2.3456])),
78 |                     ChakraAttr(name="int32", int32_val=12345, doc_string="int32"),
79 |                     ChakraAttr(name="int32_list", int32_list=Int32List(values=[12345, 23456])),
80 |                     ChakraAttr(name="int64", int64_val=9876543210, doc_string="int64"),
81 |                     ChakraAttr(name="int64_list", int64_list=Int64List(values=[9876543210, 1234567890])),
82 |                     ChakraAttr(name="uint32", uint32_val=12345, doc_string="uint32"),
83 |                     ChakraAttr(name="uint32_list", uint32_list=Uint32List(values=[12345, 23456])),
84 |                     ChakraAttr(name="uint64", uint64_val=9876543210, doc_string="uint64"),
85 |                     ChakraAttr(name="uint64_list", uint64_list=Uint64List(values=[9876543210, 1234567890])),
86 |                     ChakraAttr(name="sint32",
sint32_val=-12345, doc_string="sint32"), 87 | ChakraAttr(name="sint32_list", sint32_list=Sint32List(values=[12345, -23456])), 88 | ChakraAttr(name="sint64", sint64_val=-9876543210, doc_string="sint64"), 89 | ChakraAttr(name="sint64_list", sint64_list=Sint64List(values=[9876543210, -1234567890])), 90 | ChakraAttr(name="fixed32", fixed32_val=12345), 91 | ChakraAttr(name="fixed32_list", fixed32_list=Fixed32List(values=[12345, 23456])), 92 | ChakraAttr(name="fixed64", fixed64_val=9876543210), 93 | ChakraAttr(name="fixed64_list", fixed64_list=Fixed64List(values=[9876543210, 1234567890])), 94 | ChakraAttr(name="sfixed32", sfixed32_val=-12345), 95 | ChakraAttr(name="sfixed32_list", sfixed32_list=Sfixed32List(values=[12345, -23456])), 96 | ChakraAttr(name="sfixed64", sfixed64_val=-9876543210), 97 | ChakraAttr(name="sfixed64_list", sfixed64_list=Sfixed64List(values=[9876543210, -1234567890])), 98 | ChakraAttr(name="bool", bool_val=True, doc_string="bool"), 99 | ChakraAttr(name="bool_list", bool_list=BoolList(values=[i % 2 == 0 for i in range(10)])), 100 | ChakraAttr(name="string", string_val="12345", doc_string="string"), 101 | ChakraAttr(name="string_list", string_list=StringList(values=[str(12345 + i) for i in range(10)])), 102 | ChakraAttr(name="bytes", bytes_val=bytes("12345", "utf-8")), 103 | ChakraAttr( 104 | name="bytes_list", 105 | bytes_list=BytesList(values=[bytes(str(12345 + i), "utf-8") for i in range(10)]), 106 | ), 107 | ] 108 | ) 109 | 110 | encode_message(et, node) 111 | 112 | 113 | def one_remote_mem_load_node(num_npus: int, tensor_size: int) -> None: 114 | """Generate remote memory load nodes.""" 115 | for npu_id in range(num_npus): 116 | output_filename = f"one_remote_mem_load_node.{npu_id}.et" 117 | with open(output_filename, "wb") as et: 118 | encode_message(et, GlobalMetadata(version="0.0.4")) 119 | 120 | node = get_node("MEM_LOAD_NODE", MEM_LOAD_NODE) 121 | node.attr.append(ChakraAttr(name="is_cpu_op", bool_val=False)) 122 | node.attr.append(ChakraAttr(name="tensor_size", uint64_val=tensor_size)) 123 | encode_message(et, node) 124 | 125 | 126 | def one_remote_mem_store_node(num_npus: int, tensor_size: int) -> None: 127 | """Generate remote memory store nodes.""" 128 | for npu_id in range(num_npus): 129 | output_filename = f"one_remote_mem_store_node.{npu_id}.et" 130 | with open(output_filename, "wb") as et: 131 | encode_message(et, GlobalMetadata(version="0.0.4")) 132 | 133 | node = get_node("MEM_STORE_NODE", MEM_STORE_NODE) 134 | node.attr.append(ChakraAttr(name="is_cpu_op", bool_val=False)) 135 | node.attr.append(ChakraAttr(name="tensor_size", uint64_val=tensor_size)) 136 | encode_message(et, node) 137 | 138 | 139 | def one_comp_node(num_npus: int, runtime: int) -> None: 140 | """Generate computation nodes with a given runtime.""" 141 | for npu_id in range(num_npus): 142 | output_filename = f"one_comp_node.{npu_id}.et" 143 | with open(output_filename, "wb") as et: 144 | encode_message(et, GlobalMetadata(version="0.0.4")) 145 | 146 | node = get_node("COMP_NODE", COMP_NODE) 147 | node.attr.append(ChakraAttr(name="is_cpu_op", bool_val=False)) 148 | node.duration_micros = runtime 149 | encode_message(et, node) 150 | 151 | 152 | def two_comp_nodes_independent(num_npus: int, runtime: int) -> None: 153 | """Generate two independent computation nodes.""" 154 | for npu_id in range(num_npus): 155 | output_filename = f"two_comp_nodes_independent.{npu_id}.et" 156 | with open(output_filename, "wb") as et: 157 | encode_message(et, GlobalMetadata(version="0.0.4")) 158 | 159 | for _ in 
range(2): 160 | node = get_node("COMP_NODE", COMP_NODE) 161 | node.attr.append(ChakraAttr(name="is_cpu_op", bool_val=False)) 162 | node.duration_micros = runtime 163 | encode_message(et, node) 164 | 165 | 166 | def two_comp_nodes_dependent(num_npus: int, runtime: int) -> None: 167 | """Generate two dependent computation nodes.""" 168 | for npu_id in range(num_npus): 169 | output_filename = f"two_comp_nodes_dependent.{npu_id}.et" 170 | with open(output_filename, "wb") as et: 171 | encode_message(et, GlobalMetadata(version="0.0.4")) 172 | 173 | parent_node = get_node("COMP_NODE", COMP_NODE) 174 | parent_node.attr.append(ChakraAttr(name="is_cpu_op", bool_val=False)) 175 | parent_node.duration_micros = runtime 176 | encode_message(et, parent_node) 177 | 178 | child_node = get_node("COMP_NODE", COMP_NODE) 179 | child_node.attr.append(ChakraAttr(name="is_cpu_op", bool_val=False)) 180 | child_node.duration_micros = runtime 181 | child_node.data_deps.append(parent_node.id) 182 | encode_message(et, child_node) 183 | 184 | 185 | def generate_comm_coll_node(num_npus: int, comm_size: int, comm_type: int, node_name: str) -> None: 186 | """Generate communication collective nodes.""" 187 | for npu_id in range(num_npus): 188 | output_filename = f"{node_name}.{npu_id}.et" 189 | with open(output_filename, "wb") as et: 190 | encode_message(et, GlobalMetadata(version="0.0.4")) 191 | 192 | node = get_node(node_name, COMM_COLL_NODE) 193 | node.attr.append(ChakraAttr(name="is_cpu_op", bool_val=False)) 194 | node.attr.extend([get_comm_type_attr(comm_type), ChakraAttr(name="comm_size", int64_val=comm_size)]) 195 | encode_message(et, node) 196 | 197 | 198 | def one_comm_coll_node_allreduce(num_npus: int, comm_size: int) -> None: 199 | """Generate one AllReduce communication collective node.""" 200 | generate_comm_coll_node(num_npus, comm_size, ALL_REDUCE, "ALL_REDUCE") 201 | 202 | 203 | def one_comm_coll_node_alltoall(num_npus: int, comm_size: int) -> None: 204 | """Generate one AllToAll communication collective node.""" 205 | generate_comm_coll_node(num_npus, comm_size, ALL_TO_ALL, "ALL_TO_ALL") 206 | 207 | 208 | def one_comm_coll_node_allgather(num_npus: int, comm_size: int) -> None: 209 | """Generate one AllGather communication collective node.""" 210 | generate_comm_coll_node(num_npus, comm_size, ALL_GATHER, "ALL_GATHER") 211 | 212 | 213 | def one_comm_coll_node_reducescatter(num_npus: int, comm_size: int) -> None: 214 | """Generate one ReduceScatter communication collective node.""" 215 | generate_comm_coll_node(num_npus, comm_size, REDUCE_SCATTER, "REDUCE_SCATTER") 216 | 217 | 218 | def one_comm_coll_node_broadcast(num_npus: int, comm_size: int) -> None: 219 | """Generate one Broadcast communication collective node.""" 220 | generate_comm_coll_node(num_npus, comm_size, BROADCAST, "BROADCAST") 221 | 222 | 223 | def one_comm_coll_node_barrier(num_npus: int) -> None: 224 | """Generate one Barrier communication collective node.""" 225 | generate_comm_coll_node(num_npus, comm_size=0, comm_type=BARRIER, node_name="BARRIER") 226 | 227 | 228 | def one_comm_send_node(num_npus: int, tensor_size: int) -> None: 229 | """Generate communication send nodes.""" 230 | for npu_id in range(num_npus): 231 | output_filename = f"one_comm_send_node.{npu_id}.et" 232 | with open(output_filename, "wb") as et: 233 | encode_message(et, GlobalMetadata(version="0.0.4")) 234 | 235 | node = get_node("COMM_SEND_NODE", COMM_SEND_NODE) 236 | node.attr.append(ChakraAttr(name="is_cpu_op", bool_val=False)) 237 | 
node.attr.append(ChakraAttr(name="tensor_size", uint64_val=tensor_size)) 238 | encode_message(et, node) 239 | 240 | 241 | def one_comm_recv_node(num_npus: int, tensor_size: int) -> None: 242 | """Generate communication receive nodes.""" 243 | for npu_id in range(num_npus): 244 | output_filename = f"one_comm_recv_node.{npu_id}.et" 245 | with open(output_filename, "wb") as et: 246 | encode_message(et, GlobalMetadata(version="0.0.4")) 247 | 248 | node = get_node("COMM_RECV_NODE", COMM_RECV_NODE) 249 | node.attr.append(ChakraAttr(name="is_cpu_op", bool_val=False)) 250 | node.attr.append(ChakraAttr(name="tensor_size", uint64_val=tensor_size)) 251 | encode_message(et, node) 252 | 253 | 254 | def main() -> None: 255 | parser = argparse.ArgumentParser(description="Execution Trace Generator") 256 | parser.add_argument("--num_npus", type=int, default=64, help="Number of NPUs") 257 | parser.add_argument("--default_runtime", type=int, default=5, help="Default runtime of compute nodes") 258 | parser.add_argument("--default_tensor_size", type=int, default=1024, help="Default tensor size of memory nodes") 259 | parser.add_argument( 260 | "--default_comm_size", type=int, default=65536, help="Default communication size of communication nodes" 261 | ) 262 | args = parser.parse_args() 263 | 264 | one_metadata_node_all_types(args.num_npus) 265 | one_remote_mem_load_node(args.num_npus, args.default_tensor_size) 266 | one_remote_mem_store_node(args.num_npus, args.default_tensor_size) 267 | one_comp_node(args.num_npus, args.default_runtime) 268 | two_comp_nodes_independent(args.num_npus, args.default_runtime) 269 | two_comp_nodes_dependent(args.num_npus, args.default_runtime) 270 | one_comm_coll_node_allreduce(args.num_npus, args.default_comm_size) 271 | one_comm_coll_node_alltoall(args.num_npus, args.default_comm_size) 272 | one_comm_coll_node_allgather(args.num_npus, args.default_comm_size) 273 | one_comm_coll_node_reducescatter(args.num_npus, args.default_comm_size) 274 | one_comm_coll_node_broadcast(args.num_npus, args.default_comm_size) 275 | one_comm_coll_node_barrier(args.num_npus) 276 | one_comm_send_node(args.num_npus, args.default_tensor_size) 277 | one_comm_recv_node(args.num_npus, args.default_tensor_size) 278 | 279 | 280 | if __name__ == "__main__": 281 | main() 282 | --------------------------------------------------------------------------------
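A hedged usage sketch for the generator: calling the trace-generation functions directly rather than through the argparse CLI in main(). The import path below is an assumption; because the module uses package-relative imports (from ...schema...), it assumes the repository checkout is importable as a package named chakra (its parent directory on PYTHONPATH) and that the protobuf bindings have been built via setup.py. Adjust the path to match your installation.

    # Each call writes one ".et" file per NPU into the current directory,
    # e.g. one_comp_node.0.et ... one_comp_node.3.et.
    # Import path is an assumption; see the note above.
    from chakra.src.generator.generator import one_comp_node, one_comm_coll_node_allreduce

    one_comp_node(num_npus=4, runtime=5)                       # 4 compute-node traces
    one_comm_coll_node_allreduce(num_npus=4, comm_size=65536)  # 4 ALL_REDUCE traces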