├── .clang-format ├── .github ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── bug_report.yml │ ├── feature_request.yml │ ├── question.yml │ └── untriaged.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── check-clang-format.yml ├── .gitignore ├── .gitmodules ├── CMakeLists.txt ├── Dockerfile ├── LICENSE ├── README.md ├── docs └── images │ └── tacos_overview.png ├── include └── tacos │ ├── collective │ ├── all_gather.h │ └── collective.h │ ├── event-queue │ ├── event_queue.h │ └── timer.h │ ├── logger │ ├── logger.h │ └── logger.tpp │ ├── synthesizer │ ├── synthesizer.h │ └── time_expanded_network.h │ ├── topology │ ├── hypercube_3d.h │ ├── mesh_2d.h │ ├── topology.h │ ├── torus_2d.h │ └── torus_3d.h │ └── writer │ ├── comm_op.h │ ├── link_result.h │ ├── npu_result.h │ ├── synthesis_result.h │ └── xml_writer.h ├── src ├── collective │ ├── Collective.cpp │ └── all_gather.cpp ├── event-queue │ ├── event_queue.cpp │ └── timer.cpp ├── logger │ └── logger.cpp ├── main.cpp ├── synthesizer │ ├── synthesizer.cpp │ └── time_expanded_network.cpp ├── topology │ ├── hypercube_3d.cpp │ ├── mesh_2d.cpp │ ├── topology.cpp │ ├── torus_2d.cpp │ └── torus_3d.cpp └── writer │ ├── comm_op.cpp │ ├── link_result.cpp │ ├── npu_result.cpp │ ├── synthesis_result.cpp │ └── xml_writer.cpp ├── tacos.sh └── utils ├── build_docker_image.sh ├── run_clang_format.sh └── start_docker_container.sh /.clang-format: -------------------------------------------------------------------------------- 1 | ## ****************************************************************************** 2 | ## This source code is licensed under the MIT license found in the 3 | ## LICENSE file in the root directory of this source tree. 
4 | ## 5 | ## Copyright (c) 2022 Intel Corporation 6 | ## Copyright (c) 2022 Georgia Institute of Technology 7 | ## ****************************************************************************** 8 | --- 9 | # TACOS uses LLVM style by default (https://llvm.org/docs/CodingStandards.html) 10 | BasedOnStyle: LLVM 11 | 12 | # Indentations and spaces 13 | IndentWidth: 4 14 | UseTab: Never 15 | SpacesBeforeTrailingComments: 2 16 | 17 | # Line and column setups 18 | ColumnLimit: 80 19 | InsertNewlineAtEOF: true 20 | PenaltyReturnTypeOnItsOwnLine: 2000000 21 | PackConstructorInitializers: CurrentLine 22 | BinPackParameters: false 23 | 24 | # Pointer alignment 25 | PointerAlignment: Left 26 | 27 | # Braces 28 | InsertBraces: true 29 | AllowShortFunctionsOnASingleLine: Empty 30 | 31 | # templates 32 | BreakTemplateDeclarations: Yes 33 | --- 34 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | ## ****************************************************************************** 2 | ## This source code is licensed under the MIT license found in the 3 | ## LICENSE file in the root directory of this source tree. 4 | ## ****************************************************************************** 5 | 6 | # Current CODEOWNERS 7 | * @willjwon @tushar-krishna 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.yml: -------------------------------------------------------------------------------- 1 | ## ****************************************************************************** 2 | ## This source code is licensed under the MIT license found in the 3 | ## LICENSE file in the root directory of this source tree. 4 | ## ****************************************************************************** 5 | 6 | name: Bug Report 7 | description: File a bug report. 
8 | title: "[Bug]: " 9 | labels: ["bug", "untriaged"] 10 | body: 11 | - type: markdown 12 | attributes: 13 | value: | 14 | We sincerely appreciate your contribution to the `astra-sim/tacos` project. Please use this form to report a potential bug you have found in the project. 15 | - type: textarea 16 | id: what-happened 17 | attributes: 18 | label: What happened? 19 | description: Please explain to us what exactly has happened. 20 | placeholder: What happened? 21 | validations: 22 | required: true 23 | - type: textarea 24 | id: version 25 | attributes: 26 | label: Project Version 27 | description: If possible, let us know the project version or commit ID you are currently working on. 28 | - type: dropdown 29 | id: operatingsystem 30 | attributes: 31 | label: Operating System 32 | description: "From which operating system have you observed this bug?" 33 | options: 34 | - Linux - Debian-based (e.g., Ubuntu) 35 | - Linux - Other Distributions 36 | - macOS 37 | - Windows - Subsystem for Linux 38 | - Others 39 | validations: 40 | required: true 41 | - type: textarea 42 | id: reproduction 43 | attributes: 44 | label: How to reproduce the bug 45 | description: Please explain to us how to reproduce the bug, e.g., which file or line to change, or how to run the program. 46 | - type: textarea 47 | id: logs 48 | attributes: 49 | label: Relevant log output 50 | description: If possible, please provide us with any relevant program output. 51 | render: shell 52 | - type: textarea 53 | id: misc 54 | attributes: 55 | label: Miscellaneous 56 | description: Please provide us with any other relevant information you find necessary. 57 | - type: markdown 58 | attributes: 59 | value: | 60 | Thanks again for your time filling in this form. 
61 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yml: -------------------------------------------------------------------------------- 1 | ## ****************************************************************************** 2 | ## This source code is licensed under the MIT license found in the 3 | ## LICENSE file in the root directory of this source tree. 4 | ## ****************************************************************************** 5 | 6 | name: Feature request 7 | description: Suggest a new feature for this project. 8 | title: "[Feature Request]: " 9 | labels: ["enhancement", "untriaged"] 10 | body: 11 | - type: markdown 12 | attributes: 13 | value: | 14 | We sincerely appreciate your contribution to the `astra-sim/tacos` project. Please use this form to propose a potential feature you think is beneficial to be added. 15 | - type: textarea 16 | id: feature-request 17 | attributes: 18 | label: Feature Request 19 | description: Please explain to us which feature you would like to propose. 20 | placeholder: Feature Request 21 | validations: 22 | required: true 23 | - type: textarea 24 | id: version 25 | attributes: 26 | label: Project Version 27 | description: If possible, let us know the project version or commit ID you are currently working on. 28 | - type: textarea 29 | id: misc 30 | attributes: 31 | label: Miscellaneous 32 | description: Please provide us with any other relevant information you find necessary. 33 | - type: markdown 34 | attributes: 35 | value: | 36 | Thanks again for your time filling in this form. 
37 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.yml: -------------------------------------------------------------------------------- 1 | ## ****************************************************************************** 2 | ## This source code is licensed under the MIT license found in the 3 | ## LICENSE file in the root directory of this source tree. 4 | ## ****************************************************************************** 5 | 6 | name: Question 7 | description: General question about the project and codebase 8 | title: "[Question]: " 9 | labels: ["question", "untriaged"] 10 | body: 11 | - type: markdown 12 | attributes: 13 | value: | 14 | Please use this form to ask general questions about the project and codebase. 15 | - type: textarea 16 | id: question 17 | attributes: 18 | label: Question? 19 | description: Please explain your question here. 20 | placeholder: Question? 21 | validations: 22 | required: true 23 | - type: markdown 24 | attributes: 25 | value: | 26 | If your question is regarding the codebase, if possible, please provide us with the operating system and project version you are currently working on. 27 | - type: dropdown 28 | id: operatingsystem 29 | attributes: 30 | label: Operating System 31 | description: "From which operating system have you observed this bug?" 32 | options: 33 | - Linux - Debian-based (e.g., Ubuntu) 34 | - Linux - Other Distributions 35 | - macOS 36 | - Windows - Subsystem for Linux 37 | - Others 38 | - type: textarea 39 | id: version 40 | attributes: 41 | label: Project Version 42 | description: If possible, let us know the project version or commit ID you are currently working on. 43 | - type: textarea 44 | id: misc 45 | attributes: 46 | label: Miscellaneous 47 | description: Please provide us with any other relevant information you find necessary. 
48 | - type: markdown 49 | attributes: 50 | value: | 51 | Thanks again for your time filling in this form. 52 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/untriaged.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Untriaged 3 | about: Submit an issue without using a form. 4 | title: '' 5 | labels: 'untriaged' 6 | --- 7 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Please delete below block after checking ================= 2 | 3 | # PR Guidelines 4 | We sincerely appreciate your contribution to the `astra-sim/tacos` project. Below is a summarized contribution guideline before you create this PR. 5 | 6 | ## Branch 7 | 1. All PRs must land into the `develop` branch, not `main`. 8 | 2. All PRs with the merge base set to branches other than `develop` (including `main`) will be rejected. 9 | 3. Please check your merge base branch above. 10 | 11 | ## Code Formatting 12 | 1. Automatic code formatting checks using `clang-format` will be run. 13 | 2. Please ensure your code is formatted using the `.clang-format` file provided. 14 | 3. We provide an automated script to help this process. Run `./utils/run_clang_format.sh` before creating a PR 15 | 16 | ## Unit Tests 17 | 1. Unit tests must pass for a PR to be merged. 18 | 2. Current unit tests are in the `test` directory. 
19 | 20 | ### Please delete above block after checking ================= 21 | -------------------------------------------------------------------------------- /.github/workflows/check-clang-format.yml: -------------------------------------------------------------------------------- 1 | ## ****************************************************************************** 2 | ## This source code is licensed under the MIT license found in the 3 | ## LICENSE file in the root directory of this source tree. 4 | ## ****************************************************************************** 5 | 6 | name: format 7 | on: 8 | push: 9 | branches: 10 | - main 11 | - develop 12 | pull_request: 13 | 14 | permissions: 15 | contents: read 16 | 17 | jobs: 18 | check-code-formatting: 19 | name: Code Formatting 20 | runs-on: ubuntu-24.04 # comes with clang-format v18 21 | steps: 22 | - name: Clone repository 23 | uses: actions/checkout@v4 24 | 25 | - name: Check format of `src` directory 26 | run: | 27 | find src -type f \( -name "*.cpp" -o -name "*.h" \) -print0 | 28 | xargs -0L1 clang-format -style=file --dry-run -Werror 29 | 30 | - name: Check format of `include` directory 31 | run: | 32 | find include -type f \( -name "*.cpp" -o -name "*.h" \) -print0 | 33 | xargs -0L1 clang-format -style=file --dry-run -Werror 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## ****************************************************************************** 2 | ## This source code is licensed under the MIT license found in the 3 | ## LICENSE file in the root directory of this source tree. 
4 | ## 5 | ## Copyright (c) 2022 Intel Corporation 6 | ## Copyright (c) 2022 Georgia Institute of Technology 7 | ## ****************************************************************************** 8 | 9 | 10 | # ================================================================ 11 | # TACOS 12 | # build directory 13 | build/ 14 | 15 | # output formats 16 | results/ 17 | *.csv 18 | *.et 19 | *.xml 20 | # ================================================================ 21 | 22 | 23 | # ================================================================ 24 | # C++ 25 | # from: https://github.com/github/gitignore/blob/8779ee73af62c669e7ca371aaab8399d87127693/C%2B%2B.gitignore 26 | # Prerequisites 27 | *.d 28 | 29 | # Compiled Object files 30 | *.slo 31 | *.lo 32 | *.o 33 | *.obj 34 | 35 | # Precompiled Headers 36 | *.gch 37 | *.pch 38 | 39 | # Compiled Dynamic libraries 40 | *.so 41 | *.dylib 42 | *.dll 43 | 44 | # Fortran module files 45 | *.mod 46 | *.smod 47 | 48 | # Compiled Static libraries 49 | *.lai 50 | *.la 51 | *.a 52 | *.lib 53 | 54 | # Executables 55 | *.exe 56 | *.out 57 | *.app 58 | # ================================================================ 59 | 60 | 61 | # ================================================================ 62 | # CMake 63 | # from: https://github.com/github/gitignore/blob/8779ee73af62c669e7ca371aaab8399d87127693/CMake.gitignore 64 | CMakeLists.txt.user 65 | CMakeCache.txt 66 | CMakeFiles 67 | CMakeScripts 68 | Testing 69 | Makefile 70 | cmake_install.cmake 71 | install_manifest.txt 72 | compile_commands.json 73 | CTestTestfile.cmake 74 | _deps 75 | CMakeUserPresets.json 76 | # ================================================================ 77 | 78 | 79 | # ================================================================ 80 | # JetBrains 81 | # from: https://github.com/github/gitignore/blob/8779ee73af62c669e7ca371aaab8399d87127693/Global/JetBrains.gitignore 82 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, 
CLion, Android Studio, WebStorm and Rider 83 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 84 | 85 | # User-specific stuff 86 | .idea/ 87 | 88 | # Generated files 89 | .idea/**/contentModel.xml 90 | 91 | # CMake 92 | cmake-build-*/ 93 | 94 | # File-based project format 95 | *.iws 96 | 97 | # IntelliJ 98 | out/ 99 | 100 | # mpeltonen/sbt-idea plugin 101 | .idea_modules/ 102 | 103 | # JIRA plugin 104 | atlassian-ide-plugin.xml 105 | 106 | # Crashlytics plugin (for Android Studio and IntelliJ) 107 | com_crashlytics_export_strings.xml 108 | crashlytics.properties 109 | crashlytics-build.properties 110 | fabric.properties 111 | # ================================================================ 112 | 113 | 114 | # ================================================================ 115 | # VS Code 116 | # from: https://github.com/github/gitignore/blob/8779ee73af62c669e7ca371aaab8399d87127693/Global/VisualStudioCode.gitignore 117 | .vscode/* 118 | !.vscode/settings.json 119 | !.vscode/tasks.json 120 | !.vscode/launch.json 121 | !.vscode/extensions.json 122 | !.vscode/*.code-snippets 123 | 124 | # Local History for Visual Studio Code 125 | .history/ 126 | 127 | # Built Visual Studio Code Extensions 128 | *.vsix 129 | # ================================================================ 130 | 131 | 132 | # ================================================================ 133 | # macOS 134 | # from: https://github.com/github/gitignore/blob/8779ee73af62c669e7ca371aaab8399d87127693/Global/macOS.gitignore 135 | # General 136 | .DS_Store 137 | .AppleDouble 138 | .LSOverride 139 | 140 | # Icon must end with two \r 141 | Icon 142 | 143 | # Thumbnails 144 | ._* 145 | 146 | # Files that might appear in the root of a volume 147 | .DocumentRevisions-V100 148 | .fseventsd 149 | .Spotlight-V100 150 | .TemporaryItems 151 | .Trashes 152 | .VolumeIcon.icns 153 | .com.apple.timemachine.donotpresent 154 | 155 | # Directories potentially created on remote AFP share 
156 | .AppleDB 157 | .AppleDesktop 158 | Network Trash Folder 159 | Temporary Items 160 | .apdisk 161 | # ================================================================ 162 | 163 | 164 | # ================================================================ 165 | # Linux 166 | # from: https://github.com/github/gitignore/blob/8779ee73af62c669e7ca371aaab8399d87127693/Global/Linux.gitignore 167 | *~ 168 | 169 | # temporary files which can be created if a process still has a handle open of a deleted file 170 | .fuse_hidden* 171 | 172 | # KDE directory preferences 173 | .directory 174 | 175 | # Linux trash folder which might appear on any partition or disk 176 | .Trash-* 177 | 178 | # .nfs files are created when an open file is removed but is still being accessed 179 | .nfs* 180 | # ================================================================ 181 | 182 | 183 | # ================================================================ 184 | # Windows 185 | # from: https://github.com/github/gitignore/blob/8779ee73af62c669e7ca371aaab8399d87127693/Global/Windows.gitignore 186 | # Windows thumbnail cache files 187 | Thumbs.db 188 | Thumbs.db:encryptable 189 | ehthumbs.db 190 | ehthumbs_vista.db 191 | 192 | # Dump file 193 | *.stackdump 194 | 195 | # Folder config file 196 | [Dd]esktop.ini 197 | 198 | # Recycle Bin used on file shares 199 | $RECYCLE.BIN/ 200 | 201 | # Windows Installer files 202 | *.cab 203 | *.msi 204 | *.msix 205 | *.msm 206 | *.msp 207 | 208 | # Windows shortcuts 209 | *.lnk 210 | # ================================================================ 211 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "libs/yaml-cpp"] 2 | path = libs/yaml-cpp 3 | url = https://github.com/jbeder/yaml-cpp.git 4 | [submodule "libs/spdlog"] 5 | path = libs/spdlog 6 | url = https://github.com/gabime/spdlog.git 7 | [submodule "libs/pugixml"] 8 | 
path = libs/pugixml 9 | url = https://github.com/zeux/pugixml.git 10 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ## ****************************************************************************** 2 | ## This source code is licensed under the MIT license found in the 3 | ## LICENSE file in the root directory of this source tree. 4 | ## 5 | ## Copyright (c) 2022 Intel Corporation 6 | ## Copyright (c) 2022 Georgia Institute of Technology 7 | ## ****************************************************************************** 8 | 9 | # CMake requirement 10 | cmake_minimum_required(VERSION 3.22) 11 | 12 | # Start TACOS project 13 | project(tacos) 14 | 15 | # Default build mode: Release 16 | if (NOT CMAKE_BUILD_TYPE) 17 | set(CMAKE_BUILD_TYPE Release) 18 | endif () 19 | 20 | # find external packages 21 | find_package(Boost CONFIG REQUIRED COMPONENTS filesystem) 22 | 23 | # Compile external libraries 24 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libs/yaml-cpp yaml-cpp) 25 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libs/spdlog spdlog) 26 | add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/libs/pugixml pugixml) 27 | 28 | # Create tacos binary target 29 | add_executable(tacos) 30 | 31 | # Set C++ compilation options 32 | set_target_properties(tacos PROPERTIES 33 | CXX_STANDARD 17 34 | CXX_STANDARD_REQUIRED ON 35 | COMPILE_WARNING_AS_ERROR ON) 36 | 37 | # Compiled binary/library file locations 38 | set_target_properties(tacos PROPERTIES 39 | RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin 40 | LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib 41 | ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib) 42 | 43 | # Path to header files 44 | target_include_directories(tacos PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include) 45 | target_include_directories(tacos PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/libs) 46 | 47 | # Link to 
external libraries 48 | target_link_libraries(tacos PRIVATE yaml-cpp) 49 | target_link_libraries(tacos PRIVATE spdlog) 50 | target_link_libraries(tacos PRIVATE pugixml) 51 | target_link_libraries(tacos PRIVATE Boost::filesystem) 52 | 53 | # List all source files to be compiled 54 | file(GLOB tacos_srcs 55 | ${CMAKE_CURRENT_SOURCE_DIR}/src/collective/*.cpp 56 | ${CMAKE_CURRENT_SOURCE_DIR}/src/event-queue/*.cpp 57 | ${CMAKE_CURRENT_SOURCE_DIR}/src/synthesizer/*.cpp 58 | ${CMAKE_CURRENT_SOURCE_DIR}/src/topology/*.cpp 59 | ${CMAKE_CURRENT_SOURCE_DIR}/src/writer/*.cpp 60 | ${CMAKE_CURRENT_SOURCE_DIR}/src/logger/*.cpp 61 | ${CMAKE_CURRENT_SOURCE_DIR}/src/writer/*.cpp 62 | ) 63 | target_sources(tacos PRIVATE ${tacos_srcs}) 64 | target_sources(tacos PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src/main.cpp) 65 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ## Use Ubuntu 2 | FROM ubuntu:24.04 3 | LABEL maintainer="Will Won " 4 | 5 | 6 | ### ================== System Setups ====================== 7 | ## Install System Dependencies 8 | ENV DEBIAN_FRONTEND=noninteractive 9 | RUN apt -y update 10 | RUN apt -y install \ 11 | coreutils wget vim git \ 12 | gcc g++ clang-format \ 13 | make cmake \ 14 | libboost-all-dev \ 15 | zlib1g-dev 16 | ### ====================================================== 17 | 18 | 19 | ### ====== Abseil Installation: Protobuf Dependency ====== 20 | ## Download Abseil 20240116.2 (Latest LTS as of 7/8/2024) 21 | WORKDIR /opt 22 | RUN wget https://github.com/abseil/abseil-cpp/releases/download/20240116.2/abseil-cpp-20240116.2.tar.gz 23 | RUN tar -xf abseil-cpp-20240116.2.tar.gz 24 | RUN rm abseil-cpp-20240116.2.tar.gz 25 | 26 | ## Compile Abseil 27 | WORKDIR /opt/abseil-cpp-20240116.2/build 28 | RUN cmake .. 
\ 29 | -DCMAKE_CXX_STANDARD=14 \ 30 | -DCMAKE_BUILD_TYPE=Release \ 31 | -DCMAKE_INSTALL_PREFIX="/opt/abseil-cpp-20240116.2/install" 32 | RUN cmake --build . --target install --config Release --parallel $(nproc) 33 | ENV absl_DIR="/opt/abseil-cpp-20240116.2/install" 34 | ### ====================================================== 35 | 36 | 37 | ### ============= Protobuf Installation ================== 38 | ## Download Protobuf 25.3 (=v4.25.3, latest version before protobuf v5) 39 | WORKDIR /opt 40 | RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v25.3/protobuf-25.3.tar.gz 41 | RUN tar -xf protobuf-25.3.tar.gz 42 | RUN rm protobuf-25.3.tar.gz 43 | 44 | ## Compile Protobuf 45 | WORKDIR /opt/protobuf-25.3/build 46 | RUN cmake .. \ 47 | -DCMAKE_CXX_STANDARD=14 \ 48 | -DCMAKE_BUILD_TYPE=Release \ 49 | -Dprotobuf_BUILD_TESTS=OFF \ 50 | -Dprotobuf_ABSL_PROVIDER=package \ 51 | -DCMAKE_INSTALL_PREFIX="/opt/protobuf-25.3/install" 52 | RUN cmake --build . --target install --config Release --parallel $(nproc) 53 | ENV PATH="/opt/protobuf-25.3/install/bin:$PATH" 54 | ENV protobuf_DIR="/opt/protobuf-25.3/install" 55 | ### ====================================================== 56 | 57 | 58 | ### ================== Finalize ========================== 59 | ## Move to the application directory 60 | WORKDIR /app/tacos 61 | ### ====================================================== 62 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Intel Corporation 4 | Copyright (c) 2022 Georgia Institute of Technology 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [//]: # (This source code is licensed under the MIT license found in the) 2 | [//]: # (LICENSE file in the root directory of this source tree.) 
3 | [//]: # 4 | [//]: # Copyright (c) 2022 Intel Corporation 5 | [//]: # Copyright (c) 2022 Georgia Institute of Technology 6 | 7 | # 🌮 TACOS 8 | ## [T]opology-[A]ware [Co]llective Algorithm [S]ynthesizer for Distributed Machine Learning 9 | 10 | ## Latest Release 11 | [Latest Release](https://github.com/astra-sim/tacos/releases) 12 | 13 | ## Project Status 14 | | branch | macOS | Ubuntu | Format | Coverage | 15 | |:---:|:---:|:---:|:---:|:---:| 16 | | **main** | TBA | TBA | [![format](https://github.com/astra-sim/tacos/actions/workflows/check-clang-format.yml/badge.svg?branch=main)](https://github.com/astra-sim/tacos/actions/workflows/check-clang-format.yml) | TBA | 17 | | **develop** | TBA | TBA | [![format](https://github.com/astra-sim/tacos/actions/workflows/check-clang-format.yml/badge.svg?branch=develop)](https://github.com/astra-sim/tacos/actions/workflows/check-clang-format.yml) | TBA | 18 | 19 | ## Overview 20 | TACOS receives an arbitrary point-to-point network topology and autonomously synthesizes the topology-aware All-Reduce (Reduce-Scatter and All-Gather) collective communication algorithm. TACOS is powered by the Time-expanded Network (TEN) representation and the Utilization Maximizing Link-Chunk Matching algorithm, thereby resulting in greater scalability to large networks. 21 | 22 | The figure below summarizes the TACOS framework: 23 | ![TACOS Abstraction](https://github.com/astra-sim/tacos/blob/main/docs/images/tacos_overview.png) 24 | 25 | Please find more information about TACOS in [this paper](https://arxiv.org/abs/2304.05301). 26 | - William Won, Midhilesh Elavazhagan, Sudarshan Srinivasan, Swati Gupta, and Tushar Krishna, "TACOS: Topology-Aware Collective Algorithm Synthesizer for Distributed Machine Learning," arXiv:2304.05301 [cs.DC] 27 | 28 | ## Getting Started 29 | We highly recommend using the provided Docker image as the runtime environment, since TACOS requires several dependencies including protobuf and boost. 
You can either download the Docker image from the Docker Hub, or you may build one locally using the provided script. 30 | 31 | 1. Download the TACOS project. 32 | ```sh 33 | git clone --recurse-submodules https://github.com/astra-sim/tacos.git 34 | ``` 35 | 36 | 2. Pull the TACOS Docker Image. 37 | ```sh 38 | docker pull astrasim/tacos:latest 39 | 40 | # Instead, you may consider building this Docker Image locally. 41 | ./utils/build_docker_image.sh 42 | ``` 43 | 44 | 3. Start the Docker Container (which becomes your TACOS runtime environment). 45 | ```sh 46 | ./utils/start_docker_container.sh 47 | ``` 48 | 49 | 4. Run TACOS with the provided script. 50 | ```sh 51 | [docker] ./tacos.sh 52 | ``` 53 | 54 | If you'd like to analyze the codebase, `src/main.cpp` is the main entry point. 55 | 56 | ## Contact Us 57 | For any questions about TACOS, please contact [Will Won](mailto:william.won@gatech.edu) 58 | or [Tushar Krishna](mailto:tushar@ece.gatech.edu). You may also find or open a GitHub Issue in this repository. 59 | -------------------------------------------------------------------------------- /docs/images/tacos_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/astra-sim/tacos/00f38732e2acd22f836d09210d254c6cd44023b1/docs/images/tacos_overview.png -------------------------------------------------------------------------------- /include/tacos/collective/all_gather.h: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | This source code is licensed under the MIT license found in the 3 | LICENSE file in the root directory of this source tree. 
4 | 5 | Copyright (c) 2022 Intel Corporation 6 | Copyright (c) 2022 Georgia Institute of Technology 7 | *******************************************************************************/ 8 | 9 | #pragma once 10 | 11 | #include 12 | 13 | namespace tacos { 14 | 15 | class AllGather final : public Collective { 16 | public: 17 | explicit AllGather(int npusCount, 18 | ChunkSize chunkSize = 1, 19 | int initChunksPerNpu = 1) noexcept; 20 | }; 21 | 22 | } // namespace tacos 23 | -------------------------------------------------------------------------------- /include/tacos/collective/collective.h: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | This source code is licensed under the MIT license found in the 3 | LICENSE file in the root directory of this source tree. 4 | 5 | Copyright (c) 2022 Intel Corporation 6 | Copyright (c) 2022 Georgia Institute of Technology 7 | *******************************************************************************/ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | 15 | namespace tacos { 16 | class Collective { 17 | public: 18 | using ChunkID = int; 19 | using ChunkSize = Topology::ChunkSize; 20 | using NpuID = Topology::NpuID; 21 | 22 | using CollectiveCondition = std::map>; 23 | 24 | Collective(int npusCount, ChunkSize chunkSize) noexcept; 25 | 26 | [[nodiscard]] ChunkSize getChunkSize() const noexcept; 27 | 28 | [[nodiscard]] int getChunksCount() const noexcept; 29 | 30 | [[nodiscard]] CollectiveCondition getPrecondition() const noexcept; 31 | 32 | [[nodiscard]] CollectiveCondition getPostcondition() const noexcept; 33 | 34 | [[nodiscard]] bool synthesisCompleted() const noexcept; 35 | 36 | [[nodiscard]] int chunksPerNpu() const noexcept; 37 | 38 | protected: 39 | int npusCount; 40 | int chunksCount = 0; 41 | int chunksPerNpu_ = 0; 42 | 43 | void add(ChunkID chunkID, NpuID src, NpuID dest) noexcept; 
44 | 45 | void updateChunksCount() noexcept; 46 | 47 | private: 48 | ChunkSize chunkSize; 49 | 50 | std::set chunks = {}; 51 | CollectiveCondition precondition = {}; 52 | CollectiveCondition postcondition = {}; 53 | }; 54 | 55 | } // namespace tacos 56 | -------------------------------------------------------------------------------- /include/tacos/event-queue/event_queue.h: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | This source code is licensed under the MIT license found in the 3 | LICENSE file in the root directory of this source tree. 4 | 5 | Copyright (c) 2022 Intel Corporation 6 | Copyright (c) 2022 Georgia Institute of Technology 7 | *******************************************************************************/ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | namespace tacos { 18 | 19 | class EventQueue { 20 | public: 21 | using Time = uint64_t; // ps 22 | 23 | EventQueue() noexcept; 24 | 25 | void schedule(Time newEventTime) noexcept; 26 | 27 | [[nodiscard]] Time getCurrentTime() const noexcept; 28 | 29 | [[nodiscard]] Time pop() noexcept; 30 | 31 | [[nodiscard]] bool empty() const noexcept; 32 | 33 | private: 34 | Time currentTime = 0; 35 | std::set