├── .changed_files ├── gem5 │ └── src │ │ ├── arch │ │ ├── arm │ │ │ └── linux │ │ │ │ └── se_workload.cc │ │ └── x86 │ │ │ └── linux │ │ │ ├── syscall_tbl32.cc │ │ │ └── syscall_tbl64.cc │ │ ├── cpu │ │ ├── o3 │ │ │ └── cpu.cc │ │ └── simple │ │ │ ├── atomic.cc │ │ │ └── timing.cc │ │ └── sim │ │ ├── eventq.cc │ │ ├── eventq.hh │ │ ├── sim_events.cc │ │ ├── syscall_emul.cc │ │ └── syscall_emul.hh ├── gpgpu-sim │ ├── libcuda │ │ ├── Makefile │ │ └── cuda_runtime_api.cc │ └── src │ │ └── gpgpu-sim │ │ ├── gpu-sim.cc │ │ └── gpu-sim.h └── snipersim │ ├── common │ └── core │ │ ├── core.cc │ │ └── syscall_model.cc │ └── sift │ └── recorder │ └── syscall_modeling.cc ├── .gitignore ├── .gitmodules ├── README.md ├── apply_patch.sh ├── benchmark ├── .gitignore ├── MLP │ ├── config_fermi_islip.icnt │ ├── gpuwattch_gtx480.xml │ ├── makefile │ ├── mlp.cpp │ ├── mlp.cu │ ├── mlp.yml │ ├── readData.py │ ├── temp_data.json │ └── wine.csv ├── matmul │ ├── makefile │ ├── matmul.cpp │ ├── matmul.cu │ └── matmul.yml └── test │ ├── gem5-arm │ ├── barrier │ │ ├── makefile │ │ └── test.yml │ ├── launch │ │ ├── makefile │ │ └── test.yml │ ├── lock │ │ ├── makefile │ │ └── test.yml │ └── pipe │ │ ├── makefile │ │ └── test.yml │ ├── gem5 │ ├── barrier │ │ ├── makefile │ │ ├── test_atomic.yml │ │ ├── test_o3.yml │ │ └── test_timing.yml │ ├── launch │ │ ├── makefile │ │ ├── test_atomic.yml │ │ ├── test_o3.yml │ │ └── test_timing.yml │ ├── lock │ │ ├── makefile │ │ ├── test_atomic.yml │ │ ├── test_o3.yml │ │ └── test_timing.yml │ └── pipe │ │ ├── makefile │ │ ├── test_atomic.yml │ │ ├── test_o3.yml │ │ └── test_timing.yml │ ├── gpgpu-sim │ ├── barrier │ │ ├── makefile │ │ ├── matmul.cpp │ │ ├── matmul.cu │ │ └── matmul.yml │ ├── launch │ │ ├── makefile │ │ ├── matmul.cpp │ │ ├── matmul.cu │ │ └── matmul.yml │ ├── lock │ │ ├── makefile │ │ ├── matmul.cpp │ │ ├── matmul.cu │ │ └── matmul.yml │ └── pipe │ │ ├── makefile │ │ ├── matmul.cpp │ │ ├── matmul.cu │ │ └── matmul.yml │ └── snipersim │ 
├── barrier │ ├── makefile │ ├── test.cpp │ └── test.yml │ ├── launch │ ├── makefile │ ├── test.cpp │ └── test.yml │ ├── lock │ ├── makefile │ ├── test.cpp │ └── test.yml │ └── pipe │ ├── makefile │ ├── test.cpp │ └── test.yml ├── docs ├── .gitignore ├── docs │ ├── 01-quick-start.md │ ├── 02-benchmarks │ │ ├── 021-apis │ │ │ └── index.md │ │ └── 022-yaml.md │ ├── 03-sync-proto │ │ ├── 030-net-proto.md │ │ ├── 031-communication.md │ │ ├── 032-barrier.md │ │ ├── 033-lock.md │ │ ├── 034-launch.md │ │ ├── 035-cycle.md │ │ └── index.md │ ├── 04-import-sim │ │ ├── 041-snipersim.md │ │ ├── 042-gpgpusim.md │ │ ├── 043-gem5.md │ │ └── index.md │ ├── images │ │ ├── Chiplet Simulator.drawio │ │ ├── architect_example_1.drawio.svg │ │ ├── architect_example_2.drawio.svg │ │ ├── interchiplet_architect.drawio.svg │ │ ├── interchiplet_dataflow.drawio.svg │ │ └── iteration_flow.drawio.svg │ └── index.md ├── javascripts │ └── mathjax.js ├── mkdocs.yml └── requirements.txt ├── global_manager_test.cpp ├── interchiplet ├── .gitignore ├── CMakeLists.txt ├── depreciate │ ├── CMakeLists.txt │ ├── interchiplet_client.cpp │ ├── interchiplet_client.h │ ├── record_transfer.cpp │ ├── sniper_change.cpp │ └── zmq_pro.cpp ├── includes │ ├── apis_c.h │ ├── apis_cu.h │ ├── benchmark_yaml.h │ ├── cmd_handler.h │ ├── cmdline_options.h │ ├── global_define.h │ ├── net_bench.h │ ├── net_delay.h │ ├── pipe_comm.h │ └── sync_protocol.h ├── patch │ ├── gem5.diff │ ├── gpgpu-sim.diff │ └── snipersim.diff └── srcs │ ├── apis_c.cpp │ ├── cmd_handler.cpp │ └── interchiplet.cpp ├── patch.sh ├── setup_env.sh ├── test ├── test_benchmark_config.cpp ├── test_cmd_handler.cpp ├── test_cmdline_options ├── test_global_manager.cpp ├── test_network_lists.cpp ├── test_pipe_comm.cpp └── test_process_struct.cpp ├── test_benchmark_config.cpp └── test_cmd_handler.cpp /.changed_files/gem5/src/sim/sim_events.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2013 ARM 
Limited 3 | * All rights reserved 4 | * 5 | * The license below extends only to copyright in the software and shall 6 | * not be construed as granting a license to any other intellectual 7 | * property including but not limited to intellectual property relating 8 | * to a hardware implementation of the functionality of the software 9 | * licensed hereunder. You may use the software subject to the license 10 | * terms below provided that you ensure that this notice is replicated 11 | * unmodified and in its entirety in all distributions of the software, 12 | * modified or unmodified, in source code or in binary form. 13 | * 14 | * Copyright (c) 2002-2005 The Regents of The University of Michigan 15 | * Copyright (c) 2013 Advanced Micro Devices, Inc. 16 | * Copyright (c) 2013 Mark D. Hill and David A. Wood 17 | * All rights reserved. 18 | * 19 | * Redistribution and use in source and binary forms, with or without 20 | * modification, are permitted provided that the following conditions are 21 | * met: redistributions of source code must retain the above copyright 22 | * notice, this list of conditions and the following disclaimer; 23 | * redistributions in binary form must reproduce the above copyright 24 | * notice, this list of conditions and the following disclaimer in the 25 | * documentation and/or other materials provided with the distribution; 26 | * neither the name of the copyright holders nor the names of its 27 | * contributors may be used to endorse or promote products derived from 28 | * this software without specific prior written permission. 29 | * 30 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 31 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 32 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 33 | * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 34 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 35 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 36 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 37 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 38 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 39 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 40 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 41 | */ 42 | 43 | #include "sim/sim_events.hh" 44 | 45 | #include 46 | 47 | #include "base/callback.hh" 48 | #include "sim/eventq.hh" 49 | #include "sim/sim_exit.hh" 50 | #include "sim/stats.hh" 51 | 52 | #include "../../../interchiplet/includes/sync_protocol.h" 53 | 54 | namespace gem5 55 | { 56 | 57 | GlobalSimLoopExitEvent::GlobalSimLoopExitEvent(Tick when, 58 | const std::string &_cause, 59 | int c, Tick r) 60 | : GlobalEvent(when, Sim_Exit_Pri, IsExitEvent), 61 | cause(_cause), code(c), repeat(r) 62 | { 63 | } 64 | 65 | GlobalSimLoopExitEvent::GlobalSimLoopExitEvent(const std::string &_cause, 66 | int c, Tick r) 67 | : GlobalEvent(curTick(), Minimum_Pri, IsExitEvent), 68 | cause(_cause), code(c), repeat(r) 69 | { 70 | } 71 | 72 | const char * 73 | GlobalSimLoopExitEvent::description() const 74 | { 75 | return "global simulation loop exit"; 76 | } 77 | 78 | // 79 | // handle termination event 80 | // 81 | void 82 | GlobalSimLoopExitEvent::process() 83 | { 84 | if (repeat) { 85 | schedule(curTick() + repeat); 86 | } 87 | } 88 | 89 | void 90 | exitSimLoop(const std::string &message, int exit_code, Tick when, Tick repeat, 91 | bool serialize) 92 | { 93 | // Send exit cycle. 94 | InterChiplet::sendCycleCmd(curTick()); 95 | 96 | warn_if(serialize && (when != curTick() || repeat), 97 | "exitSimLoop called with a delay and auto serialization. 
This is " 98 | "currently unsupported."); 99 | 100 | new GlobalSimLoopExitEvent(when + simQuantum, message, exit_code, repeat); 101 | } 102 | 103 | void 104 | exitSimLoopNow(const std::string &message, int exit_code, Tick repeat, 105 | bool serialize) 106 | { 107 | // Send exit cycle. 108 | InterChiplet::sendCycleCmd(curTick()); 109 | 110 | new GlobalSimLoopExitEvent(message, exit_code, repeat); 111 | } 112 | 113 | LocalSimLoopExitEvent::LocalSimLoopExitEvent(const std::string &_cause, int c, 114 | Tick r) 115 | : Event(Sim_Exit_Pri, IsExitEvent), 116 | cause(_cause), code(c), repeat(r) 117 | { 118 | } 119 | 120 | // 121 | // handle termination event 122 | // 123 | void 124 | LocalSimLoopExitEvent::process() 125 | { 126 | exitSimLoop(cause, 0); 127 | } 128 | 129 | 130 | const char * 131 | LocalSimLoopExitEvent::description() const 132 | { 133 | return "simulation loop exit"; 134 | } 135 | 136 | void 137 | LocalSimLoopExitEvent::serialize(CheckpointOut &cp) const 138 | { 139 | Event::serialize(cp); 140 | 141 | SERIALIZE_SCALAR(cause); 142 | SERIALIZE_SCALAR(code); 143 | SERIALIZE_SCALAR(repeat); 144 | } 145 | 146 | void 147 | LocalSimLoopExitEvent::unserialize(CheckpointIn &cp) 148 | { 149 | Event::unserialize(cp); 150 | 151 | UNSERIALIZE_SCALAR(cause); 152 | UNSERIALIZE_SCALAR(code); 153 | UNSERIALIZE_SCALAR(repeat); 154 | } 155 | 156 | // 157 | // constructor: automatically schedules at specified time 158 | // 159 | CountedExitEvent::CountedExitEvent(const std::string &_cause, int &counter) 160 | : Event(Sim_Exit_Pri), cause(_cause), downCounter(counter) 161 | { 162 | // catch stupid mistakes 163 | assert(downCounter > 0); 164 | } 165 | 166 | 167 | // 168 | // handle termination event 169 | // 170 | void 171 | CountedExitEvent::process() 172 | { 173 | if (--downCounter == 0) { 174 | exitSimLoop(cause, 0); 175 | } 176 | } 177 | 178 | 179 | const char * 180 | CountedExitEvent::description() const 181 | { 182 | return "counted exit"; 183 | } 184 | 185 | } // 
namespace gem5 186 | -------------------------------------------------------------------------------- /.changed_files/gpgpu-sim/libcuda/Makefile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2009 by Tor M. Aamodt, Ali Bakhoda and the 2 | # University of British Columbia 3 | # Vancouver, BC V6T 1Z4 4 | # All Rights Reserved. 5 | # 6 | # THIS IS A LEGAL DOCUMENT BY DOWNLOADING GPGPU-SIM, YOU ARE AGREEING TO THESE 7 | # TERMS AND CONDITIONS. 8 | # 9 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 10 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 11 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 12 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS OR CONTRIBUTORS BE 13 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 14 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 15 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 16 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 17 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 18 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 19 | # POSSIBILITY OF SUCH DAMAGE. 20 | # 21 | # NOTE: The files libcuda/cuda_runtime_api.c and src/cuda-sim/cuda-math.h 22 | # are derived from the CUDA Toolset available from http://www.nvidia.com/cuda 23 | # (property of NVIDIA). The files benchmarks/BlackScholes/ and 24 | # benchmarks/template/ are derived from the CUDA SDK available from 25 | # http://www.nvidia.com/cuda (also property of NVIDIA). The files from 26 | # src/intersim/ are derived from Booksim (a simulator provided with the 27 | # textbook "Principles and Practices of Interconnection Networks" available 28 | # from http://cva.stanford.edu/books/ppin/). 
As such, those files are bound by 29 | # the corresponding legal terms and conditions set forth separately (original 30 | # copyright notices are left in files from these sources and where we have 31 | # modified a file our copyright notice appears before the original copyright 32 | # notice). 33 | # 34 | # Using this version of GPGPU-Sim requires a complete installation of CUDA 35 | # which is distributed seperately by NVIDIA under separate terms and 36 | # conditions. To use this version of GPGPU-Sim with OpenCL requires a 37 | # recent version of NVIDIA's drivers which support OpenCL. 38 | # 39 | # Redistribution and use in source and binary forms, with or without 40 | # modification, are permitted provided that the following conditions are met: 41 | # 42 | # 1. Redistributions of source code must retain the above copyright notice, 43 | # this list of conditions and the following disclaimer. 44 | # 45 | # 2. Redistributions in binary form must reproduce the above copyright notice, 46 | # this list of conditions and the following disclaimer in the documentation 47 | # and/or other materials provided with the distribution. 48 | # 49 | # 3. Neither the name of the University of British Columbia nor the names of 50 | # its contributors may be used to endorse or promote products derived from 51 | # this software without specific prior written permission. 52 | # 53 | # 4. This version of GPGPU-SIM is distributed freely for non-commercial use only. 54 | # 55 | # 5. No nonprofit user may place any restrictions on the use of this software, 56 | # including as modified by the user, by any other authorized user. 57 | # 58 | # 6. GPGPU-SIM was developed primarily by Tor M. Aamodt, Wilson W. L. Fung, 59 | # Ali Bakhoda, George L. 
Yuan, at the University of British Columbia, 60 | # Vancouver, BC V6T 1Z4 61 | 62 | 63 | include ../version_detection.mk 64 | 65 | ifeq ($(OPENGL_SUPPORT),1) 66 | GL = -DOPENGL_SUPPORT 67 | endif 68 | 69 | ifeq ($(GNUC_CPP0X), 1) 70 | CXXFLAGS = -std=c++0x 71 | endif 72 | 73 | 74 | CPP = g++ $(SNOW) 75 | CC = gcc $(SNOW) 76 | CREATELIBRARY = 1 77 | DEBUG ?= 0 78 | ifeq ($(DEBUG),1) 79 | CXXFLAGS += -Wall -Wno-unused-function -Wno-sign-compare -g -fPIC $(GL) 80 | CCFLAGS += -Wall -Wno-unused-function -Wno-sign-compare -ggdb -fPIC 81 | else 82 | CXXFLAGS += -O3 -g -Wall -Wno-unused-function -Wno-sign-compare -fPIC $(GL) 83 | CCFLAGS += -Wall -Wno-unused-function -Wno-sign-compare -fPIC 84 | endif 85 | 86 | PROG =cuda 87 | 88 | CXX_SRCS = cuda_runtime_api.cc 89 | LEX_SRCS = cuobjdump.l 90 | YACC_SRCS = cuobjdump.y 91 | CXXFLAGS += -DCUDART_VERSION=$(CUDART_VERSION) 92 | LEX = flex 93 | LEXFLAGS = -B 94 | YACC = bison 95 | YFLAGS = -t -d -v --report=all 96 | 97 | .PHONY: clean 98 | 99 | OUTPUT_DIR=$(SIM_OBJ_FILES_DIR)/libcuda 100 | 101 | OBJS = $(CXX_SRCS:%.cc=$(OUTPUT_DIR)/%.o) 102 | OBJS += $(LEX_SRCS:%.l=$(OUTPUT_DIR)/%_lexer.o) 103 | OBJS += $(YACC_SRCS:%.y=$(OUTPUT_DIR)/%_parser.o) 104 | 105 | #--- Make rules --- 106 | lib$(PROG).a: $(OBJS) 107 | echo $(OBJS) 108 | ar rcs $(OUTPUT_DIR)/lib$(PROG).a $(OBJS) 109 | 110 | $(OUTPUT_DIR)/%.o: %.cc 111 | $(CPP) $(CXXFLAGS) -I./ -I$(OUTPUT_DIR) -I$(CUDA_INSTALL_PATH)/include -I$(SIMULATOR_ROOT)/interchiplet/includes -c $< -o $@ 112 | 113 | $(OUTPUT_DIR)/%.o: %.c 114 | $(CPP) $(CCFLAGS) -I./ -I$(OUTPUT_DIR) -I$(CUDA_INSTALL_PATH)/include -c $< -o $@ 115 | 116 | $(OUTPUT_DIR)/%.o: $(OUTPUT_DIR)/%.c 117 | $(CPP) $(CCFLAGS) -I./ -I$(OUTPUT_DIR) -I$(CUDA_INSTALL_PATH)/include -c $< -o $@ 118 | 119 | $(OUTPUT_DIR)/cuobjdump_parser.c: cuobjdump.y 120 | $(YACC) $(YFLAGS) -p cuobjdump_ -o$@ $< --file-prefix=$(OUTPUT_DIR)/cuobjdump 121 | 122 | $(OUTPUT_DIR)/cuobjdump_lexer.c: cuobjdump.l $(OUTPUT_DIR)/cuobjdump_parser.h 123 | 
$(LEX) $(LEXFLAGS) -P cuobjdump_ -o$@ $< 124 | 125 | # The next rule means just get parser.c and you will get parser.h with it 126 | # in other words, get parser.c and do nothing to get parser.h 127 | $(OUTPUT_DIR)/%_parser.h: $(OUTPUT_DIR)/%_parser.c 128 | : 129 | $(OUTPUT_DIR)/Makefile.makedepend: depend 130 | 131 | depend: 132 | touch $(OUTPUT_DIR)/Makefile.makedepend 133 | makedepend -f$(OUTPUT_DIR)/Makefile.makedepend -p$(OUTPUT_DIR)/ $(CXX_SRCS) 2> /dev/null 134 | 135 | clean: 136 | rm -f $(PROG) 137 | rm -f *.o 138 | rm -f lib$(PROG).a 139 | rm -f *_parser.* 140 | rm -f *_lexer.* 141 | rm -f Makefile.makedepend Makefile.makedepend.bak 142 | 143 | include $(OUTPUT_DIR)/Makefile.makedepend 144 | 145 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | *.o 3 | 4 | interchiplet/bin 5 | interchiplet/lib 6 | interchiplet/build 7 | 8 | gpgpu-sim_distribution_backup 9 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "snipersim"] 2 | path = snipersim 3 | url = https://github.com/snipersim/snipersim.git 4 | [submodule "gpgpu-sim"] 5 | path = gpgpu-sim 6 | url = https://github.com/gpgpu-sim/gpgpu-sim_distribution.git 7 | [submodule "popnet_chiplet"] 8 | path = popnet_chiplet 9 | url = https://github.com/baikeina/popnet_chiplet.git 10 | [submodule "interchiplet/thirdparty/spdlog"] 11 | path = interchiplet/thirdparty/spdlog 12 | url = https://github.com/gabime/spdlog.git 13 | [submodule "interchiplet/thirdparty/CLI11"] 14 | path = interchiplet/thirdparty/CLI11 15 | url = https://github.com/CLIUtils/CLI11.git 16 | [submodule "interchiplet/thirdparty/yaml-cpp"] 17 | path = interchiplet/thirdparty/yaml-cpp 18 | url = https://github.com/jbeder/yaml-cpp.git 19 | [submodule "gem5"] 20 | path = gem5 
21 | url = https://github.com/gem5/gem5.git 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | For detailed documentation in English, please see [Document for LegoSim](https://fcas-zju.github.io/Chiplet_Heterogeneous_newVersion/). 3 | 4 | # 安装 5 | 6 | ## 下载仓库并设置环境 7 | 8 | 1. 从github上下载仓库。 9 | 10 | ``` 11 | git clone --single-branch --branch master_v2 https://github.com/FCAS-SCUT/Chiplet_Heterogeneous_newVersion.git 12 | ``` 13 | 14 | 进入仿真器根目录,以下的示例命令都假设从仿真器根目录开始执行。 15 | 16 | 2. 初始化并更新submodule。 17 | 18 | ``` 19 | git submodule init 20 | git submodule update 21 | ``` 22 | 23 | 3. 运行脚本,初始化环境变量 24 | 25 | ``` 26 | source setup_env.sh 27 | ``` 28 | 29 | 运行成功应出现:setup_environment succeeded 30 | 31 | 4. 对于snipersim、gpgpu-sim和gem5代码进行修改。 32 | 33 | ``` 34 | ./apply_patch.sh 35 | ``` 36 | 37 | 更多细节参见下文“打包和应用Patch”章节。 38 | 39 | 5. 编译安装snipersim。新版本的snipersim提供了非常自动化的编译脚本,直接执行make即可。 40 | 41 | ``` 42 | cd snipersim 43 | make -j4 44 | ``` 45 | 46 | 6. 编译安装Gem5。请查看Gem5文档获取详细安装指南。LegoSim中可以运行X86和ARM架构仿真器: 47 | 48 | ``` 49 | cd gem5 50 | scons build/X86/gem5.opt 51 | ``` 52 | 53 | 或者 54 | 55 | ``` 56 | cd gem5 57 | scons build/ARM/gem5.opt 58 | ``` 59 | 60 | 7. 编译安装GPGPUSim。GPGPUsim安装有前置条件: 61 | 62 | 1. GPGPUSim需要安装cuda。新版本的gpgpusim可以支持cuda4到cuda11的任意版本,详细信息请参见GPGPUSim的README。 63 | 2. GPGPUSim对于编译版本有要求,建议使用GCC7。 64 | 65 | 配置好Cuda和编译器,可以直接执行make。 66 | 67 | ``` 68 | cd gpgpu-sim 69 | make -j4 70 | ``` 71 | 72 | 8. 编译安装popnet 73 | 74 | ``` 75 | cd popnet_chiplet 76 | mkdir build 77 | cd build 78 | cmake .. 79 | make -j4 80 | ``` 81 | 82 | 9. 编译安装芯粒间通信程序。interchiplet提供了芯粒间通信所需要的API和实现代码。 83 | 84 | ``` 85 | cd interchiplet 86 | mkdir build 87 | cd build 88 | cmake .. 
89 | make 90 | ``` 91 | 92 | 编译完成后应在interchiplet/bin下找到record_transfer和zmq_pro,在interchiplet/lib下找到libinterchiplet_app.a。 93 | 94 | zmq_pro需要安装zmq环境。通常会在cmake步骤被忽略。 95 | 96 | # 验证安装 97 | 98 | 正确执行上述过程后,可以使用benchmark/matmul验证环境设置是否正确。 99 | 100 | 1. 设置仿真器环境 101 | 102 | ``` 103 | source setup_env.sh 104 | ``` 105 | 106 | 2. 编译可执行文件 107 | 108 | ``` 109 | cd benchmark/matmul 110 | make 111 | ``` 112 | 113 | 3. 执行可执行文件。示例包含4个进程,分别是1个CPU进程和3个GPU进程。必须在benchmark/matmul路径下执行。 114 | 115 | ``` 116 | ../../interchiplet/bin/interchiplet ./matmul.yml 117 | ``` 118 | 119 | 执行后,可以在benchmark/matmul文件夹下找到一组proc_r{R}_p{P}_t{T}的文件夹,对应于第R轮执行的第P阶段的第T个线程。 120 | 在文件夹中可以找到下列文件: 121 | 122 | 1. GPGPUSim仿真的临时文件和日志文件gpgpusim_X_X.log。 123 | 2. Sniper仿真的临时文件和sniper仿真的日志文件sniper.log。 124 | 3. Popnet的日志文件popnet.log。 125 | 126 | 4. 清理可执行文件和输出文件。 127 | 128 | ``` 129 | make clean 130 | ``` 131 | 132 | # 打包和应用Patch 133 | 134 | 由于sniper和GPGPUSim是用submodule方式引入的,对于snipersim和gpgpu-sim的修改不会通过常规的git流程追踪。因此,工程提供了patch.sh和apply_patch.sh两个脚本通过Patch管理sniper和gpgpu-sim的修改。 135 | 136 | patch.sh脚本用来生成Patch: 137 | 138 | ``` 139 | ./patch.sh 140 | ``` 141 | 142 | 1. 使用patch.sh脚本将snipersim和gpgpu-sim的修改分别打包到snipersim.diff和gpgpu-sim.diff文件中。diff文件保存在interchiplet/patch下面。diff文件会被git追踪。 143 | 2. patch.sh脚本还会将被修改的文件按照文件层次结构保存到.changed_files文件夹中,用于在diff文件出错时进行查看和参考。 144 | 145 | apply_patch.sh脚本用来应用Patch: 146 | 147 | ``` 148 | ./apply_patch.sh 149 | ``` 150 | 151 | 1. 使用apply_patch.sh脚本将snipersim.diff和gpgpu-sim.diff文件应用到snipersim和gpgpu-sim,重现对于文件的修改。 152 | 2. 当apply出错时,可以参考.changed_files中的文件手动修改snipersim和gpgpu-sim的文件。 153 | 154 | 需要说明的是:不建议用.changed_files直接覆盖snipersim和gpgpu-sim文件夹。因为snipersim和gpgpu-sim本身的演进可能会与芯粒仿真器修改相同的文件。使用Patch的方式会报告修改的冲突。如果直接覆盖,则会导致不可预见的错误。 155 | 156 | # 添加测试程序 157 | 158 | 测试程序统一添加到benchmark路径下,每一个测试文件有独立的文件夹。 159 | 160 | 测试程序的文件管理推荐按照matmul组织,并且使用类似的Makefile。但是并不绝对要求。 161 | 162 | 运行测试程序需要编写YAML配置文件。 163 | 164 | ## YAML配置文件格式 165 | 166 | ``` 167 | # Phase 1 configuration. 
168 | phase1: 169 | # Process 0 170 | - cmd: "$BENCHMARK_ROOT/bin/matmul_cu" 171 | args: ["0", "1"] 172 | log: "gpgpusim.0.1.log" 173 | is_to_stdout: false 174 | pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*" 175 | # Process 1 176 | - cmd: "$BENCHMARK_ROOT/bin/matmul_cu" 177 | args: ["1", "0"] 178 | log: "gpgpusim.1.0.log" 179 | is_to_stdout: false 180 | pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*" 181 | ...... 182 | 183 | # Phase 2 configuration. 184 | phase2: 185 | # Process 0 186 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 187 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0"] 188 | log: "popnet.log" 189 | is_to_stdout: false 190 | 191 | ``` 192 | 193 | YAML配置文件的第一层支持的关键字是: 194 | 195 | - `phase1`:配置第一阶段的仿真器进程。 196 | - `phase2`:配置第二阶段的仿真器进程。 197 | 198 | 这两个关键字下面都是数组,每项对应于一个并发的仿真器进程。`phase1`和`phase2`都可以支持多个仿真进程。 199 | 200 | 仿真器进程的配置支持如下关键字: 201 | 202 | - `cmd`:表示仿真器的命令。字符串表示。支持环境变量`$BENCHMARK_ROOT`和`$SIMULATOR_ROOT`。 203 | - `args`:表示仿真器的参数。字符串数组表示。支持环境变量`$BENCHMARK_ROOT`和`$SIMULATOR_ROOT`。 204 | - `log`:表示日志的名称。不能使用相对路径或绝对路径。 205 | - `is_to_stdout`:表示是否将仿真器的标准输出/错误输出重定向到interchiplet的标准输出。 206 | - `pre_copy`:有些仿真器需要一些额外的文件才能启动仿真。这个关键字是字符串。如果需要复制多个文件,则用空格隔开,用引号包围。 207 | 208 | 在YAML里面使用相对路径时,以当前路径作为基础。推荐使用环境变量构成绝对路径。 209 | 210 | - `$BENCHMARK_ROOT`表示测试程序的路径,根据YAML文件的位置决定。 211 | - `$SIMULATOR_ROOT`表示仿真器的路径,通过setup_env.sh决定。 212 | 213 | ## 运行InterChiplet 214 | 215 | 仿真器的主程序是InterChiplet。在运行路径下执行下面的命令: 216 | 217 | ``` 218 | $SIMULATOR_ROOT/interchiplet/bin/interchiplet $BENCHMARK_ROOT/bench.yml 219 | ``` 220 | 221 | InterChiplet命令格式如下: 222 | 223 | ``` 224 | interchiplet <bench>.yml [--cwd <cwd>] [-t|--timeout <timeout>] [-e|--error <error>] [-h] 225 | ``` 226 | 227 | 命令参数如下: 228 | 229 | - `<bench>.yml`指定测试程序的配置文件。 230 | - `--cwd <cwd>`指定执行仿真的路径。 231 | - `-t <timeout>`和`--timeout <timeout>`指定仿真退出的轮次。不论结果是否收敛,都会结束仿真。 232 | - `-e <error>`和`--error <error>`指定仿真退出的条件。当仿真误差小于这个比例时,结束仿真。 233 | 234 | 
-------------------------------------------------------------------------------- /apply_patch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ -n "${SIMULATOR_ROOT}" ]]; then 4 | echo "SIMULATOR_ROOT is: ${SIMULATOR_ROOT}" 5 | else 6 | echo "The environment variable SIMULATOR_ROOT is not defined." 7 | exit 8 | fi 9 | 10 | # Patch for Sniper 11 | cd ${SIMULATOR_ROOT}/snipersim 12 | git apply ../interchiplet/patch/snipersim.diff 13 | 14 | # Patch for GPGPUSim 15 | cd ${SIMULATOR_ROOT}/gpgpu-sim 16 | git apply ../interchiplet/patch/gpgpu-sim.diff 17 | 18 | # Patch for GEM5 19 | cd ${SIMULATOR_ROOT}/gem5 20 | git apply ../interchiplet/patch/gem5.diff 21 | -------------------------------------------------------------------------------- /benchmark/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | _app_cuda_version_* 3 | _cuobjdump_list_ptx_* 4 | *.ptx 5 | *.ptxas 6 | gpgpu_inst_stats.txt 7 | 8 | bench.txt 9 | delayInfo.txt 10 | buffer* 11 | *.log 12 | message_record.txt 13 | 14 | sim.cfg 15 | sim.info 16 | sim.out 17 | sim.stats.sqlite3 18 | 19 | checkpoint_files 20 | 21 | obj 22 | cuobj 23 | bin 24 | proc_r*_t* 25 | 26 | MLP/json 27 | -------------------------------------------------------------------------------- /benchmark/MLP/config_fermi_islip.icnt: -------------------------------------------------------------------------------- 1 | //21*1 fly with 32 flits per packet under gpgpusim injection mode 2 | use_map = 0; 3 | flit_size = 40; 4 | 5 | // currently we do not use this, see subnets below 6 | network_count = 2; 7 | 8 | // Topology 9 | topology = fly; 10 | k = 27; 11 | n = 1; 12 | 13 | // Routing 14 | 15 | routing_function = dest_tag; 16 | 17 | // Flow control 18 | 19 | num_vcs = 1; 20 | vc_buf_size = 64; 21 | 22 | wait_for_tail_credit = 0; 23 | 24 | // Router architecture 25 | 26 | vc_allocator = islip; //separable_input_first; 27 | sw_allocator = 
islip; //separable_input_first; 28 | alloc_iters = 1; 29 | 30 | credit_delay = 0; 31 | routing_delay = 0; 32 | vc_alloc_delay = 1; 33 | sw_alloc_delay = 1; 34 | 35 | input_speedup = 1; 36 | output_speedup = 1; 37 | internal_speedup = 2.0; 38 | 39 | // Traffic, GPGPU-Sim does not use this 40 | 41 | traffic = uniform; 42 | packet_size ={{1,2,3,4},{10,20}}; 43 | packet_size_rate={{1,1,1,1},{2,1}}; 44 | 45 | // Simulation - Don't change 46 | 47 | sim_type = gpgpusim; 48 | //sim_type = latency; 49 | injection_rate = 0.1; 50 | 51 | subnets = 2; 52 | 53 | // Always use read and write no matter following line 54 | //use_read_write = 1; 55 | 56 | 57 | read_request_subnet = 0; 58 | read_reply_subnet = 1; 59 | write_request_subnet = 0; 60 | write_reply_subnet = 1; 61 | 62 | read_request_begin_vc = 0; 63 | read_request_end_vc = 0; 64 | write_request_begin_vc = 0; 65 | write_request_end_vc = 0; 66 | read_reply_begin_vc = 0; 67 | read_reply_end_vc = 0; 68 | write_reply_begin_vc = 0; 69 | write_reply_end_vc = 0; -------------------------------------------------------------------------------- /benchmark/MLP/makefile: -------------------------------------------------------------------------------- 1 | CC=g++ 2 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes -I$(SIMULATOR_ROOT)/benchmark/MLP/json/include 3 | INTERCHIPLETLIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a 4 | SNIPER_EXEC=$(SIMULATOR_ROOT)/snipersim/run-sniper 5 | 6 | NVCC=nvcc 7 | CUFLAGS=--compiler-options -Wall -I$(SIMULATOR_ROOT)/interchiplet/includes 8 | 9 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/MLP 10 | 11 | SNIPER_SRCS=mlp.cpp 12 | SNIPER_OBJS=mlp.o 13 | SNIPER_TARGET=bin/mlp_cpu 14 | 15 | GPGPUSIM_SRCS=mlp.cu 16 | GPGPUSIM_TARGET=bin/mlp_cu 17 | REPO_DIR=$(BENCHMARK_ROOT)/json 18 | GIT_REPO=https://github.com/nlohmann/json.git 19 | 20 | all: sniper_target gpgpusim_target 21 | 22 | sniper_target: $(SNIPER_OBJS) 23 | $(CC) -g $(SNIPER_OBJS) $(INTERCHIPLETLIB) -o $(SNIPER_TARGET) 
-lpthread 24 | 25 | %.o: %.cpp 26 | if [ ! -d $(REPO_DIR) ]; then \ 27 | git clone $(GIT_REPO) $(REPO_DIR); \ 28 | fi 29 | 30 | $(CC) $(CFLAGS) -c $< -o $@ 31 | 32 | debug: CFLAGS += -DDEBUG -g 33 | debug: all 34 | 35 | gpgpusim_target: 36 | $(NVCC) -L$(SIMULATOR_ROOT)/gpgpu-sim/lib/$(GPGPUSIM_CONFIG) --cudart shared $(GPGPUSIM_SRCS) $(CUFLAGS) -o $(GPGPUSIM_TARGET) -g 37 | 38 | run: 39 | touch "start running" & 40 | ./$(GPGPUSIM_TARGET) 0 1 > gpgpusim.0.1.log 2>&1 & 41 | ./$(GPGPUSIM_TARGET) 0 2 > gpgpusim.0.2.log 2>&1 & 42 | ./$(GPGPUSIM_TARGET) 0 3 > gpgpusim.0.3.log 2>&1 & 43 | ./$(GPGPUSIM_TARGET) 0 4 > gpgpusim.0.4.log 2>&1 & 44 | ./$(GPGPUSIM_TARGET) 1 1 > gpgpusim.1.1.log 2>&1 & 45 | ./$(GPGPUSIM_TARGET) 1 2 > gpgpusim.1.2.log 2>&1 & 46 | ./$(GPGPUSIM_TARGET) 1 3 > gpgpusim.1.3.log 2>&1 & 47 | ./$(GPGPUSIM_TARGET) 1 4 > gpgpusim.1.4.log 2>&1 & 48 | $(SNIPER_EXEC) --curdir $(BENCHMARK_ROOT) -- $(BENCHMARK_ROOT)/$(SNIPER_TARGET) 0 0 > sniper.0.0.log 2>&1 49 | 50 | run_cpu: 51 | $(SNIPER_EXEC) --curdir $(BENCHMARK_ROOT) -- $(BENCHMARK_ROOT)/$(SNIPER_TARGET) 0 0 52 | 53 | run_gpu: 54 | ./$(GPGPUSIM_TARGET) 0 1 > gpgpusim.0.1.log 2>&1 55 | 56 | 57 | gdb: $(SNIPER_TARGET) 58 | cd $(BENCHMARK_ROOT) && gdb ./$(SNIPER_TARGET) -- $(BENCHMARK_ROOT)/$(SNIPER_TARGET) 0 0 59 | 60 | valgrind: $(SNIPER_TARGET) 61 | cd $(BENCHMARK_ROOT) && valgrind --leak-check=full ./$(SNIPER_TARGET) -- $(BENCHMARK_ROOT)/$(SNIPER_TARGET) 0 0 62 | 63 | clean: 64 | rm -rf bench* buffer* message_record.txt 65 | rm -rf _app_cuda_version_* _cuobjdump_list_ptx_* gpgpusim_power_report_* *.ptx *.ptxas gpgpusim.*.log gpgpu_inst_stats.txt gpuRead* cpuRead* 66 | rm -rf sim.cfg sim.info sim.out sim.stats.sqlite3 67 | rm -rf checkpoint_files 68 | 69 | cleanall: 70 | make clean 71 | rm -rf $(SNIPER_OBJS) $(GPGPUSIM_TARGET) $(SNIPER_TARGET) 72 | rm -f start* sniper* 73 | 74 | kill: 75 | pkill -f mlp_cu 76 | pkill -f mlp_cpu 77 | 
-------------------------------------------------------------------------------- /benchmark/MLP/mlp.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "apis_cu.h" 10 | #include "cuda_runtime.h" 11 | #include "device_launch_parameters.h" 12 | #define BLOCK_DIM 10 13 | 14 | __global__ void matrix_mul_gpu(int64_t *M, int64_t *N, int64_t *P, int64_t widthA, int64_t heightA, 15 | int64_t widthB) { 16 | int64_t i = threadIdx.x + blockDim.x * blockIdx.x; 17 | int64_t j = threadIdx.y + blockDim.y * blockIdx.y; 18 | if (i < widthB && j < heightA) { 19 | int64_t sum = 0; 20 | for (int64_t k = 0; k < widthA; k++) { 21 | int64_t a = M[j * widthA + k]; 22 | int64_t b = N[k * widthB + i]; 23 | sum += a * b; 24 | } 25 | P[j * widthB + i] = sum; 26 | } 27 | } 28 | 29 | int Row_A = 0, Col_A = 0, Row_B = 0, Col_B = 0; 30 | int main(int argc, char **argv) { 31 | while (1) { 32 | char *fileName = new char[100]; 33 | // 读取本进程所代表的chiplet编号 34 | int srcX = atoi(argv[1]); 35 | int srcY = atoi(argv[2]); 36 | int64_t *size_A = new int64_t[2]; 37 | int64_t *size_B = new int64_t[2]; 38 | int64_t *Size_A, *Size_B; 39 | cudaMalloc((void **)&Size_A, sizeof(int64_t) * 2); 40 | cudaMalloc((void **)&Size_B, sizeof(int64_t) * 2); 41 | 42 | receiveMessage(srcX, srcY, 0, 0, Size_A, sizeof(int64_t) * 2); 43 | receiveMessage(srcX, srcY, 0, 0, Size_B, sizeof(int64_t) * 2); 44 | 45 | cudaMemcpy(size_A, Size_A, sizeof(int64_t) * 2, cudaMemcpyDeviceToHost); 46 | cudaMemcpy(size_B, Size_B, sizeof(int64_t) * 2, cudaMemcpyDeviceToHost); 47 | Row_A = size_A[0]; 48 | Col_A = size_A[1]; 49 | Row_B = size_B[0]; 50 | Col_B = size_B[1]; 51 | int64_t *C = (int64_t *)malloc(sizeof(int64_t) * Col_B * Row_A); 52 | int64_t *A = (int64_t *)malloc(sizeof(int64_t) * Row_A * Col_A); 53 | 54 | int64_t *d_dataA, *d_dataB, *d_dataC; 55 | cudaMalloc((void **)&d_dataA, sizeof(int64_t) * Row_A * 
Col_A); 56 | cudaMalloc((void **)&d_dataB, sizeof(int64_t) * Row_B * Col_B); 57 | cudaMalloc((void **)&d_dataC, sizeof(int64_t) * Col_B * Row_A); 58 | 59 | receiveMessage(srcX, srcY, 0, 0, d_dataA, Col_A * Row_A * sizeof(int64_t)); 60 | receiveMessage(srcX, srcY, 0, 0, d_dataB, Col_B * Row_B * sizeof(int64_t)); 61 | 62 | cudaMemcpy(A, d_dataA, sizeof(int64_t) * Col_A * Row_A, cudaMemcpyDeviceToHost); 63 | for (int64_t i = 0; i < Row_A * Col_A; i++) { 64 | std::cout << A[i]; 65 | if (i % Col_A == 0 && i != 0) 66 | std::cout << std::endl; 67 | else 68 | std::cout << " "; 69 | } 70 | // calculate 71 | dim3 threadPerBlock(BLOCK_DIM, BLOCK_DIM); 72 | // dim3 blockNumber(1); 73 | dim3 blockNumber((Col_B + threadPerBlock.x - 1) / threadPerBlock.x, 74 | (Row_A + threadPerBlock.y - 1) / threadPerBlock.y); 75 | matrix_mul_gpu<<>>(d_dataA, d_dataB, d_dataC, Col_A, Row_A, 76 | Col_B); 77 | cudaMemcpy(C, d_dataC, sizeof(int64_t) * Row_A * Col_B, cudaMemcpyDeviceToHost); 78 | for (int64_t i = 0; i < Row_A * Col_B; i++) { 79 | std::cout << C[i]; 80 | if (i % Col_B == 0 && i != 0) 81 | std::cout << std::endl; 82 | else 83 | std::cout << " "; 84 | } 85 | sendMessage(0, 0, srcX, srcY, d_dataC, Row_A * Col_B * sizeof(int64_t)); 86 | cudaFree(d_dataA); 87 | cudaFree(d_dataB); 88 | cudaFree(d_dataC); 89 | } 90 | return 0; 91 | } -------------------------------------------------------------------------------- /benchmark/MLP/mlp.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration. 
2 | phase1: 3 | # Process 0 4 | - cmd: "$BENCHMARK_ROOT/bin/mlp_cu" 5 | args: ["0", "1"] 6 | log: "gpgpusim.0.1.log" 7 | is_to_stdout: false 8 | clock_rate: 1 9 | pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*" 10 | # Process 1 11 | - cmd: "$BENCHMARK_ROOT/bin/mlp_cu" 12 | args: ["0", "2"] 13 | log: "gpgpusim.0.2.log" 14 | is_to_stdout: false 15 | clock_rate: 1 16 | pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*" 17 | # Process 2 18 | - cmd: "$BENCHMARK_ROOT/bin/mlp_cu" 19 | args: ["0", "3"] 20 | log: "gpgpusim.0.3.log" 21 | is_to_stdout: false 22 | clock_rate: 1 23 | pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*" 24 | # Process 3 25 | - cmd: "$BENCHMARK_ROOT/bin/mlp_cu" 26 | args: ["0", "4"] 27 | log: "gpgpusim.0.4.log" 28 | is_to_stdout: false 29 | clock_rate: 1 30 | pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*" 31 | # Process 4 32 | - cmd: "$BENCHMARK_ROOT/bin/mlp_cu" 33 | args: ["1", "1"] 34 | log: "gpgpusim.1.1.log" 35 | is_to_stdout: false 36 | clock_rate: 1 37 | pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*" 38 | # Process 5 39 | - cmd: "$BENCHMARK_ROOT/bin/mlp_cu" 40 | args: ["1", "2"] 41 | log: "gpgpusim.1.2.log" 42 | is_to_stdout: false 43 | clock_rate: 1 44 | pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*" 45 | # Process 6 46 | - cmd: "$BENCHMARK_ROOT/bin/mlp_cu" 47 | args: ["1", "3"] 48 | log: "gpgpusim.1.3.log" 49 | is_to_stdout: false 50 | clock_rate: 1 51 | pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*" 52 | # Process 7 53 | - cmd: "$BENCHMARK_ROOT/bin/mlp_cu" 54 | args: ["1", "4"] 55 | log: "gpgpusim.1.4.log" 56 | is_to_stdout: false 57 | clock_rate: 1 58 | pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*" 59 | # Process 8 60 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 61 | args: ["--", "$BENCHMARK_ROOT/bin/mlp_cpu", "0", "0"] 62 | log: "sniper.0.0.log" 63 | 
is_to_stdout: false 64 | clock_rate: 1 65 | 66 | # Phase 2 configuration. 67 | phase2: 68 | # Process 0 69 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 70 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 71 | log: "popnet.log" 72 | is_to_stdout: false 73 | clock_rate: 1 74 | 75 | # File configuration. (Not used yet) 76 | bench_file: "./bench.txt" 77 | delayinfo_file: "./delayInfo.txt" 78 | -------------------------------------------------------------------------------- /benchmark/MLP/readData.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.datasets import load_breast_cancer,load_iris 3 | from sklearn.model_selection import train_test_split 4 | from sklearn.preprocessing import StandardScaler 5 | from sklearn.metrics import accuracy_score,r2_score 6 | from sklearn.preprocessing import OneHotEncoder 7 | import matplotlib.pyplot as plt 8 | import json 9 | import pandas as pd 10 | import subprocess 11 | 12 | data = pd.read_csv('./wine.csv') 13 | print(data) 14 | target_data = data['Class'] 15 | feature_data = data.drop(columns='Class') 16 | scaler = StandardScaler() 17 | feature_data = scaler.fit_transform(feature_data) 18 | x_train, x_test, y_train, y_test = train_test_split(feature_data, target_data, test_size=0.2, random_state=2) 19 | y_train,y_test=np.array(y_train),np.array(y_test) 20 | data_dict = {'x_train': x_train.tolist(), 'x_test': x_test.tolist(), 'y_train': y_train.tolist(), 'y_test': y_test.tolist()} 21 | with open('temp_data.json', 'w') as f: 22 | json.dump(data_dict, f) -------------------------------------------------------------------------------- /benchmark/matmul/makefile: -------------------------------------------------------------------------------- 1 | # Project environment 2 | # SIMULATOR_ROOT, defined by setup_env.sh 3 | 
BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/matmul 4 | 5 | # Compiler environment of C/C++ 6 | CC=g++ 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a 9 | 10 | # C/C++ Source file 11 | C_SRCS=matmul.cpp 12 | C_OBJS=obj/matmul.o 13 | C_TARGET=bin/matmul_c 14 | 15 | # Compiler environment of CUDA 16 | NVCC=nvcc 17 | CUFLAGS=--compiler-options -Wall -I$(SIMULATOR_ROOT)/interchiplet/includes 18 | 19 | # CUDA Source file 20 | CUDA_SRCS=matmul.cu 21 | CUDA_OBJS=cuobj/matmul.o 22 | CUDA_TARGET=bin/matmul_cu 23 | 24 | all: bin_dir obj_dir cuobj_dir C_target CUDA_target 25 | 26 | # C language target 27 | C_target: $(C_OBJS) 28 | $(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET) 29 | 30 | # CUDA language target 31 | CUDA_target: $(CUDA_OBJS) 32 | $(NVCC) -L$(SIMULATOR_ROOT)/gpgpu-sim/lib/$(GPGPUSIM_CONFIG) --cudart shared $(CUDA_OBJS) -o $(CUDA_TARGET) 33 | 34 | # Rule for C object 35 | obj/%.o: %.cpp 36 | $(CC) $(CFLAGS) -c $< -o $@ 37 | 38 | # Rule for Cuda object 39 | cuobj/%.o: %.cu 40 | $(NVCC) $(CUFLAGS) -c $< -o $@ 41 | 42 | # Directory for binary files. 43 | bin_dir: 44 | mkdir -p bin 45 | 46 | # Directory for object files for C. 47 | obj_dir: 48 | mkdir -p obj 49 | 50 | # Directory for object files for CUDA. 51 | cuobj_dir: 52 | mkdir -p cuobj 53 | 54 | # Clean generated files. 
55 | clean: 56 | rm -rf bench.txt delayInfo.txt buffer* message_record.txt 57 | rm -rf proc_r*_t* *.log 58 | rm -rf obj cuobj bin 59 | -------------------------------------------------------------------------------- /benchmark/matmul/matmul.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "apis_c.h" 5 | 6 | #define Row 100 7 | #define Col 100 8 | 9 | int idX, idY; 10 | 11 | int main(int argc, char **argv) { 12 | idX = atoi(argv[1]); 13 | idY = atoi(argv[2]); 14 | 15 | int64_t *A = (int64_t *)malloc(sizeof(int64_t) * Row * Col); 16 | int64_t *B = (int64_t *)malloc(sizeof(int64_t) * Row * Col); 17 | int64_t *C1 = (int64_t *)malloc(sizeof(int64_t) * Col); 18 | int64_t *C2 = (int64_t *)malloc(sizeof(int64_t) * Col); 19 | int64_t *C3 = (int64_t *)malloc(sizeof(int64_t) * Col); 20 | 21 | for (int i = 0; i < Row * Col; i++) { 22 | A[i] = rand() % 51; 23 | B[i] = rand() % 51; 24 | } 25 | 26 | InterChiplet::sendMessage(0, 1, idX, idY, A, 10000 * sizeof(int64_t)); 27 | InterChiplet::sendMessage(1, 0, idX, idY, A, 10000 * sizeof(int64_t)); 28 | InterChiplet::sendMessage(1, 1, idX, idY, A, 10000 * sizeof(int64_t)); 29 | 30 | InterChiplet::sendMessage(0, 1, idX, idY, B, 10000 * sizeof(int64_t)); 31 | InterChiplet::sendMessage(1, 0, idX, idY, B, 10000 * sizeof(int64_t)); 32 | InterChiplet::sendMessage(1, 1, idX, idY, B, 10000 * sizeof(int64_t)); 33 | 34 | InterChiplet::receiveMessage(idX, idY, 0, 1, C1, 100 * sizeof(int64_t)); 35 | InterChiplet::receiveMessage(idX, idY, 1, 0, C2, 100 * sizeof(int64_t)); 36 | InterChiplet::receiveMessage(idX, idY, 1, 1, C3, 100 * sizeof(int64_t)); 37 | 38 | for (int i = 0; i < 100; i++) { 39 | C1[i] += C2[i]; 40 | C1[i] += C3[i]; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /benchmark/matmul/matmul.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 
#include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "apis_cu.h" 11 | #include "cuda_runtime.h" 12 | #include "device_launch_parameters.h" 13 | 14 | /** 15 | * 本示例程序为:通过4个GPU chiplet 16 | * 计算随机数矩阵A(400 * 100)与随机数矩阵B(100 * 400)相乘结果。 17 | * 由矩阵乘法原理可知,我们可将计算任务划分为4个100*100的矩阵相乘,并将结果相加。 18 | */ 19 | 20 | #define Row 100 21 | #define Col 100 22 | 23 | /** 24 | * 矩阵乘法的核心函数,由每个线程都会运行一次本函数, 25 | * 根据线程编号不同计算出位于结果矩阵不同位置的数据。 26 | */ 27 | 28 | __global__ void matrix_mul_gpu(int64_t* M, int64_t* N, int64_t* P, int width) { 29 | int sumNum = threadIdx.x + threadIdx.y * 10; 30 | int i = threadIdx.x; 31 | int j = threadIdx.y; 32 | int64_t sum = 0; 33 | for (int k = 0; k < width; k++) { 34 | int64_t a = M[j * width + k]; 35 | int64_t b = N[k * width + i]; 36 | sum += a * b; 37 | } 38 | P[sumNum] = sum; 39 | } 40 | 41 | int main(int argc, char** argv) { 42 | // 读取本进程所代表的chiplet编号 43 | 44 | int idX = atoi(argv[1]); 45 | int idY = atoi(argv[2]); 46 | int64_t *d_dataA, *d_dataB, *d_dataC; 47 | cudaMalloc((void**)&d_dataA, sizeof(int64_t) * Row * Col); 48 | cudaMalloc((void**)&d_dataB, sizeof(int64_t) * Row * Col); 49 | cudaMalloc((void**)&d_dataC, sizeof(int64_t) * Col); 50 | 51 | receiveMessage(idX, idY, 0, 0, d_dataA, sizeof(int64_t) * Row * Col); 52 | receiveMessage(idX, idY, 0, 0, d_dataB, sizeof(int64_t) * Row * Col); 53 | 54 | // calculate 55 | dim3 threadPerBlock(10, 10); 56 | dim3 blockNumber(1); 57 | matrix_mul_gpu<<>>(d_dataA, d_dataB, d_dataC, Col); 58 | 59 | sendMessage(0, 0, idX, idY, d_dataC, 100 * sizeof(int64_t)); 60 | cudaFree(d_dataA); 61 | cudaFree(d_dataB); 62 | cudaFree(d_dataC); 63 | return 0; 64 | } 65 | -------------------------------------------------------------------------------- /benchmark/matmul/matmul.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration. 
2 | phase1: 3 | # Process 0 4 | - cmd: "$BENCHMARK_ROOT/bin/matmul_cu" 5 | args: ["0", "1"] 6 | log: "gpgpusim.0.1.log" 7 | is_to_stdout: false 8 | clock_rate: 1 9 | pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*" 10 | # Process 1 11 | - cmd: "$BENCHMARK_ROOT/bin/matmul_cu" 12 | args: ["1", "0"] 13 | log: "gpgpusim.1.0.log" 14 | is_to_stdout: false 15 | clock_rate: 1 16 | pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*" 17 | # Process 2 18 | - cmd: "$BENCHMARK_ROOT/bin/matmul_cu" 19 | args: ["1", "1"] 20 | log: "gpgpusim.1.1.log" 21 | is_to_stdout: false 22 | clock_rate: 1 23 | pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*" 24 | # Process 3 25 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 26 | args: ["--", "$BENCHMARK_ROOT/bin/matmul_c", "0", "0"] 27 | log: "sniper.0.0.log" 28 | is_to_stdout: false 29 | clock_rate: 1 30 | 31 | # Phase 2 configuration. 32 | phase2: 33 | # Process 0 34 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 35 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 36 | log: "popnet_0.log" 37 | is_to_stdout: false 38 | clock_rate: 1 39 | 40 | # File configuration. 
(Not used yet)
bench_file: "./bench.txt"
delayinfo_file: "./delayInfo.txt"

--------------------------------------------------------------------------------
/benchmark/test/gem5-arm/barrier/makefile:
--------------------------------------------------------------------------------
# Project environment
# SIMULATOR_ROOT, defined by setup_env.sh
# BENCHMARK_ROOT is this benchmark's own directory (the gem5-arm tree, not the
# x86 gem5 tree).
BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/gem5-arm/barrier

# Compiler environment of C/C++ (AArch64 cross compiler)
CC=aarch64-linux-gnu-g++
CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes

# C/C++ Source file
C_SRCS=test.cpp
C_OBJS=obj/test.o
C_TARGET=bin/test_c

all: bin_dir obj_dir C_target

# C language target
C_target: $(C_OBJS) obj/interchiplet.o
	$(CC) $(C_OBJS) obj/interchiplet.o -o $(C_TARGET)

# Interchiplet library, compiled from source with the cross compiler
obj/interchiplet.o: ../../../../interchiplet/srcs/apis_c.cpp
	$(CC) $(CFLAGS) -c $< -o $@

# Rule for C object (the test source is shared with the snipersim benchmark)
obj/%.o: ../../snipersim/barrier/%.cpp
	$(CC) $(CFLAGS) -c $< -o $@

# Directory for binary files.
bin_dir:
	mkdir -p bin

# Directory for object files for C.
obj_dir:
	mkdir -p obj

# Clean generated files.
clean:
	rm -rf bench.txt delayInfo.txt buffer* message_record.txt
	rm -rf proc_r*_t* *.log
	rm -rf obj bin

--------------------------------------------------------------------------------
/benchmark/test/gem5-arm/barrier/test.yml:
--------------------------------------------------------------------------------
# Phase 1 configuration.
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt" 5 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"] 6 | log: "gem5.0.0.log" 7 | is_to_stdout: false 8 | clock_rate: 1 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt" 11 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"] 12 | log: "gem5.0.1.log" 13 | is_to_stdout: false 14 | clock_rate: 1 15 | # Process 2 16 | - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt" 17 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"] 18 | log: "gem5.1.0.log" 19 | is_to_stdout: false 20 | clock_rate: 1 21 | # Process 3 22 | - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt" 23 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"] 24 | log: "gem5.1.1.log" 25 | is_to_stdout: false 26 | clock_rate: 1 27 | 28 | # Phase 2 configuration. 29 | phase2: 30 | # Process 0 31 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 32 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 33 | log: "popnet_0.log" 34 | is_to_stdout: false 35 | clock_rate: 1 36 | 37 | # File configuration. 
(Not used yet)
bench_file: "./bench.txt"
delayinfo_file: "./delayInfo.txt"

--------------------------------------------------------------------------------
/benchmark/test/gem5-arm/launch/makefile:
--------------------------------------------------------------------------------
# Project environment
# SIMULATOR_ROOT, defined by setup_env.sh
# BENCHMARK_ROOT is this benchmark's own directory (the gem5-arm tree, not the
# x86 gem5 tree).
BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/gem5-arm/launch

# Compiler environment of C/C++ (AArch64 cross compiler)
CC=aarch64-linux-gnu-g++
CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes

# C/C++ Source file
C_SRCS=test.cpp
C_OBJS=obj/test.o
C_TARGET=bin/test_c

all: bin_dir obj_dir C_target

# C language target
C_target: $(C_OBJS) obj/interchiplet.o
	$(CC) $(C_OBJS) obj/interchiplet.o -o $(C_TARGET)

# Interchiplet library, compiled from source with the cross compiler
obj/interchiplet.o: ../../../../interchiplet/srcs/apis_c.cpp
	$(CC) $(CFLAGS) -c $< -o $@

# Rule for C object (the test source is shared with the snipersim benchmark)
obj/%.o: ../../snipersim/launch/%.cpp
	$(CC) $(CFLAGS) -c $< -o $@

# Directory for binary files.
bin_dir:
	mkdir -p bin

# Directory for object files for C.
obj_dir:
	mkdir -p obj

# Clean generated files.
clean:
	rm -rf bench.txt delayInfo.txt buffer* message_record.txt
	rm -rf proc_r*_t* *.log
	rm -rf obj bin

--------------------------------------------------------------------------------
/benchmark/test/gem5-arm/launch/test.yml:
--------------------------------------------------------------------------------
# Phase 1 configuration.
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt" 5 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"] 6 | log: "gem5.0.0.log" 7 | is_to_stdout: false 8 | clock_rate: 1 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt" 11 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"] 12 | log: "gem5.0.1.log" 13 | is_to_stdout: false 14 | clock_rate: 1 15 | # Process 2 16 | - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt" 17 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"] 18 | log: "gem5.1.0.log" 19 | is_to_stdout: false 20 | clock_rate: 1 21 | # Process 3 22 | - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt" 23 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"] 24 | log: "gem5.1.1.log" 25 | is_to_stdout: false 26 | clock_rate: 1 27 | 28 | # Phase 2 configuration. 29 | phase2: 30 | # Process 0 31 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 32 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 33 | log: "popnet_0.log" 34 | is_to_stdout: false 35 | clock_rate: 1 36 | 37 | # File configuration. 
(Not used yet)
bench_file: "./bench.txt"
delayinfo_file: "./delayInfo.txt"

--------------------------------------------------------------------------------
/benchmark/test/gem5-arm/lock/makefile:
--------------------------------------------------------------------------------
# Project environment
# SIMULATOR_ROOT, defined by setup_env.sh
# BENCHMARK_ROOT is this benchmark's own directory (the gem5-arm tree, not the
# x86 gem5 tree).
BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/gem5-arm/lock

# Compiler environment of C/C++ (AArch64 cross compiler)
CC=aarch64-linux-gnu-g++
CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes

# C/C++ Source file
C_SRCS=test.cpp
C_OBJS=obj/test.o
C_TARGET=bin/test_c

all: bin_dir obj_dir C_target

# C language target
C_target: $(C_OBJS) obj/interchiplet.o
	$(CC) $(C_OBJS) obj/interchiplet.o -o $(C_TARGET)

# Interchiplet library, compiled from source with the cross compiler
obj/interchiplet.o: ../../../../interchiplet/srcs/apis_c.cpp
	$(CC) $(CFLAGS) -c $< -o $@

# Rule for C object (the test source is shared with the snipersim benchmark)
obj/%.o: ../../snipersim/lock/%.cpp
	$(CC) $(CFLAGS) -c $< -o $@

# Directory for binary files.
bin_dir:
	mkdir -p bin

# Directory for object files for C.
obj_dir:
	mkdir -p obj

# Clean generated files.
clean:
	rm -rf bench.txt delayInfo.txt buffer* message_record.txt
	rm -rf proc_r*_t* *.log
	rm -rf obj bin

--------------------------------------------------------------------------------
/benchmark/test/gem5-arm/lock/test.yml:
--------------------------------------------------------------------------------
# Phase 1 configuration.
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt" 5 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"] 6 | log: "gem5.0.0.log" 7 | is_to_stdout: false 8 | clock_rate: 1 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt" 11 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"] 12 | log: "gem5.0.1.log" 13 | is_to_stdout: false 14 | clock_rate: 1 15 | # Process 2 16 | - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt" 17 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"] 18 | log: "gem5.1.0.log" 19 | is_to_stdout: false 20 | clock_rate: 1 21 | # Process 3 22 | - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt" 23 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"] 24 | log: "gem5.1.1.log" 25 | is_to_stdout: false 26 | clock_rate: 1 27 | 28 | # Phase 2 configuration. 29 | phase2: 30 | # Process 0 31 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 32 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 33 | log: "popnet_0.log" 34 | is_to_stdout: false 35 | clock_rate: 1 36 | 37 | # File configuration. 
(Not used yet)
bench_file: "./bench.txt"
delayinfo_file: "./delayInfo.txt"

--------------------------------------------------------------------------------
/benchmark/test/gem5-arm/pipe/makefile:
--------------------------------------------------------------------------------
# Project environment
# SIMULATOR_ROOT, defined by setup_env.sh
# BENCHMARK_ROOT is this benchmark's own directory (the gem5-arm tree, not the
# x86 gem5 tree).
BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/gem5-arm/pipe

# Compiler environment of C/C++ (AArch64 cross compiler)
CC=aarch64-linux-gnu-g++
CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes

# C/C++ Source file
C_SRCS=test.cpp
C_OBJS=obj/test.o
C_TARGET=bin/test_c

all: bin_dir obj_dir C_target

# C language target
C_target: $(C_OBJS) obj/interchiplet.o
	$(CC) $(C_OBJS) obj/interchiplet.o -o $(C_TARGET)

# Interchiplet library, compiled from source with the cross compiler
obj/interchiplet.o: ../../../../interchiplet/srcs/apis_c.cpp
	$(CC) $(CFLAGS) -c $< -o $@

# Rule for C object (the test source is shared with the snipersim benchmark)
obj/%.o: ../../snipersim/pipe/%.cpp
	$(CC) $(CFLAGS) -c $< -o $@

# Directory for binary files.
bin_dir:
	mkdir -p bin

# Directory for object files for C.
obj_dir:
	mkdir -p obj

# Clean generated files.
clean:
	rm -rf bench.txt delayInfo.txt buffer* message_record.txt
	rm -rf proc_r*_t* *.log
	rm -rf obj bin

--------------------------------------------------------------------------------
/benchmark/test/gem5-arm/pipe/test.yml:
--------------------------------------------------------------------------------
# Phase 1 configuration.
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt" 5 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"] 6 | log: "gem5.0.0.log" 7 | is_to_stdout: false 8 | clock_rate: 1 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt" 11 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"] 12 | log: "gem5.0.1.log" 13 | is_to_stdout: false 14 | clock_rate: 1 15 | # Process 2 16 | - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt" 17 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"] 18 | log: "gem5.1.0.log" 19 | is_to_stdout: false 20 | clock_rate: 1 21 | # Process 3 22 | - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt" 23 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"] 24 | log: "gem5.1.1.log" 25 | is_to_stdout: false 26 | clock_rate: 1 27 | 28 | # Phase 2 configuration. 29 | phase2: 30 | # Process 0 31 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 32 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 33 | log: "popnet_0.log" 34 | is_to_stdout: false 35 | clock_rate: 1 36 | 37 | # File configuration. 
(Not used yet) 38 | bench_file: "./bench.txt" 39 | delayinfo_file: "./delayInfo.txt" 40 | -------------------------------------------------------------------------------- /benchmark/test/gem5/barrier/makefile: -------------------------------------------------------------------------------- 1 | # Project environment 2 | # SIMULATOR_ROOT, defined by setup_env.sh 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/gem5/barrier 4 | 5 | # Compiler environment of C/C++ 6 | CC=g++ 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a 9 | 10 | # C/C++ Source file 11 | C_SRCS=test.cpp 12 | C_OBJS=obj/test.o 13 | C_TARGET=bin/test_c 14 | 15 | all: bin_dir obj_dir C_target 16 | 17 | # C language target 18 | C_target: $(C_OBJS) 19 | $(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET) 20 | 21 | # Rule for C object 22 | obj/%.o: ../../snipersim/barrier/%.cpp 23 | $(CC) $(CFLAGS) -c $< -o $@ 24 | 25 | # Directory for binary files. 26 | bin_dir: 27 | mkdir -p bin 28 | 29 | # Directory for object files for C. 30 | obj_dir: 31 | mkdir -p obj 32 | 33 | # Clean generated files. 34 | clean: 35 | rm -rf bench.txt delayInfo.txt buffer* message_record.txt 36 | rm -rf proc_r*_t* *.log 37 | rm -rf obj bin 38 | -------------------------------------------------------------------------------- /benchmark/test/gem5/barrier/test_atomic.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration. 
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 5 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"] 6 | log: "gem5.0.0.log" 7 | is_to_stdout: false 8 | clock_rate: 500 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 11 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"] 12 | log: "gem5.0.1.log" 13 | is_to_stdout: false 14 | clock_rate: 500 15 | # Process 2 16 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 17 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"] 18 | log: "gem5.1.0.log" 19 | is_to_stdout: false 20 | clock_rate: 500 21 | # Process 3 22 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 23 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"] 24 | log: "gem5.1.1.log" 25 | is_to_stdout: false 26 | clock_rate: 500 27 | 28 | # Phase 2 configuration. 29 | phase2: 30 | # Process 0 31 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 32 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 33 | log: "popnet_0.log" 34 | is_to_stdout: false 35 | clock_rate: 1 36 | 37 | # File configuration. (Not used yet) 38 | bench_file: "./bench.txt" 39 | delayinfo_file: "./delayInfo.txt" 40 | -------------------------------------------------------------------------------- /benchmark/test/gem5/barrier/test_o3.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration. 
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 5 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"] 6 | log: "gem5.0.0.log" 7 | is_to_stdout: false 8 | clock_rate: 500 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 11 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"] 12 | log: "gem5.0.1.log" 13 | is_to_stdout: false 14 | clock_rate: 500 15 | # Process 2 16 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 17 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"] 18 | log: "gem5.1.0.log" 19 | is_to_stdout: false 20 | clock_rate: 500 21 | # Process 3 22 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 23 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"] 24 | log: "gem5.1.1.log" 25 | is_to_stdout: false 26 | clock_rate: 500 27 | 28 | # Phase 2 configuration. 29 | phase2: 30 | # Process 0 31 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 32 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 33 | log: "popnet_0.log" 34 | is_to_stdout: false 35 | clock_rate: 1 36 | 37 | # File configuration. (Not used yet) 38 | bench_file: "./bench.txt" 39 | delayinfo_file: "./delayInfo.txt" 40 | -------------------------------------------------------------------------------- /benchmark/test/gem5/barrier/test_timing.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration. 
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 5 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"] 6 | log: "gem5.0.0.log" 7 | is_to_stdout: false 8 | clock_rate: 500 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 11 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"] 12 | log: "gem5.0.1.log" 13 | is_to_stdout: false 14 | clock_rate: 500 15 | # Process 2 16 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 17 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"] 18 | log: "gem5.1.0.log" 19 | is_to_stdout: false 20 | clock_rate: 500 21 | # Process 3 22 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 23 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"] 24 | log: "gem5.1.1.log" 25 | is_to_stdout: false 26 | clock_rate: 500 27 | 28 | # Phase 2 configuration. 29 | phase2: 30 | # Process 0 31 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 32 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 33 | log: "popnet_0.log" 34 | is_to_stdout: false 35 | clock_rate: 1 36 | 37 | # File configuration. 
(Not used yet) 38 | bench_file: "./bench.txt" 39 | delayinfo_file: "./delayInfo.txt" 40 | -------------------------------------------------------------------------------- /benchmark/test/gem5/launch/makefile: -------------------------------------------------------------------------------- 1 | # Project environment 2 | # SIMULATOR_ROOT, defined by setup_env.sh 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/gem5/launch 4 | 5 | # Compiler environment of C/C++ 6 | CC=g++ 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a 9 | 10 | # C/C++ Source file 11 | C_SRCS=test.cpp 12 | C_OBJS=obj/test.o 13 | C_TARGET=bin/test_c 14 | 15 | all: bin_dir obj_dir C_target 16 | 17 | # C language target 18 | C_target: $(C_OBJS) 19 | $(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET) 20 | 21 | # Rule for C object 22 | obj/%.o: ../../snipersim/launch/%.cpp 23 | $(CC) $(CFLAGS) -c $< -o $@ 24 | 25 | # Directory for binary files. 26 | bin_dir: 27 | mkdir -p bin 28 | 29 | # Directory for object files for C. 30 | obj_dir: 31 | mkdir -p obj 32 | 33 | # Clean generated files. 34 | clean: 35 | rm -rf bench.txt delayInfo.txt buffer* message_record.txt 36 | rm -rf proc_r*_t* *.log 37 | rm -rf obj bin 38 | -------------------------------------------------------------------------------- /benchmark/test/gem5/launch/test_atomic.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration. 
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 5 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"] 6 | log: "gem5.0.0.log" 7 | is_to_stdout: false 8 | clock_rate: 500 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 11 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"] 12 | log: "gem5.0.1.log" 13 | is_to_stdout: false 14 | clock_rate: 500 15 | # Process 2 16 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 17 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"] 18 | log: "gem5.1.0.log" 19 | is_to_stdout: false 20 | clock_rate: 500 21 | # Process 3 22 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 23 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"] 24 | log: "gem5.1.1.log" 25 | is_to_stdout: false 26 | clock_rate: 500 27 | 28 | # Phase 2 configuration. 29 | phase2: 30 | # Process 0 31 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 32 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 33 | log: "popnet_0.log" 34 | is_to_stdout: false 35 | clock_rate: 1 36 | 37 | # File configuration. (Not used yet) 38 | bench_file: "./bench.txt" 39 | delayinfo_file: "./delayInfo.txt" 40 | -------------------------------------------------------------------------------- /benchmark/test/gem5/launch/test_o3.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration. 
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 5 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"] 6 | log: "gem5.0.0.log" 7 | is_to_stdout: false 8 | clock_rate: 500 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 11 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"] 12 | log: "gem5.0.1.log" 13 | is_to_stdout: false 14 | clock_rate: 500 15 | # Process 2 16 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 17 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"] 18 | log: "gem5.1.0.log" 19 | is_to_stdout: false 20 | clock_rate: 500 21 | # Process 3 22 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 23 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"] 24 | log: "gem5.1.1.log" 25 | is_to_stdout: false 26 | clock_rate: 500 27 | 28 | # Phase 2 configuration. 29 | phase2: 30 | # Process 0 31 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 32 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 33 | log: "popnet_0.log" 34 | is_to_stdout: false 35 | clock_rate: 1 36 | 37 | # File configuration. (Not used yet) 38 | bench_file: "./bench.txt" 39 | delayinfo_file: "./delayInfo.txt" 40 | -------------------------------------------------------------------------------- /benchmark/test/gem5/launch/test_timing.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration. 
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 5 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"] 6 | log: "gem5.0.0.log" 7 | is_to_stdout: false 8 | clock_rate: 500 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 11 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"] 12 | log: "gem5.0.1.log" 13 | is_to_stdout: false 14 | clock_rate: 500 15 | # Process 2 16 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 17 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"] 18 | log: "gem5.1.0.log" 19 | is_to_stdout: false 20 | clock_rate: 500 21 | # Process 3 22 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 23 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"] 24 | log: "gem5.1.1.log" 25 | is_to_stdout: false 26 | clock_rate: 500 27 | 28 | # Phase 2 configuration. 29 | phase2: 30 | # Process 0 31 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 32 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 33 | log: "popnet_0.log" 34 | is_to_stdout: false 35 | clock_rate: 1 36 | 37 | # File configuration. 
(Not used yet) 38 | bench_file: "./bench.txt" 39 | delayinfo_file: "./delayInfo.txt" 40 | -------------------------------------------------------------------------------- /benchmark/test/gem5/lock/makefile: -------------------------------------------------------------------------------- 1 | # Project environment 2 | # SIMULATOR_ROOT, defined by setup_env.sh 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/gem5/lock 4 | 5 | # Compiler environment of C/C++ 6 | CC=g++ 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a 9 | 10 | # C/C++ Source file 11 | C_SRCS=test.cpp 12 | C_OBJS=obj/test.o 13 | C_TARGET=bin/test_c 14 | 15 | all: bin_dir obj_dir C_target 16 | 17 | # C language target 18 | C_target: $(C_OBJS) 19 | $(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET) 20 | 21 | # Rule for C object 22 | obj/%.o: ../../snipersim/lock/%.cpp 23 | $(CC) $(CFLAGS) -c $< -o $@ 24 | 25 | # Directory for binary files. 26 | bin_dir: 27 | mkdir -p bin 28 | 29 | # Directory for object files for C. 30 | obj_dir: 31 | mkdir -p obj 32 | 33 | # Clean generated files. 34 | clean: 35 | rm -rf bench.txt delayInfo.txt buffer* message_record.txt 36 | rm -rf proc_r*_t* *.log 37 | rm -rf obj bin 38 | -------------------------------------------------------------------------------- /benchmark/test/gem5/lock/test_atomic.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration. 
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 5 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"] 6 | log: "gem5.0.0.log" 7 | is_to_stdout: false 8 | clock_rate: 500 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 11 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"] 12 | log: "gem5.0.1.log" 13 | is_to_stdout: false 14 | clock_rate: 500 15 | # Process 2 16 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 17 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"] 18 | log: "gem5.1.0.log" 19 | is_to_stdout: false 20 | clock_rate: 500 21 | # Process 3 22 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 23 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"] 24 | log: "gem5.1.1.log" 25 | is_to_stdout: false 26 | clock_rate: 500 27 | 28 | # Phase 2 configuration. 29 | phase2: 30 | # Process 0 31 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 32 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 33 | log: "popnet_0.log" 34 | is_to_stdout: false 35 | clock_rate: 1 36 | 37 | # File configuration. (Not used yet) 38 | bench_file: "./bench.txt" 39 | delayinfo_file: "./delayInfo.txt" 40 | -------------------------------------------------------------------------------- /benchmark/test/gem5/lock/test_o3.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration. 
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 5 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"] 6 | log: "gem5.0.0.log" 7 | is_to_stdout: false 8 | clock_rate: 500 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 11 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"] 12 | log: "gem5.0.1.log" 13 | is_to_stdout: false 14 | clock_rate: 500 15 | # Process 2 16 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 17 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"] 18 | log: "gem5.1.0.log" 19 | is_to_stdout: false 20 | clock_rate: 500 21 | # Process 3 22 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 23 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"] 24 | log: "gem5.1.1.log" 25 | is_to_stdout: false 26 | clock_rate: 500 27 | 28 | # Phase 2 configuration. 29 | phase2: 30 | # Process 0 31 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 32 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 33 | log: "popnet_0.log" 34 | is_to_stdout: false 35 | clock_rate: 1 36 | 37 | # File configuration. (Not used yet) 38 | bench_file: "./bench.txt" 39 | delayinfo_file: "./delayInfo.txt" 40 | -------------------------------------------------------------------------------- /benchmark/test/gem5/lock/test_timing.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration. 
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 5 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"] 6 | log: "gem5.0.0.log" 7 | is_to_stdout: false 8 | clock_rate: 500 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 11 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"] 12 | log: "gem5.0.1.log" 13 | is_to_stdout: false 14 | clock_rate: 500 15 | # Process 2 16 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 17 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"] 18 | log: "gem5.1.0.log" 19 | is_to_stdout: false 20 | clock_rate: 500 21 | # Process 3 22 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 23 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"] 24 | log: "gem5.1.1.log" 25 | is_to_stdout: false 26 | clock_rate: 500 27 | 28 | # Phase 2 configuration. 29 | phase2: 30 | # Process 0 31 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 32 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 33 | log: "popnet_0.log" 34 | is_to_stdout: false 35 | clock_rate: 1 36 | 37 | # File configuration. 
(Not used yet) 38 | bench_file: "./bench.txt" 39 | delayinfo_file: "./delayInfo.txt" 40 | -------------------------------------------------------------------------------- /benchmark/test/gem5/pipe/makefile: -------------------------------------------------------------------------------- 1 | # Project environment 2 | # SIMULATOR_ROOT, defined by setup_env.sh 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/gem5/pipe 4 | 5 | # Compiler environment of C/C++ 6 | CC=g++ 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a 9 | 10 | # C/C++ Source file 11 | C_SRCS=test.cpp 12 | C_OBJS=obj/test.o 13 | C_TARGET=bin/test_c 14 | 15 | all: bin_dir obj_dir C_target 16 | 17 | # C language target 18 | C_target: $(C_OBJS) 19 | $(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET) 20 | 21 | # Rule for C object 22 | obj/%.o: ../../snipersim/pipe/%.cpp 23 | $(CC) $(CFLAGS) -c $< -o $@ 24 | 25 | # Directory for binary files. 26 | bin_dir: 27 | mkdir -p bin 28 | 29 | # Directory for object files for C. 30 | obj_dir: 31 | mkdir -p obj 32 | 33 | # Clean generated files. 34 | clean: 35 | rm -rf bench.txt delayInfo.txt buffer* message_record.txt 36 | rm -rf proc_r*_t* *.log 37 | rm -rf obj bin 38 | -------------------------------------------------------------------------------- /benchmark/test/gem5/pipe/test_atomic.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration. 
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 5 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"] 6 | log: "gem5.0.0.log" 7 | is_to_stdout: false 8 | clock_rate: 500 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 11 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"] 12 | log: "gem5.0.1.log" 13 | is_to_stdout: false 14 | clock_rate: 500 15 | # Process 2 16 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 17 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"] 18 | log: "gem5.1.0.log" 19 | is_to_stdout: false 20 | clock_rate: 500 21 | # Process 3 22 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 23 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"] 24 | log: "gem5.1.1.log" 25 | is_to_stdout: false 26 | clock_rate: 500 27 | 28 | # Phase 2 configuration. 29 | phase2: 30 | # Process 0 31 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 32 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 33 | log: "popnet_0.log" 34 | is_to_stdout: false 35 | clock_rate: 1 36 | 37 | # File configuration. (Not used yet) 38 | bench_file: "./bench.txt" 39 | delayinfo_file: "./delayInfo.txt" 40 | -------------------------------------------------------------------------------- /benchmark/test/gem5/pipe/test_o3.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration. 
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 5 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"] 6 | log: "gem5.0.0.log" 7 | is_to_stdout: false 8 | clock_rate: 500 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 11 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"] 12 | log: "gem5.0.1.log" 13 | is_to_stdout: false 14 | clock_rate: 500 15 | # Process 2 16 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 17 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"] 18 | log: "gem5.1.0.log" 19 | is_to_stdout: false 20 | clock_rate: 500 21 | # Process 3 22 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 23 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"] 24 | log: "gem5.1.1.log" 25 | is_to_stdout: false 26 | clock_rate: 500 27 | 28 | # Phase 2 configuration. 29 | phase2: 30 | # Process 0 31 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 32 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 33 | log: "popnet_0.log" 34 | is_to_stdout: false 35 | clock_rate: 1 36 | 37 | # File configuration. (Not used yet) 38 | bench_file: "./bench.txt" 39 | delayinfo_file: "./delayInfo.txt" 40 | -------------------------------------------------------------------------------- /benchmark/test/gem5/pipe/test_timing.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration. 
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 5 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"] 6 | log: "gem5.0.0.log" 7 | is_to_stdout: false 8 | clock_rate: 500 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 11 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"] 12 | log: "gem5.0.1.log" 13 | is_to_stdout: false 14 | clock_rate: 500 15 | # Process 2 16 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 17 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"] 18 | log: "gem5.1.0.log" 19 | is_to_stdout: false 20 | clock_rate: 500 21 | # Process 3 22 | - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt" 23 | args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"] 24 | log: "gem5.1.1.log" 25 | is_to_stdout: false 26 | clock_rate: 500 27 | 28 | # Phase 2 configuration. 29 | phase2: 30 | # Process 0 31 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 32 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 33 | log: "popnet_0.log" 34 | is_to_stdout: false 35 | clock_rate: 1 36 | 37 | # File configuration. 
(Not used yet) 38 | bench_file: "./bench.txt" 39 | delayinfo_file: "./delayInfo.txt" 40 | -------------------------------------------------------------------------------- /benchmark/test/gpgpu-sim/barrier/makefile: -------------------------------------------------------------------------------- 1 | # Project environment 2 | # SIMULATOR_ROOT, defined by setup_env.sh 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/gpgpu-sim/barrier 4 | 5 | # Compiler environment of C/C++ 6 | CC=g++ 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a 9 | 10 | # C/C++ source file (matmul.cpp in this directory); the binary keeps the msnip_c name that matmul.yml launches 11 | C_SRCS=matmul.cpp 12 | C_OBJS=obj/matmul.o 13 | C_TARGET=bin/msnip_c 14 | 15 | # Compiler environment of CUDA 16 | NVCC=nvcc 17 | CUFLAGS=--compiler-options -Wall -I$(SIMULATOR_ROOT)/interchiplet/includes 18 | 19 | # CUDA source file (matmul.cu in this directory); the binary keeps the msnip_cu name that matmul.yml launches 20 | CUDA_SRCS=matmul.cu 21 | CUDA_OBJS=cuobj/matmul.o 22 | CUDA_TARGET=bin/msnip_cu 23 | 24 | all: bin_dir obj_dir cuobj_dir C_target CUDA_target 25 | 26 | # C language target 27 | C_target: $(C_OBJS) 28 | $(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET) 29 | 30 | # CUDA language target 31 | CUDA_target: $(CUDA_OBJS) 32 | $(NVCC) -L$(SIMULATOR_ROOT)/gpgpu-sim/lib/$(GPGPUSIM_CONFIG) --cudart shared $(CUDA_OBJS) -o $(CUDA_TARGET) 33 | 34 | # Rule for C object 35 | obj/%.o: %.cpp 36 | $(CC) $(CFLAGS) -c $< -o $@ 37 | 38 | # Rule for Cuda object 39 | cuobj/%.o: %.cu 40 | $(NVCC) $(CUFLAGS) -c $< -o $@ 41 | 42 | # Directory for binary files. 43 | bin_dir: 44 | mkdir -p bin 45 | 46 | # Directory for object files for C. 47 | obj_dir: 48 | mkdir -p obj 49 | 50 | # Directory for object files for CUDA. 51 | cuobj_dir: 52 | mkdir -p cuobj 53 | 54 | # Clean generated files.
55 | clean: 56 | rm -rf bench.txt delayInfo.txt buffer* message_record.txt 57 | rm -rf proc_r*_t* *.log 58 | rm -rf obj cuobj bin 59 | -------------------------------------------------------------------------------- /benchmark/test/gpgpu-sim/barrier/matmul.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "apis_c.h" 5 | #include "unistd.h" 6 | 7 | #define Row 100 8 | #define Col 100 9 | 10 | int idX, idY; 11 | 12 | int main(int argc, char **argv) { 13 | idX = atoi(argv[1]); 14 | idY = atoi(argv[2]); 15 | 16 | int64_t *A = (int64_t *)malloc(sizeof(int64_t) * Row * Col); 17 | int64_t *B = (int64_t *)malloc(sizeof(int64_t) * Row * Col); 18 | int64_t *C = (int64_t *)malloc(sizeof(int64_t) * Col); 19 | 20 | for (int i = 0; i < Row * Col; i++) { 21 | A[i] = rand() % 51; 22 | B[i] = rand() % 51; 23 | } 24 | 25 | for (int i = 0; i < 2; i ++) 26 | { 27 | // Create time gap between cycles. 28 | usleep((idX * 2 + idY) * 100); 29 | 30 | InterChiplet::launch(1, 1, idX, idY); 31 | 32 | InterChiplet::sendMessage(1, 1, idX, idY, A, 10000 * sizeof(int64_t)); 33 | InterChiplet::sendMessage(1, 1, idX, idY, B, 10000 * sizeof(int64_t)); 34 | 35 | InterChiplet::receiveMessage(idX, idY, 1, 1, C, 100 * sizeof(int64_t)); 36 | 37 | InterChiplet::barrier(0xFF, idX, idY, 3); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /benchmark/test/gpgpu-sim/barrier/matmul.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "apis_cu.h" 11 | #include "cuda_runtime.h" 12 | #include "device_launch_parameters.h" 13 | 14 | /** 15 | * 本示例程序为:通过4个GPU chiplet 16 | * 计算随机数矩阵A(400 * 100)与随机数矩阵B(100 * 400)相乘结果。 17 | * 由矩阵乘法原理可知,我们可将计算任务划分为4个100*100的矩阵相乘,并将结果相加。 18 | */ 19 | 20 | #define Row 100 21 | #define Col 100 22 | 23 | /** 24 | * 
Core matrix-multiplication kernel: every thread runs it once and computes the 25 | * result element selected by its thread index. 26 | * Thread (x, y) writes P[x + 10 * y] = sum over k < width of M[y * width + k] * N[k * width + x], 27 | * so the output index assumes a block that is 10 threads wide. 28 | */ 29 | 30 | __global__ void matrix_mul_gpu(int64_t* M, int64_t* N, int64_t* P, int width) { 31 | int sumNum = threadIdx.x + threadIdx.y * 10; 32 | int i = threadIdx.x; 33 | int j = threadIdx.y; 34 | int64_t sum = 0; 35 | for (int k = 0; k < width; k++) { 36 | int64_t a = M[j * width + k]; 37 | int64_t b = N[k * width + i]; 38 | sum += a * b; 39 | } 40 | P[sumNum] = sum; 41 | } 42 | 43 | int main(int argc, char** argv) { 44 | // Read the coordinates of the chiplet this process represents; both arguments are required. 45 | if (argc < 3) { 46 | return 1; 47 | } 48 | int idX = atoi(argv[1]); 49 | int idY = atoi(argv[2]); 50 | 51 | for (int round = 0; round < 6; round++) { 52 | int locker_x = -1, locker_y = -1; 53 | waitLaunch(idX, idY, &locker_x, &locker_y); 54 | 55 | int64_t *d_dataA, *d_dataB, *d_dataC; 56 | cudaMalloc((void**)&d_dataA, sizeof(int64_t) * Row * Col); 57 | cudaMalloc((void**)&d_dataB, sizeof(int64_t) * Row * Col); 58 | cudaMalloc((void**)&d_dataC, sizeof(int64_t) * Col); 59 | 60 | receiveMessage(idX, idY, locker_x, locker_y, d_dataA, sizeof(int64_t) * Row * Col); 61 | receiveMessage(idX, idY, locker_x, locker_y, d_dataB, sizeof(int64_t) * Row * Col); 62 | 63 | // Launch a single 10 x 10 thread block; each thread writes one element of d_dataC. 64 | dim3 threadPerBlock(10, 10); 65 | dim3 blockNumber(1); 66 | matrix_mul_gpu<<<blockNumber, threadPerBlock>>>(d_dataA, d_dataB, d_dataC, Col); 67 | 68 | sendMessage(locker_x, locker_y, idX, idY, d_dataC, sizeof(int64_t) * Col); 69 | cudaFree(d_dataA); 70 | cudaFree(d_dataB); 71 | cudaFree(d_dataC); 72 | } 73 | return 0; 74 | } 75 | -------------------------------------------------------------------------------- /benchmark/test/gpgpu-sim/barrier/matmul.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration.
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 5 | args: ["--", "$BENCHMARK_ROOT/bin/msnip_c", "0", "1"] 6 | log: "sniper.0.1.log" 7 | is_to_stdout: false 8 | clock_rate: 1 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 11 | args: ["--", "$BENCHMARK_ROOT/bin/msnip_c", "1", "0"] 12 | log: "sniper.1.0.log" 13 | is_to_stdout: false 14 | clock_rate: 1 15 | # Process 2 16 | - cmd: "$BENCHMARK_ROOT/bin/msnip_cu" 17 | args: ["1", "1"] 18 | log: "gpgpusim.1.1.log" 19 | is_to_stdout: false 20 | clock_rate: 1 21 | pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*" 22 | # Process 3 23 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 24 | args: ["--", "$BENCHMARK_ROOT/bin/msnip_c", "0", "0"] 25 | log: "sniper.0.0.log" 26 | is_to_stdout: false 27 | clock_rate: 1 28 | 29 | # Phase 2 configuration. 30 | phase2: 31 | # Process 0 32 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 33 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 34 | log: "popnet_0.log" 35 | is_to_stdout: false 36 | clock_rate: 1 37 | 38 | # File configuration. 
(Not used yet) 39 | bench_file: "./bench.txt" 40 | delayinfo_file: "./delayInfo.txt" 41 | -------------------------------------------------------------------------------- /benchmark/test/gpgpu-sim/launch/makefile: -------------------------------------------------------------------------------- 1 | # Project environment 2 | # SIMULATOR_ROOT, defined by setup_env.sh 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/gpgpu-sim/launch 4 | 5 | # Compiler environment of C/C++ 6 | CC=g++ 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a 9 | 10 | # C/C++ source file (matmul.cpp in this directory); the binary keeps the msnip_c name that matmul.yml launches 11 | C_SRCS=matmul.cpp 12 | C_OBJS=obj/matmul.o 13 | C_TARGET=bin/msnip_c 14 | 15 | # Compiler environment of CUDA 16 | NVCC=nvcc 17 | CUFLAGS=--compiler-options -Wall -I$(SIMULATOR_ROOT)/interchiplet/includes 18 | 19 | # CUDA source file (matmul.cu in this directory); the binary keeps the msnip_cu name that matmul.yml launches 20 | CUDA_SRCS=matmul.cu 21 | CUDA_OBJS=cuobj/matmul.o 22 | CUDA_TARGET=bin/msnip_cu 23 | 24 | all: bin_dir obj_dir cuobj_dir C_target CUDA_target 25 | 26 | # C language target 27 | C_target: $(C_OBJS) 28 | $(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET) 29 | 30 | # CUDA language target 31 | CUDA_target: $(CUDA_OBJS) 32 | $(NVCC) -L$(SIMULATOR_ROOT)/gpgpu-sim/lib/$(GPGPUSIM_CONFIG) --cudart shared $(CUDA_OBJS) -o $(CUDA_TARGET) 33 | 34 | # Rule for C object 35 | obj/%.o: %.cpp 36 | $(CC) $(CFLAGS) -c $< -o $@ 37 | 38 | # Rule for Cuda object 39 | cuobj/%.o: %.cu 40 | $(NVCC) $(CUFLAGS) -c $< -o $@ 41 | 42 | # Directory for binary files. 43 | bin_dir: 44 | mkdir -p bin 45 | 46 | # Directory for object files for C. 47 | obj_dir: 48 | mkdir -p obj 49 | 50 | # Directory for object files for CUDA. 51 | cuobj_dir: 52 | mkdir -p cuobj 53 | 54 | # Clean generated files.
55 | clean: 56 | rm -rf bench.txt delayInfo.txt buffer* message_record.txt 57 | rm -rf proc_r*_t* *.log 58 | rm -rf obj cuobj bin 59 | -------------------------------------------------------------------------------- /benchmark/test/gpgpu-sim/launch/matmul.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "apis_c.h" 5 | #include "unistd.h" 6 | 7 | #define Row 100 8 | #define Col 100 9 | 10 | int idX, idY; 11 | 12 | int main(int argc, char **argv) { 13 | idX = atoi(argv[1]); 14 | idY = atoi(argv[2]); 15 | 16 | int64_t *A = (int64_t *)malloc(sizeof(int64_t) * Row * Col); 17 | int64_t *B = (int64_t *)malloc(sizeof(int64_t) * Row * Col); 18 | int64_t *C = (int64_t *)malloc(sizeof(int64_t) * Col); 19 | 20 | for (int i = 0; i < Row * Col; i++) { 21 | A[i] = rand() % 51; 22 | B[i] = rand() % 51; 23 | } 24 | 25 | for (int i = 0; i < 2; i ++) 26 | { 27 | // Create time gap between cycles. 28 | usleep((idX * 2 + idY) * 100); 29 | 30 | InterChiplet::launch(1, 1, idX, idY); 31 | 32 | InterChiplet::sendMessage(1, 1, idX, idY, A, 10000 * sizeof(int64_t)); 33 | InterChiplet::sendMessage(1, 1, idX, idY, B, 10000 * sizeof(int64_t)); 34 | 35 | InterChiplet::receiveMessage(idX, idY, 1, 1, C, 100 * sizeof(int64_t)); 36 | 37 | InterChiplet::barrier(0xFF, idX, idY, 3); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /benchmark/test/gpgpu-sim/launch/matmul.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "apis_cu.h" 11 | #include "cuda_runtime.h" 12 | #include "device_launch_parameters.h" 13 | 14 | /** 15 | * 本示例程序为:通过4个GPU chiplet 16 | * 计算随机数矩阵A(400 * 100)与随机数矩阵B(100 * 400)相乘结果。 17 | * 由矩阵乘法原理可知,我们可将计算任务划分为4个100*100的矩阵相乘,并将结果相加。 18 | */ 19 | 20 | #define Row 100 21 | #define Col 100 22 | 23 | /** 24 | * 
Core matrix-multiplication kernel: every thread runs it once and computes the 25 | * result element selected by its thread index. 26 | * Thread (x, y) writes P[x + 10 * y] = sum over k < width of M[y * width + k] * N[k * width + x], 27 | * so the output index assumes a block that is 10 threads wide. 28 | */ 29 | 30 | __global__ void matrix_mul_gpu(int64_t* M, int64_t* N, int64_t* P, int width) { 31 | int sumNum = threadIdx.x + threadIdx.y * 10; 32 | int i = threadIdx.x; 33 | int j = threadIdx.y; 34 | int64_t sum = 0; 35 | for (int k = 0; k < width; k++) { 36 | int64_t a = M[j * width + k]; 37 | int64_t b = N[k * width + i]; 38 | sum += a * b; 39 | } 40 | P[sumNum] = sum; 41 | } 42 | 43 | int main(int argc, char** argv) { 44 | // Read the coordinates of the chiplet this process represents; both arguments are required. 45 | if (argc < 3) { 46 | return 1; 47 | } 48 | int idX = atoi(argv[1]); 49 | int idY = atoi(argv[2]); 50 | 51 | for (int round = 0; round < 6; round++) { 52 | int locker_x = -1, locker_y = -1; 53 | waitLaunch(idX, idY, &locker_x, &locker_y); 54 | 55 | int64_t *d_dataA, *d_dataB, *d_dataC; 56 | cudaMalloc((void**)&d_dataA, sizeof(int64_t) * Row * Col); 57 | cudaMalloc((void**)&d_dataB, sizeof(int64_t) * Row * Col); 58 | cudaMalloc((void**)&d_dataC, sizeof(int64_t) * Col); 59 | 60 | receiveMessage(idX, idY, locker_x, locker_y, d_dataA, sizeof(int64_t) * Row * Col); 61 | receiveMessage(idX, idY, locker_x, locker_y, d_dataB, sizeof(int64_t) * Row * Col); 62 | 63 | // Launch a single 10 x 10 thread block; each thread writes one element of d_dataC. 64 | dim3 threadPerBlock(10, 10); 65 | dim3 blockNumber(1); 66 | matrix_mul_gpu<<<blockNumber, threadPerBlock>>>(d_dataA, d_dataB, d_dataC, Col); 67 | 68 | sendMessage(locker_x, locker_y, idX, idY, d_dataC, sizeof(int64_t) * Col); 69 | cudaFree(d_dataA); 70 | cudaFree(d_dataB); 71 | cudaFree(d_dataC); 72 | } 73 | return 0; 74 | } 75 | -------------------------------------------------------------------------------- /benchmark/test/gpgpu-sim/launch/matmul.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration.
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 5 | args: ["--", "$BENCHMARK_ROOT/bin/msnip_c", "0", "1"] 6 | log: "sniper.0.1.log" 7 | is_to_stdout: false 8 | clock_rate: 1 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 11 | args: ["--", "$BENCHMARK_ROOT/bin/msnip_c", "1", "0"] 12 | log: "sniper.1.0.log" 13 | is_to_stdout: false 14 | clock_rate: 1 15 | # Process 2 16 | - cmd: "$BENCHMARK_ROOT/bin/msnip_cu" 17 | args: ["1", "1"] 18 | log: "gpgpusim.1.1.log" 19 | is_to_stdout: false 20 | clock_rate: 1 21 | pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*" 22 | # Process 3 23 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 24 | args: ["--", "$BENCHMARK_ROOT/bin/msnip_c", "0", "0"] 25 | log: "sniper.0.0.log" 26 | is_to_stdout: false 27 | clock_rate: 1 28 | 29 | # Phase 2 configuration. 30 | phase2: 31 | # Process 0 32 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 33 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 34 | log: "popnet_0.log" 35 | is_to_stdout: false 36 | clock_rate: 1 37 | 38 | # File configuration. 
(Not used yet) 39 | bench_file: "./bench.txt" 40 | delayinfo_file: "./delayInfo.txt" 41 | -------------------------------------------------------------------------------- /benchmark/test/gpgpu-sim/lock/makefile: -------------------------------------------------------------------------------- 1 | # Project environment 2 | # SIMULATOR_ROOT, defined by setup_env.sh 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/lock 4 | 5 | # Compiler environment of C/C++ 6 | CC=g++ 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a 9 | 10 | # C/C++ Source file 11 | C_SRCS=msnip.cpp 12 | C_OBJS=obj/msnip.o 13 | C_TARGET=bin/msnip_c 14 | 15 | # Compiler environment of CUDA 16 | NVCC=nvcc 17 | CUFLAGS=--compiler-options -Wall -I$(SIMULATOR_ROOT)/interchiplet/includes 18 | 19 | # CUDA Source file 20 | CUDA_SRCS=msnip.cu 21 | CUDA_OBJS=cuobj/msnip.o 22 | CUDA_TARGET=bin/msnip_cu 23 | 24 | all: bin_dir obj_dir cuobj_dir C_target CUDA_target 25 | 26 | # C language target 27 | C_target: $(C_OBJS) 28 | $(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET) 29 | 30 | # CUDA language target 31 | CUDA_target: $(CUDA_OBJS) 32 | $(NVCC) -L$(SIMULATOR_ROOT)/gpgpu-sim/lib/$(GPGPUSIM_CONFIG) --cudart shared $(CUDA_OBJS) -o $(CUDA_TARGET) 33 | 34 | # Rule for C object 35 | obj/%.o: %.cpp 36 | $(CC) $(CFLAGS) -c $< -o $@ 37 | 38 | # Rule for Cuda object 39 | cuobj/%.o: %.cu 40 | $(NVCC) $(CUFLAGS) -c $< -o $@ 41 | 42 | # Directory for binary files. 43 | bin_dir: 44 | mkdir -p bin 45 | 46 | # Directory for object files for C. 47 | obj_dir: 48 | mkdir -p obj 49 | 50 | # Directory for object files for CUDA. 51 | cuobj_dir: 52 | mkdir -p cuobj 53 | 54 | # Clean generated files. 
55 | clean: 56 | rm -rf bench.txt delayInfo.txt buffer* message_record.txt 57 | rm -rf proc_r*_t* *.log 58 | rm -rf obj cuobj bin 59 | -------------------------------------------------------------------------------- /benchmark/test/gpgpu-sim/lock/matmul.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "apis_c.h" 5 | #include "unistd.h" 6 | 7 | #define Row 100 8 | #define Col 100 9 | 10 | int idX, idY; 11 | 12 | int main(int argc, char **argv) { 13 | idX = atoi(argv[1]); 14 | idY = atoi(argv[2]); 15 | // Host buffers: two Row x Col operands and one Col-element result buffer. 16 | int64_t *A = (int64_t *)malloc(sizeof(int64_t) * Row * Col); 17 | int64_t *B = (int64_t *)malloc(sizeof(int64_t) * Row * Col); 18 | int64_t *C = (int64_t *)malloc(sizeof(int64_t) * Col); 19 | // Fill both operands with pseudo-random values in [0, 50]. 20 | for (int i = 0; i < Row * Col; i++) { 21 | A[i] = rand() % 51; 22 | B[i] = rand() % 51; 23 | } 24 | 25 | for (int i = 0; i < 2; i ++) 26 | { 27 | // Create time gap between cycles. 28 | usleep((idX * 2 + idY) * 100); 29 | // Issue a launch towards chiplet (1,1) on behalf of this chiplet. 30 | InterChiplet::launch(1, 1, idX, idY); 31 | // Send both operands to chiplet (1,1). 32 | InterChiplet::sendMessage(1, 1, idX, idY, A, 10000 * sizeof(int64_t)); 33 | InterChiplet::sendMessage(1, 1, idX, idY, B, 10000 * sizeof(int64_t)); 34 | // Receive the 100-element result back from chiplet (1,1). 35 | InterChiplet::receiveMessage(idX, idY, 1, 1, C, 100 * sizeof(int64_t)); 36 | // NOTE(review): presumably waits on barrier 0xFF until 3 chiplets arrive - confirm against apis_c.h. 37 | InterChiplet::barrier(0xFF, idX, idY, 3); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /benchmark/test/gpgpu-sim/lock/matmul.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "apis_cu.h" 11 | #include "cuda_runtime.h" 12 | #include "device_launch_parameters.h" 13 | 14 | /** 15 | * This example program uses 4 GPU chiplets 16 | * to compute the product of random matrix A (400 * 100) and random matrix B (100 * 400). 17 | * By the rules of matrix multiplication, the task can be split into four 100*100 matrix products whose results are added together. 18 | */ 19 | 20 | #define Row 100 21 | #define Col 100 22 | 23 | /** 24 | * 
Core matrix-multiplication kernel; every thread runs this function once and, 25 | * depending on its thread index, computes the element at a different position of the result matrix. 26 | */ 27 | 28 | __global__ void matrix_mul_gpu(int64_t* M, int64_t* N, int64_t* P, int width) { 29 | int sumNum = threadIdx.x + threadIdx.y * 10; 30 | int i = threadIdx.x; 31 | int j = threadIdx.y; 32 | int64_t sum = 0; 33 | for (int k = 0; k < width; k++) { 34 | int64_t a = M[j * width + k]; 35 | int64_t b = N[k * width + i]; 36 | sum += a * b; 37 | } 38 | P[sumNum] = sum; 39 | } 40 | 41 | int main(int argc, char** argv) { 42 | // Read the coordinates of the chiplet that this process represents 43 | int idX = atoi(argv[1]); 44 | int idY = atoi(argv[2]); 45 | 46 | for (int round = 0; round < 6; round++) { 47 | int locker_x = -1, locker_y = -1; 48 | waitLaunch(idX, idY, &locker_x, &locker_y); 49 | 50 | int64_t *d_dataA, *d_dataB, *d_dataC; 51 | cudaMalloc((void**)&d_dataA, sizeof(int64_t) * Row * Col); 52 | cudaMalloc((void**)&d_dataB, sizeof(int64_t) * Row * Col); 53 | cudaMalloc((void**)&d_dataC, sizeof(int64_t) * Col); 54 | 55 | receiveMessage(idX, idY, locker_x, locker_y, d_dataA, sizeof(int64_t) * Row * Col); 56 | receiveMessage(idX, idY, locker_x, locker_y, d_dataB, sizeof(int64_t) * Row * Col); 57 | 58 | // Launch a single 10x10 thread block; each thread writes one of the 100 results 59 | dim3 threadPerBlock(10, 10); 60 | dim3 blockNumber(1); 61 | matrix_mul_gpu<<<blockNumber, threadPerBlock>>>(d_dataA, d_dataB, d_dataC, Col); 62 | 63 | sendMessage(locker_x, locker_y, idX, idY, d_dataC, 100 * sizeof(int64_t)); 64 | cudaFree(d_dataA); 65 | cudaFree(d_dataB); 66 | cudaFree(d_dataC); 67 | } 68 | return 0; 69 | } 70 | -------------------------------------------------------------------------------- /benchmark/test/gpgpu-sim/lock/matmul.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration. 
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 5 | args: ["--", "$BENCHMARK_ROOT/bin/msnip_c", "0", "1"] 6 | log: "sniper.0.1.log" 7 | is_to_stdout: false 8 | clock_rate: 1 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 11 | args: ["--", "$BENCHMARK_ROOT/bin/msnip_c", "1", "0"] 12 | log: "sniper.1.0.log" 13 | is_to_stdout: false 14 | clock_rate: 1 15 | # Process 2 16 | - cmd: "$BENCHMARK_ROOT/bin/msnip_cu" 17 | args: ["1", "1"] 18 | log: "gpgpusim.1.1.log" 19 | is_to_stdout: false 20 | clock_rate: 1 21 | pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*" 22 | # Process 3 23 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 24 | args: ["--", "$BENCHMARK_ROOT/bin/msnip_c", "0", "0"] 25 | log: "sniper.0.0.log" 26 | is_to_stdout: false 27 | clock_rate: 1 28 | 29 | # Phase 2 configuration. 30 | phase2: 31 | # Process 0 32 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 33 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 34 | log: "popnet_0.log" 35 | is_to_stdout: false 36 | clock_rate: 1 37 | 38 | # File configuration. 
(Not used yet) 39 | bench_file: "./bench.txt" 40 | delayinfo_file: "./delayInfo.txt" 41 | -------------------------------------------------------------------------------- /benchmark/test/gpgpu-sim/pipe/makefile: -------------------------------------------------------------------------------- 1 | # Project environment 2 | # SIMULATOR_ROOT, defined by setup_env.sh 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/pipe 4 | 5 | # Compiler environment of C/C++ 6 | CC=g++ 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a 9 | 10 | # C/C++ Source file 11 | C_SRCS=matmul.cpp 12 | C_OBJS=obj/matmul.o 13 | C_TARGET=bin/matmul_c 14 | 15 | # Compiler environment of CUDA 16 | NVCC=nvcc 17 | CUFLAGS=--compiler-options -Wall -I$(SIMULATOR_ROOT)/interchiplet/includes 18 | 19 | # CUDA Source file 20 | CUDA_SRCS=matmul.cu 21 | CUDA_OBJS=cuobj/matmul.o 22 | CUDA_TARGET=bin/matmul_cu 23 | 24 | all: bin_dir obj_dir cuobj_dir C_target CUDA_target 25 | 26 | # C language target 27 | C_target: $(C_OBJS) 28 | $(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET) 29 | 30 | # CUDA language target 31 | CUDA_target: $(CUDA_OBJS) 32 | $(NVCC) -L$(SIMULATOR_ROOT)/gpgpu-sim/lib/$(GPGPUSIM_CONFIG) --cudart shared $(CUDA_OBJS) -o $(CUDA_TARGET) 33 | 34 | # Rule for C object 35 | obj/%.o: %.cpp 36 | $(CC) $(CFLAGS) -c $< -o $@ 37 | 38 | # Rule for Cuda object 39 | cuobj/%.o: %.cu 40 | $(NVCC) $(CUFLAGS) -c $< -o $@ 41 | 42 | # Directory for binary files. 43 | bin_dir: 44 | mkdir -p bin 45 | 46 | # Directory for object files for C. 47 | obj_dir: 48 | mkdir -p obj 49 | 50 | # Directory for object files for CUDA. 51 | cuobj_dir: 52 | mkdir -p cuobj 53 | 54 | # Clean generated files. 
55 | clean: 56 | rm -rf bench.txt delayInfo.txt buffer* message_record.txt 57 | rm -rf proc_r*_t* *.log 58 | rm -rf obj cuobj bin 59 | -------------------------------------------------------------------------------- /benchmark/test/gpgpu-sim/pipe/matmul.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "apis_c.h" 5 | 6 | #define Row 100 7 | #define Col 100 8 | 9 | int idX, idY; 10 | 11 | int main(int argc, char **argv) { 12 | idX = atoi(argv[1]); 13 | idY = atoi(argv[2]); 14 | // Host buffers: two Row x Col operands and one Col-element result buffer per remote chiplet. 15 | int64_t *A = (int64_t *)malloc(sizeof(int64_t) * Row * Col); 16 | int64_t *B = (int64_t *)malloc(sizeof(int64_t) * Row * Col); 17 | int64_t *C1 = (int64_t *)malloc(sizeof(int64_t) * Col); 18 | int64_t *C2 = (int64_t *)malloc(sizeof(int64_t) * Col); 19 | int64_t *C3 = (int64_t *)malloc(sizeof(int64_t) * Col); 20 | // Fill both operands with pseudo-random values in [0, 50]. 21 | for (int i = 0; i < Row * Col; i++) { 22 | A[i] = rand() % 51; 23 | B[i] = rand() % 51; 24 | } 25 | // Send operand A to chiplets (0,1), (1,0) and (1,1). 26 | InterChiplet::sendMessage(0, 1, idX, idY, A, 10000 * sizeof(int64_t)); 27 | InterChiplet::sendMessage(1, 0, idX, idY, A, 10000 * sizeof(int64_t)); 28 | InterChiplet::sendMessage(1, 1, idX, idY, A, 10000 * sizeof(int64_t)); 29 | // Send operand B to the same three chiplets. 30 | InterChiplet::sendMessage(0, 1, idX, idY, B, 10000 * sizeof(int64_t)); 31 | InterChiplet::sendMessage(1, 0, idX, idY, B, 10000 * sizeof(int64_t)); 32 | InterChiplet::sendMessage(1, 1, idX, idY, B, 10000 * sizeof(int64_t)); 33 | // Collect the 100-element result from each of the three chiplets. 34 | InterChiplet::receiveMessage(idX, idY, 0, 1, C1, 100 * sizeof(int64_t)); 35 | InterChiplet::receiveMessage(idX, idY, 1, 0, C2, 100 * sizeof(int64_t)); 36 | InterChiplet::receiveMessage(idX, idY, 1, 1, C3, 100 * sizeof(int64_t)); 37 | // Accumulate the three partial results into C1. 38 | for (int i = 0; i < 100; i++) { 39 | C1[i] += C2[i]; 40 | C1[i] += C3[i]; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /benchmark/test/gpgpu-sim/pipe/matmul.cu: -------------------------------------------------------------------------------- 1 | 
#include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "apis_cu.h" 11 | #include "cuda_runtime.h" 12 | #include "device_launch_parameters.h" 13 | 14 | /** 15 | * This example program uses 4 GPU chiplets 16 | * to compute the product of random matrix A (400 * 100) and random matrix B (100 * 400). 17 | * By the rules of matrix multiplication, the task can be split into four 100*100 matrix products whose results are added together. 18 | */ 19 | 20 | #define Row 100 21 | #define Col 100 22 | 23 | /** 24 | * Core matrix-multiplication kernel; every thread runs this function once and, 25 | * depending on its thread index, computes the element at a different position of the result matrix. 26 | */ 27 | 28 | __global__ void matrix_mul_gpu(int64_t* M, int64_t* N, int64_t* P, int width) { 29 | int sumNum = threadIdx.x + threadIdx.y * 10; 30 | int i = threadIdx.x; 31 | int j = threadIdx.y; 32 | int64_t sum = 0; 33 | for (int k = 0; k < width; k++) { 34 | int64_t a = M[j * width + k]; 35 | int64_t b = N[k * width + i]; 36 | sum += a * b; 37 | } 38 | P[sumNum] = sum; 39 | } 40 | 41 | int main(int argc, char** argv) { 42 | // Read the coordinates of the chiplet that this process represents 43 | 44 | int idX = atoi(argv[1]); 45 | int idY = atoi(argv[2]); 46 | int64_t *d_dataA, *d_dataB, *d_dataC; 47 | cudaMalloc((void**)&d_dataA, sizeof(int64_t) * Row * Col); 48 | cudaMalloc((void**)&d_dataB, sizeof(int64_t) * Row * Col); 49 | cudaMalloc((void**)&d_dataC, sizeof(int64_t) * Col); 50 | 51 | receiveMessage(idX, idY, 0, 0, d_dataA, sizeof(int64_t) * Row * Col); 52 | receiveMessage(idX, idY, 0, 0, d_dataB, sizeof(int64_t) * Row * Col); 53 | 54 | // Launch a single 10x10 thread block; each thread writes one of the 100 results 55 | dim3 threadPerBlock(10, 10); 56 | dim3 blockNumber(1); 57 | matrix_mul_gpu<<<blockNumber, threadPerBlock>>>(d_dataA, d_dataB, d_dataC, Col); 58 | 59 | sendMessage(0, 0, idX, idY, d_dataC, 100 * sizeof(int64_t)); 60 | cudaFree(d_dataA); 61 | cudaFree(d_dataB); 62 | cudaFree(d_dataC); 63 | return 0; 64 | } 65 | -------------------------------------------------------------------------------- /benchmark/test/gpgpu-sim/pipe/matmul.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration. 
2 | phase1: 3 | # Process 0 4 | - cmd: "$BENCHMARK_ROOT/bin/matmul_cu" 5 | args: ["0", "1"] 6 | log: "gpgpusim.0.1.log" 7 | is_to_stdout: false 8 | clock_rate: 1 9 | pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*" 10 | # Process 1 11 | - cmd: "$BENCHMARK_ROOT/bin/matmul_cu" 12 | args: ["1", "0"] 13 | log: "gpgpusim.1.0.log" 14 | is_to_stdout: false 15 | clock_rate: 1 16 | pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*" 17 | # Process 2 18 | - cmd: "$BENCHMARK_ROOT/bin/matmul_cu" 19 | args: ["1", "1"] 20 | log: "gpgpusim.1.1.log" 21 | is_to_stdout: false 22 | clock_rate: 1 23 | pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*" 24 | # Process 3 25 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 26 | args: ["--", "$BENCHMARK_ROOT/bin/matmul_c", "0", "0"] 27 | log: "sniper.0.0.log" 28 | is_to_stdout: false 29 | clock_rate: 1 30 | 31 | # Phase 2 configuration. 32 | phase2: 33 | # Process 0 34 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 35 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 36 | log: "popnet_0.log" 37 | is_to_stdout: false 38 | clock_rate: 1 39 | 40 | # File configuration. 
(Not used yet) 41 | bench_file: "./bench.txt" 42 | delayinfo_file: "./delayInfo.txt" 43 | -------------------------------------------------------------------------------- /benchmark/test/snipersim/barrier/makefile: -------------------------------------------------------------------------------- 1 | # Project environment 2 | # SIMULATOR_ROOT, defined by setup_env.sh 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/snipersim/barrier 4 | 5 | # Compiler environment of C/C++ 6 | CC=g++ 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a 9 | 10 | # C/C++ Source file 11 | C_SRCS=test.cpp 12 | C_OBJS=obj/test.o 13 | C_TARGET=bin/test_c 14 | 15 | all: bin_dir obj_dir C_target 16 | 17 | # C language target 18 | C_target: $(C_OBJS) 19 | $(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET) 20 | 21 | # Rule for C object 22 | obj/%.o: %.cpp 23 | $(CC) $(CFLAGS) -c $< -o $@ 24 | 25 | # Directory for binary files. 26 | bin_dir: 27 | mkdir -p bin 28 | 29 | # Directory for object files for C. 30 | obj_dir: 31 | mkdir -p obj 32 | 33 | # Clean generated files. 
34 | clean: 35 | rm -rf bench.txt delayInfo.txt buffer* message_record.txt 36 | rm -rf proc_r*_t* *.log 37 | rm -rf obj bin 38 | -------------------------------------------------------------------------------- /benchmark/test/snipersim/barrier/test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "apis_c.h" 5 | #include "unistd.h" 6 | 7 | #define Row 100 8 | #define Col 100 9 | 10 | int idX, idY; 11 | 12 | int main(int argc, char **argv) { 13 | idX = atoi(argv[1]); 14 | idY = atoi(argv[2]); 15 | // Busy-loop iteration counts for round 0 and round 1; the values differ per chiplet. 16 | int delay_count[2] = {0, 0}; 17 | 18 | if (idX == 0 && idY == 0) { 19 | delay_count[0] = 5000; 20 | delay_count[1] = 4000; 21 | } else if (idX == 0 && idY == 1) { 22 | delay_count[0] = 1000; 23 | delay_count[1] = 3000; 24 | } else if (idX == 1 && idY == 0) { 25 | delay_count[0] = 2000; 26 | delay_count[1] = 1000; 27 | } else if (idX == 1 && idY == 1) { 28 | delay_count[0] = 2000; 29 | delay_count[1] = 4000; 30 | } 31 | 32 | int64_t sum = 0; 33 | 34 | for (int r = 0; r < 2; r ++) { 35 | // Create time gap between threads. 36 | for (int j = 0; j < delay_count[r]; j++) { 37 | sum += rand() % 10; 38 | } 39 | // NOTE(review): presumably waits on barrier 255 until 4 chiplets arrive - confirm against apis_c.h. 40 | InterChiplet::barrier(255, idX, idY, 4); 41 | } 42 | std::cout << "Sum = " << sum << std::endl; 43 | } 44 | -------------------------------------------------------------------------------- /benchmark/test/snipersim/barrier/test.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration. 
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 5 | args: ["--", "$BENCHMARK_ROOT/bin/test_c", "0", "0"] 6 | log: "sniper.0.0.log" 7 | is_to_stdout: false 8 | clock_rate: 1 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 11 | args: ["--", "$BENCHMARK_ROOT/bin/test_c", "0", "1"] 12 | log: "sniper.0.1.log" 13 | is_to_stdout: false 14 | clock_rate: 1 15 | # Process 2 16 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 17 | args: ["--", "$BENCHMARK_ROOT/bin/test_c", "1", "0"] 18 | log: "sniper.1.0.log" 19 | is_to_stdout: false 20 | clock_rate: 1 21 | # Process 3 22 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 23 | args: ["--", "$BENCHMARK_ROOT/bin/test_c", "1", "1"] 24 | log: "sniper.1.1.log" 25 | is_to_stdout: false 26 | clock_rate: 1 27 | 28 | # Phase 2 configuration. 29 | phase2: 30 | # Process 0 31 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 32 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 33 | log: "popnet_0.log" 34 | is_to_stdout: false 35 | clock_rate: 1 36 | 37 | # File configuration. 
(Not used yet) 38 | bench_file: "./bench.txt" 39 | delayinfo_file: "./delayInfo.txt" 40 | -------------------------------------------------------------------------------- /benchmark/test/snipersim/launch/makefile: -------------------------------------------------------------------------------- 1 | # Project environment 2 | # SIMULATOR_ROOT, defined by setup_env.sh 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/snipersim/launch 4 | 5 | # Compiler environment of C/C++ 6 | CC=g++ 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a 9 | 10 | # C/C++ Source file 11 | C_SRCS=test.cpp 12 | C_OBJS=obj/test.o 13 | C_TARGET=bin/test_c 14 | 15 | all: bin_dir obj_dir C_target 16 | 17 | # C language target 18 | C_target: $(C_OBJS) 19 | $(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET) 20 | 21 | # Rule for C object 22 | obj/%.o: %.cpp 23 | $(CC) $(CFLAGS) -c $< -o $@ 24 | 25 | # Directory for binary files. 26 | bin_dir: 27 | mkdir -p bin 28 | 29 | # Directory for object files for C. 30 | obj_dir: 31 | mkdir -p obj 32 | 33 | # Clean generated files. 
34 | clean: 35 | rm -rf bench.txt delayInfo.txt buffer* message_record.txt 36 | rm -rf proc_r*_t* *.log 37 | rm -rf obj bin 38 | -------------------------------------------------------------------------------- /benchmark/test/snipersim/launch/test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "apis_c.h" 5 | #include "unistd.h" 6 | 7 | #define Row 100 8 | #define Col 100 9 | 10 | int idX, idY; 11 | 12 | int main(int argc, char **argv) { 13 | idX = atoi(argv[1]); 14 | idY = atoi(argv[2]); 15 | 16 | // Core (0,0), wait launch 17 | if (idX == 0 && idY == 0) { 18 | for (int r = 0; r < 6; r ++) { 19 | int64_t srcX = -1, srcY = -1; 20 | InterChiplet::waitLaunch(0, 0, &srcX, &srcY); 21 | // Zero-initialised so the reduction below never reads indeterminate memory. 22 | int64_t *A = (int64_t *)calloc(Row * Col, sizeof(int64_t)); 23 | 24 | int64_t sum = 0; 25 | for (int i = 0; i < Row * Col; i++) { 26 | sum = sum + A[i]; 27 | } 28 | free(A); // Release the per-round buffer instead of leaking it on every launch. 29 | InterChiplet::sendMessage(srcX, srcY, 0, 0, &sum, sizeof(int64_t)); 30 | } 31 | } 32 | // Core (0,1),(1,0),(1,1), launch 33 | else { 34 | int delay_count[2] = {0, 0}; 35 | // Per-round busy-loop lengths; the (0,0) case below cannot be reached inside this else branch. 36 | if (idX == 0 && idY == 0) { 37 | delay_count[0] = 5000; 38 | delay_count[1] = 4000; 39 | } else if (idX == 0 && idY == 1) { 40 | delay_count[0] = 1000; 41 | delay_count[1] = 3000; 42 | } else if (idX == 1 && idY == 0) { 43 | delay_count[0] = 2000; 44 | delay_count[1] = 1000; 45 | } else if (idX == 1 && idY == 1) { 46 | delay_count[0] = 2000; 47 | delay_count[1] = 4000; 48 | } 49 | 50 | int64_t sum = 0; 51 | for (int r = 0; r < 2; r ++) { 52 | // Create time gap between threads. 
53 | for (int j = 0; j < delay_count[r]; j++) { 54 | sum += rand() % 10; 55 | } 56 | // Issue a launch towards core (0,0) on behalf of this core. 57 | InterChiplet::launch(0, 0, idX, idY); 58 | 59 | // Read result from Core (0,0) 60 | int64_t result; 61 | InterChiplet::receiveMessage(idX, idY, 0, 0, &result, sizeof(int64_t)); 62 | 63 | sum = sum + result; 64 | } 65 | std::cout << "Sum = " << sum << std::endl; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /benchmark/test/snipersim/launch/test.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration. 2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 5 | args: ["--", "$BENCHMARK_ROOT/bin/test_c", "0", "0"] 6 | log: "sniper.0.0.log" 7 | is_to_stdout: false 8 | clock_rate: 1 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 11 | args: ["--", "$BENCHMARK_ROOT/bin/test_c", "0", "1"] 12 | log: "sniper.0.1.log" 13 | is_to_stdout: false 14 | clock_rate: 1 15 | # Process 2 16 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 17 | args: ["--", "$BENCHMARK_ROOT/bin/test_c", "1", "0"] 18 | log: "sniper.1.0.log" 19 | is_to_stdout: false 20 | clock_rate: 1 21 | # Process 3 22 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 23 | args: ["--", "$BENCHMARK_ROOT/bin/test_c", "1", "1"] 24 | log: "sniper.1.1.log" 25 | is_to_stdout: false 26 | clock_rate: 1 27 | 28 | # Phase 2 configuration. 29 | phase2: 30 | # Process 0 31 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 32 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 33 | log: "popnet_0.log" 34 | is_to_stdout: false 35 | clock_rate: 1 36 | 37 | # File configuration. 
(Not used yet) 38 | bench_file: "./bench.txt" 39 | delayinfo_file: "./delayInfo.txt" 40 | -------------------------------------------------------------------------------- /benchmark/test/snipersim/lock/makefile: -------------------------------------------------------------------------------- 1 | # Project environment 2 | # SIMULATOR_ROOT, defined by setup_env.sh 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/snipersim/lock 4 | 5 | # Compiler environment of C/C++ 6 | CC=g++ 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a 9 | 10 | # C/C++ Source file 11 | C_SRCS=test.cpp 12 | C_OBJS=obj/test.o 13 | C_TARGET=bin/test_c 14 | 15 | all: bin_dir obj_dir C_target 16 | 17 | # C language target 18 | C_target: $(C_OBJS) 19 | $(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET) 20 | 21 | # Rule for C object 22 | obj/%.o: %.cpp 23 | $(CC) $(CFLAGS) -c $< -o $@ 24 | 25 | # Directory for binary files. 26 | bin_dir: 27 | mkdir -p bin 28 | 29 | # Directory for object files for C. 30 | obj_dir: 31 | mkdir -p obj 32 | 33 | # Clean generated files. 
34 | clean: 35 | rm -rf bench.txt delayInfo.txt buffer* message_record.txt 36 | rm -rf proc_r*_t* *.log 37 | rm -rf obj bin 38 | -------------------------------------------------------------------------------- /benchmark/test/snipersim/lock/test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "apis_c.h" 6 | #include "unistd.h" 7 | 8 | #define Row 100 9 | #define Col 100 10 | 11 | int idX, idY; 12 | 13 | int main(int argc, char **argv) { 14 | idX = atoi(argv[1]); 15 | idY = atoi(argv[2]); 16 | // Busy-loop iteration counts for the three rounds; the values differ per chiplet. 17 | int delay_count[3] = {0, 0, 0}; 18 | if (idX == 0 && idY == 0) { 19 | delay_count[0] = 5000; 20 | delay_count[1] = 4000; 21 | delay_count[2] = 3000; 22 | } else if (idX == 0 && idY == 1) { 23 | delay_count[0] = 1000; 24 | delay_count[1] = 3000; 25 | delay_count[2] = 5000; 26 | } else if (idX == 1 && idY == 0) { 27 | delay_count[0] = 2000; 28 | delay_count[1] = 1000; 29 | delay_count[2] = 0000; 30 | } else if (idX == 1 && idY == 1) { 31 | delay_count[0] = 2000; 32 | delay_count[1] = 4000; 33 | delay_count[2] = 6000; 34 | } 35 | 36 | int64_t sum = 0; 37 | for (int i = 0; i < 3; i++) { 38 | // Create time gap between threads. 39 | for (int j = 0; j < delay_count[i]; j++) { 40 | sum += rand() % 10; 41 | } 42 | // NOTE(review): presumably acquires inter-chiplet lock 255 for this chiplet - confirm against apis_c.h. 43 | InterChiplet::lock(255, idX, idY); 44 | // Work performed between lock() and unlock(). 45 | for (int j = 0; j < delay_count[i]; j++) { 46 | sum += rand() % 10; 47 | } 48 | 49 | InterChiplet::unlock(255, idX, idY); 50 | } 51 | 52 | std::cout << "Sum = " << sum << std::endl; 53 | 54 | return 0; 55 | } 56 | -------------------------------------------------------------------------------- /benchmark/test/snipersim/lock/test.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration. 
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 5 | args: ["--", "$BENCHMARK_ROOT/bin/test_c", "0", "0"] 6 | log: "sniper.0.0.log" 7 | is_to_stdout: false 8 | clock_rate: 1 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 11 | args: ["--", "$BENCHMARK_ROOT/bin/test_c", "0", "1"] 12 | log: "sniper.0.1.log" 13 | is_to_stdout: false 14 | clock_rate: 1 15 | # Process 2 16 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 17 | args: ["--", "$BENCHMARK_ROOT/bin/test_c", "1", "0"] 18 | log: "sniper.1.0.log" 19 | is_to_stdout: false 20 | clock_rate: 1 21 | # Process 3 22 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 23 | args: ["--", "$BENCHMARK_ROOT/bin/test_c", "1", "1"] 24 | log: "sniper.1.1.log" 25 | is_to_stdout: false 26 | clock_rate: 1 27 | 28 | # Phase 2 configuration. 29 | phase2: 30 | # Process 0 31 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 32 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 33 | log: "popnet_0.log" 34 | is_to_stdout: false 35 | clock_rate: 1 36 | 37 | # File configuration. 
(Not used yet) 38 | bench_file: "./bench.txt" 39 | delayinfo_file: "./delayInfo.txt" 40 | -------------------------------------------------------------------------------- /benchmark/test/snipersim/pipe/makefile: -------------------------------------------------------------------------------- 1 | # Project environment 2 | # SIMULATOR_ROOT, defined by setup_env.sh 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/snipersim/pipe 4 | 5 | # Compiler environment of C/C++ 6 | CC=g++ 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a 9 | 10 | # C/C++ Source file 11 | C_SRCS=test.cpp 12 | C_OBJS=obj/test.o 13 | C_TARGET=bin/test_c 14 | 15 | all: bin_dir obj_dir C_target 16 | 17 | # C language target 18 | C_target: $(C_OBJS) 19 | $(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET) 20 | 21 | # Rule for C object 22 | obj/%.o: %.cpp 23 | $(CC) $(CFLAGS) -c $< -o $@ 24 | 25 | # Directory for binary files. 26 | bin_dir: 27 | mkdir -p bin 28 | 29 | # Directory for object files for C. 30 | obj_dir: 31 | mkdir -p obj 32 | 33 | # Clean generated files. 
34 | clean: 35 | rm -rf bench.txt delayInfo.txt buffer* message_record.txt 36 | rm -rf proc_r*_t* *.log 37 | rm -rf obj bin 38 | -------------------------------------------------------------------------------- /benchmark/test/snipersim/pipe/test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "apis_c.h" 6 | 7 | #define Row 100 8 | #define Col 100 9 | 10 | int idX, idY; 11 | 12 | int main(int argc, char **argv) { 13 | idX = atoi(argv[1]); 14 | idY = atoi(argv[2]); 15 | 16 | // Test purpose: pass one buffer around the ring of cores and check it returns unchanged. 17 | // Communication (0,0) -> (0,1) -> (1,0) -> (1,1) -> (0,0) 18 | 19 | // Core (0,0) 20 | if (idX == 0 && idY == 0) { 21 | int64_t *A = (int64_t *)malloc(sizeof(int64_t) * Row * Col); 22 | int64_t *B = (int64_t *)malloc(sizeof(int64_t) * Row * Col); 23 | 24 | // Fill A with pseudo-random values in [0, 50]. 25 | for (int i = 0; i < Row * Col; i++) { 26 | A[i] = rand() % 51; 27 | } 28 | 29 | // Send message to (0,1) 30 | InterChiplet::sendMessage(0, 1, idX, idY, A, Row * Col * sizeof(int64_t)); 31 | // Receive message from (1,1) 32 | InterChiplet::receiveMessage(idX, idY, 1, 1, B, Row * Col * sizeof(int64_t)); 33 | 34 | // Check result: the received buffer B must equal the buffer A that was sent. 35 | for (int i = 0; i < Row * Col; i++) { 36 | if (A[i] != B[i]) { 37 | std::cout << "Data check error!" << std::endl; 38 | return 1; 39 | } 40 | } 41 | std::cout << "Data check PASS!" 
<< std::endl; 42 | 43 | return 0; 44 | } 45 | // Core (0,1) 46 | else if (idX == 0 && idY == 1) { 47 | int64_t *A = (int64_t *)malloc(sizeof(int64_t) * Row * Col); 48 | memset(A, 0, Row * Col * sizeof(int64_t)); 49 | // Receive message from (0,0) 50 | InterChiplet::receiveMessage(idX, idY, 0, 0, A, Row * Col * sizeof(int64_t)); 51 | // Send message to (1,0) 52 | InterChiplet::sendMessage(1, 0, idX, idY, A, Row * Col * sizeof(int64_t)); 53 | 54 | return 0; 55 | } 56 | // Core (1,0) 57 | else if (idX == 1 && idY == 0) { 58 | int64_t *A = (int64_t *)malloc(sizeof(int64_t) * Row * Col); 59 | memset(A, 0, Row * Col * sizeof(int64_t)); 60 | // Receive message from (0,1) 61 | InterChiplet::receiveMessage(idX, idY, 0, 1, A, Row * Col * sizeof(int64_t)); 62 | // Send message to (1,1) 63 | InterChiplet::sendMessage(1, 1, idX, idY, A, Row * Col * sizeof(int64_t)); 64 | 65 | return 0; 66 | } 67 | // Core (1,1) 68 | else if (idX == 1 && idY == 1) { 69 | int64_t *A = (int64_t *)malloc(sizeof(int64_t) * Row * Col); 70 | memset(A, 0, Row * Col * sizeof(int64_t)); 71 | // Receive message from (1,0) 72 | InterChiplet::receiveMessage(idX, idY, 1, 0, A, Row * Col * sizeof(int64_t)); 73 | // Send message to (0,0) 74 | InterChiplet::sendMessage(0, 0, idX, idY, A, Row * Col * sizeof(int64_t)); 75 | 76 | return 0; 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /benchmark/test/snipersim/pipe/test.yml: -------------------------------------------------------------------------------- 1 | # Phase 1 configuration. 
2 | phase1: 3 | # Process 0 4 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 5 | args: ["--", "$BENCHMARK_ROOT/bin/test_c", "0", "0"] 6 | log: "sniper.0.0.log" 7 | is_to_stdout: false 8 | clock_rate: 1 9 | # Process 1 10 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 11 | args: ["--", "$BENCHMARK_ROOT/bin/test_c", "0", "1"] 12 | log: "sniper.0.1.log" 13 | is_to_stdout: false 14 | clock_rate: 1 15 | # Process 2 16 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 17 | args: ["--", "$BENCHMARK_ROOT/bin/test_c", "1", "0"] 18 | log: "sniper.1.0.log" 19 | is_to_stdout: false 20 | clock_rate: 1 21 | # Process 3 22 | - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper" 23 | args: ["--", "$BENCHMARK_ROOT/bin/test_c", "1", "1"] 24 | log: "sniper.1.1.log" 25 | is_to_stdout: false 26 | clock_rate: 1 27 | 28 | # Phase 2 configuration. 29 | phase2: 30 | # Process 0 31 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 32 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 33 | log: "popnet_0.log" 34 | is_to_stdout: false 35 | clock_rate: 1 36 | 37 | # File configuration. (Not used yet) 38 | bench_file: "./bench.txt" 39 | delayinfo_file: "./delayInfo.txt" 40 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | site 3 | html 4 | latex 5 | -------------------------------------------------------------------------------- /docs/docs/02-benchmarks/022-yaml.md: -------------------------------------------------------------------------------- 1 | 2 | # YAML Configuration File 3 | 4 | You can learn the following topics from this page: 5 | 6 | - Create the configuration file (YAML format) of a novel benchmark. 7 | 8 | ## YAML Configuration File Format 9 | 10 | The execution process is controlled by a YAML configuration file. 
One benchmark must have at least one YAML configuration file. More configuration files can be created to describe different configurations of one benchmark. 11 | 12 | The example structure of the YAML file is as follows: 13 | 14 | ```yaml 15 | # Phase 1 configuration. 16 | phase1: 17 | # Process 0 18 | - cmd: "$BENCHMARK_ROOT/bin/matmul_cu" 19 | args: ["0", "1"] 20 | log: "gpgpusim.0.1.log" 21 | is_to_stdout: false 22 | pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*" 23 | clock_rate: 1 24 | # Process 1 25 | - cmd: "$BENCHMARK_ROOT/bin/matmul_cu" 26 | args: ["1", "0"] 27 | log: "gpgpusim.1.0.log" 28 | is_to_stdout: false 29 | pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*" 30 | clock_rate: 1 31 | ...... 32 | 33 | # Phase 2 configuration. 34 | phase2: 35 | # Process 0 36 | - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet" 37 | args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"] 38 | log: "popnet_0.log" 39 | is_to_stdout: false 40 | clock_rate: 1 41 | ``` 42 | 43 | In the above configuration files, the first-level tags are 44 | 45 | * `phase1` provides the configuration for processes in Phase 1. 46 | * `phase2` provides the configuration for processes in Phase 2. 47 | 48 | Both `phase1` and `phase2` accept a list of process configuration structures. Each structure corresponds to one parallel simulator process. 49 | 50 | Configuration structures provide the following tags: 51 | 52 | - `cmd` presents the command of the simulator. A string is accepted. The environment variables `$BENCHMARK_ROOT` and `$SIMULATOR_ROOT` are supported to describe the path of the simulator. 53 | - `args` presents the arguments of the simulator. A list of strings is accepted. The environment variables `$BENCHMARK_ROOT` and `$SIMULATOR_ROOT` are also supported to specify the path of related files.
`cmd` and `args` combine the SHELL command to execute one simulator. 54 | - `log` presents the name of the logger. A string is accepted. Neither the absolute path nor the relative path is supported. The log file is stored in the sub-directory of each simulation process. 55 | - `is_to_stdout` presents whether the standard output and standard error output of this simulator process are redirected to the standard output of the *interchiplet*. 56 | - `pre_copy` provides a list of files that should be copied to the sub-directory of this simulation process before calling the simulator. A string is accepted. If there are multiple files to copy, files are separated by space. 57 | - `clock_rate` provides a floating-point number as the ratio between chiplet clocks (clocks of simulators) and the system clock (clock of the *interchiplet*). 58 | 59 | > TODO: Change pre_copy to pre_cmd. 60 | 61 | The following environment variables are supported when writing one benchmark configuration file. 62 | 63 | - `$BENCHMARK_ROOT` presents the root path of the benchmark, specified by the location of the YAML configuration file. 64 | - `$SIMULATOR_ROOT` presents the root path of the LegoSim, set by *setup_env.sh*. 65 | -------------------------------------------------------------------------------- /docs/docs/03-sync-proto/030-net-proto.md: -------------------------------------------------------------------------------- 1 | 2 | # Interconnection Simulation 3 | 4 | LegoSim applies Popnet as the interconnection simulator to provide the latency of each transaction. Popnet simulates the interconnection behavior according to the benchmark file provided by Phase 1. Popnet generates one latency information file, providing the latency of each package. 5 | 6 | The latency of packages is related to the source-destination address and the network congestion. The source-destination address does not change across iterations. The same degree of network congestion leads to similar latency information.
Hence, the latency information generated by Phase 2 can be used in Phase 1 of the next iteration. 7 | 8 | Each pair of WRITE and READ commands generates one transaction in the interconnection. In particular, WRITE commands with LOCK, UNLOCK, and BARRIER descriptors generate transactions in the interconnection without pairing READ commands. 9 | 10 | ## File format 11 | 12 | LegoSim uses the trace file and the latency information file to communicate with Popnet. 13 | 14 | ### Trace file format 15 | 16 | The trace file is one text file. Each line presents one transaction in the interconnection, as shown below. 17 | 18 | ``` 19 | 20 | ``` 21 | 22 | Fields in the trace file are filled with values from the WRITE and READ commands. 23 | 24 | | Field in trace file | Field in WRITE command | Field in READ command | Description | 25 | | ---- | ---- | ---- | ---- | 26 | | `src_cycle` | `cycle` | | Start cycle of the transaction from the source's view. | 27 | | `dst_cycle` | | `cycle` | Start cycle of the transaction from the destination's view. | 28 | | `src_x`, `src_y` | `src_x`, `src_y` | `src_x`, `src_y` | Source address. | 29 | | `dst_x`, `dst_y` | `dst_x`, `dst_y` | `dst_x`, `dst_y` | Destination address. | 30 | | `flit_num` | | | Flit number. $\text{ceil}(nbytes / payload\#) + 1$. | 31 | | `desc` | `desc` | `desc` | Behavior of transaction. | 32 | 33 | 34 | The trace file is generated by *interchiplet*. *interchiplet* always keeps packages in the order of the package injection cycle. For example, 35 | 36 | ``` 37 | 2846470 0 0 0 0 1 1251 0 38 | 2847814 0 0 0 1 0 1251 0 39 | 2849309 0 0 0 1 1 1251 0 40 | 2850905 2847725 0 0 0 1 1251 0 41 | 2852501 2849069 0 0 1 0 1251 0 42 | 2854098 2850569 0 0 1 1 1251 0 43 | 2875272 2855527 0 1 0 0 14 0 44 | 2876868 2875644 1 0 0 0 14 0 45 | 2878470 2877240 1 1 0 0 14 0 46 | ``` 47 | 48 | > TODO: flexible format for address. Flexible format for different kinds of PComps and SComps.
49 | 50 | ### Latency information file format 51 | 52 | The latency information file is also one text file. Each line provides the latency information for one transaction in the interconnection, as shown below. 53 | 54 | ``` 55 | [ ...] 56 | ``` 57 | 58 | `cycle` presents the start cycle of one transaction from the source's view as the `src_cycle` field in the trace file. `src_x`, `src_y`, `dst_x`, and `dst_y` present the source and destination address as the trace file. `desc` describes the behavior of the transaction as the trace file. 59 | 60 | For each transaction, latency information provides the latency of each package related to one transaction. For example, normal communication transactions only provide the latency of one package. BARRIER/LOCK/UNLOCK/LAUNCH transactions provide the latency of one request package and one acknowledge package. 61 | 62 | `lat_num` presents the number of provided latency values. Each package has two latency values. `lat_{2i}` and `lat_{2i+1}` are the latency of the `i`-th package from the source's and destination's views. 63 | 64 | | Transaction's behavior | `lat_num` | `lat_0` and `lat_1` | `lat_2` and `lat_3` | 65 | | ---- | :----: | ---- | ---- | 66 | | Normal | 2 | Normal transfer package | | 67 | | LAUNCH | 4 | Request package | Acknowledge package | 68 | | BARRIER | 4 | Request package | Acknowledge package | 69 | | LOCK | 4 | Request package | Acknowledge package | 70 | | UNLOCK | 4 | Request package | Acknowledge package | 71 | 72 | Popnet does not need to sort packages. *interchiplet* will reorder packages according to the package injection cycle when loading files. 
One example of latency information files is shown below: 73 | 74 | ``` 75 | 2847106 0 0 0 1 0 2 1250 1255 76 | 2848612 0 0 1 0 0 2 1250 1255 77 | 2850107 0 0 1 1 0 2 1250 1260 78 | 2851703 0 0 0 1 0 2 1250 1255 79 | 2853299 0 0 1 0 0 2 1250 1255 80 | 2854896 0 0 1 1 0 2 1250 1260 81 | 2876066 0 1 0 0 0 2 13 18 82 | 2877662 1 0 0 0 0 2 13 18 83 | 2879259 1 1 0 0 0 2 13 23 84 | ``` 85 | 86 | > TODO: flexible format for address. Flexible format for different kinds of PComps and SComps. 87 | 88 | ## Sorting Latency Information 89 | 90 | *interchiplet* needs to rebuild the order of transactions according to the latency information file. 91 | 92 | Transactions are organized by a two-level structure. The first level is a map of addresses and ordered queues of latency information. The second level is one queue of latency information ordered by cycles. 93 | 94 | | | Ordered content | Key of map | Key for ordering | 95 | | ---- | ---- | ---- | ---- | 96 | | Order of Transaction | All latency informations | Source addresses | Transaction start cycle | 97 | | Order of Launch | Latency information of Launch transactions | Destination addresses | Cycle when the request package arrives the destination | 98 | | Order of Lock and unlock | Latency information of Lock/unlock transactions | Destination addresses | Cycle when the request package arrives the destination | 99 | 100 | ## TODO 101 | 102 | The network latency is provided by Phase 2 of the previous iteration. *interchiplet* will load all delay information before starting the simulation processes in Phase 1. When it receives the paired READ and WRITE commands, it will search for the first delay information message with the same source and destination. The matched delay information message will be dropped after use. 103 | 104 | There is no implicit guarantee that the number and the order of transactions do not change crossing the iteration. 
Because the simulation flow is based on iteration, the difference crossing iteration should reduce as the simulation continues. 105 | 106 | If the network delay from SComps is missing, the network delay only considers the propagation delay, which equals the data amount divided by network bandwidth. In the first iteration, the network delay is determined in the same way. 107 | -------------------------------------------------------------------------------- /docs/docs/03-sync-proto/032-barrier.md: -------------------------------------------------------------------------------- 1 | # Barrier 2 | 3 | You can find the following topics on this page: 4 | 5 | - The sequences of synchronization protocol to handle barrier transactions between processes. 6 | - The algorithm to handle synchronization protocol within *interchiplet*. 7 | - The algorithm to calculate the end cycle of one barrier transaction. 8 | 9 | ## Command syntax 10 | 11 | ``` 12 | # barrier 13 | BARRIER 14 | WRITE 15 | ``` 16 | 17 | `src_x` and `src_y` present the source address of the process that enters the barrier. `uid` specifies the unique ID of the barrier. `count` specifies the number of processes that enter the barrier when the barrier overflows. Non-zero `count` always overrides the number of the barrier. 18 | 19 | The `cycle` field in the WRITE command presents the time when the process enters the barrier. 20 | 21 | The figure below shows the relationship between arguments of APIs and commands. 
22 | 23 | ```mermaid 24 | flowchart TB 25 | 26 | subgraph barrier 27 | A1[__src_x] 28 | A2[__src_y] 29 | A3[__uid] 30 | A4[__count] 31 | end 32 | 33 | subgraph BARRIER command 34 | B1[src_x] 35 | B2[src_y] 36 | B3[uid] 37 | B4[count] 38 | end 39 | 40 | subgraph WRITE command 41 | C0[cycle] 42 | C1[src_x] 43 | C2[src_y] 44 | C3[dst_x] 45 | C4[dst_y=0] 46 | C5[nbytes=1] 47 | C6[desc=0x20000+count] 48 | end 49 | 50 | A1 -.-> B1 -.-> C1 51 | A2 -.-> B2 -.-> C2 52 | A3 -.-> B3 -.-> C3 53 | A4 -.-> B4 -.-> C6 54 | 55 | ``` 56 | 57 | ## Command Sequence 58 | 59 | One example of the command sequence is shown below: 60 | 61 | ```mermaid 62 | sequenceDiagram 63 | autonumber 64 | 65 | participant interchiplet 66 | participant SP0 as Simulator
Process 0 67 | participant SP1 as Simulator
Process 1 68 | participant SP2 as Simulator
Process 2 69 | participant SP3 as Simulator
Process 3 70 | 71 | activate SP0 72 | activate SP1 73 | activate SP2 74 | activate SP3 75 | 76 | Note over SP0,SP3: Example starts 77 | 78 | SP1->>interchiplet: BARRIER 0 1 255 4 79 | deactivate SP1 80 | activate interchiplet 81 | Note over interchiplet: Register BARRIER command. 82 | deactivate interchiplet 83 | 84 | SP0->>interchiplet: BARRIER 0 0 255 4 85 | deactivate SP0 86 | activate interchiplet 87 | Note over interchiplet: Register BARRIER command. 88 | deactivate interchiplet 89 | 90 | SP3->>interchiplet: BARRIER 1 1 255 4 91 | deactivate SP3 92 | activate interchiplet 93 | Note over interchiplet: Register BARRIER command. 94 | deactivate interchiplet 95 | 96 | SP2->>interchiplet: BARRIER 1 0 255 4 97 | deactivate SP2 98 | activate interchiplet 99 | Note over interchiplet: 1. Register BARRIER command.
2. Barrier overflows.
3. Send RESULT command to
each Simulator Process. 100 | interchiplet->>SP1: RESULT 0 101 | interchiplet->>SP0: RESULT 0 102 | interchiplet->>SP3: RESULT 0 103 | interchiplet->>SP2: RESULT 0 104 | deactivate interchiplet 105 | 106 | SP1->>interchiplet: WRITE 2305339 0 1 255 0 1 131076 107 | activate interchiplet 108 | Note over interchiplet: Register WRITE command
with the barrier flag. 109 | deactivate interchiplet 110 | 111 | SP0->>interchiplet: WRITE 2410745 0 0 255 0 1 131076 112 | activate interchiplet 113 | Note over interchiplet: Register WRITE command
with the barrier flag. 114 | deactivate interchiplet 115 | 116 | SP3->>interchiplet: WRITE 2330513 1 1 255 0 1 131076 117 | activate interchiplet 118 | Note over interchiplet: Register WRITE command
with the barrier flag. 119 | deactivate interchiplet 120 | 121 | SP2->>interchiplet: WRITE 2331564 1 0 255 0 1 131076 122 | activate interchiplet 123 | Note over interchiplet: 1. Register WRITE command
with the barrier flag.
2. Barrier overflows and
calculate barrier overflow time.
3. Send SYNC command to
each Simulator Process. 124 | interchiplet->>SP1: SYNC 2411664 125 | activate SP1 126 | interchiplet->>SP0: SYNC 2411659 127 | activate SP0 128 | interchiplet->>SP3: SYNC 2411669 129 | activate SP3 130 | interchiplet->>SP2: SYNC 2411664 131 | activate SP2 132 | deactivate interchiplet 133 | 134 | Note over SP0,SP3: Example ends 135 | 136 | deactivate SP0 137 | deactivate SP1 138 | deactivate SP2 139 | deactivate SP3 140 | ``` 141 | 142 | ## Handle BARRIER Command 143 | 144 | *interchiplet* emulates the function of the barrier. The following diagram shows the flow to handle one BARRIER command. 145 | 146 | ```mermaid 147 | flowchart TB 148 | 149 | A(Start) 150 | B[Register BARRIER command] 151 | C{Check whether
barrier overflows} 152 | E[Send RESULT commands
to each pending
BARRIER command] 153 | Z(End) 154 | 155 | A-->B-->C--"Yes"-->E-->Z 156 | C--"No"-->Z 157 | ``` 158 | 159 | *interchiplet* responds with one RESULT command without any result for each process that enters the barrier when the barrier overflows. 160 | 161 | > The order of BARRIER commands is not changed by the timing information. 162 | 163 | ## Handle WRITE Command with the Barrier Flag 164 | 165 | In a realistic system, when a process enters a barrier, the process sends one request to a controller, like a mailbox. Then, the process blocks till it receives the acknowledgment from the controller. The location of the controller is configured in Popnet. 166 | 167 | The `cycle` field in the WRITE command with the barrier flag presents the time when the source component sends the barrier request to the controller in the system, referred to as `src_cycle`. WRITE commands with the barrier flag do not need to pair with READ commands. 168 | 169 | The SYNC command after one WRITE command with the barrier flag means the source has received acknowledgment. The task or flow in the source can continue after receiving the SYNC command. The execution cycle of the source should be adjusted to the value specified in the cycle field of SYNC commands. 170 | 171 | Latency information provides four latency values (`lat_0`, `lat_1`, `lat_2`, and `lat_3`) for one barrier transaction: 172 | 173 | | | From the source's view | From the destination's view | 174 | | ---- | :----: | :----: | 175 | | **Request package** | `lat_0` | `lat_1` | 176 | | **Acknowledgement package** | `lat_2` | `lat_3` | 177 | 178 | The request package is injected at `src_cycle`. Hence, the request package arrives at the controller at `src_cycle + lat_1`. Then, when the barrier overflows, the controller sends one acknowledgment package to each source component. 179 | 180 | The timing sequence is shown below: 181 | 182 | ```mermaid 183 | sequenceDiagram 184 | autonumber 185 | 186 | participant SP0 as Simulator
Process 0 187 | participant SP1 as Simulator
Process 1 188 | participant SP2 as Simulator
Process 2 189 | 190 | note right of SP2: src_cycle[2] 191 | SP2->>SP0: 192 | note left of SP0: src_cycle[2] + lat_1[2] 193 | 194 | note right of SP1: src_cycle[1] 195 | SP1->>SP0: 196 | note left of SP0: src_cycle[1] + lat_1[1] 197 | 198 | SP0->>SP2: 199 | note right of SP2: src_cycle[1] + lat_1[1] + lat_3[2] 200 | 201 | SP0->>SP1: 202 | note right of SP1: src_cycle[1] + lat_1[1] + lat_3[1] 203 | ``` 204 | 205 | In summary, 206 | 207 | - The barrier overflow time is `max(src_cycle[i] + lat_1[i])`. 208 | - The `cycle` of the SYNC command to the WRITE command with the barrier flag is `barrier overflow time + lat_3`. 209 | -------------------------------------------------------------------------------- /docs/docs/03-sync-proto/035-cycle.md: -------------------------------------------------------------------------------- 1 | # Cycle 2 | 3 | ``` 4 | CYCLE 5 | ``` 6 | 7 | CYCLE command reports the execution cycle of one simulator process to *interchiplet*. This command does not need any response at this time. 8 | 9 | After receiving a CYCLE command, one sub-thread updates the execution cycle with the value provided by the CYCLE command if the new execution cycle is greater than the recorded execution cycle. At last, the maximum execution cycles recorded by all CYCLE commands are reported as the total execution cycle. 10 | 11 | > TODO: Use the cycle command to build up period synchronization. 12 | -------------------------------------------------------------------------------- /docs/docs/03-sync-proto/index.md: -------------------------------------------------------------------------------- 1 | 2 | # Synchronization Protocol 3 | 4 | You can find the following topics on this page: 5 | 6 | - The synchronization protocol between simulation processes. 7 | 8 | ## List of Protocol Commands 9 | 10 | Synchronization protocol can be categorized into functional protocol and timing protocol. 
11 | 12 | ### Functional Protocol Commands 13 | 14 | The functional protocol performs the functionality of benchmarks correctly, and commands do not care about the execution cycle. It is used in the functional model of simulators. Functional protocol commands and their syntax are listed below: 15 | 16 | | Command | Arguments | Direction | Location | Pair with | Usage | 17 | | :----------: | --------------------------------- | :-------: | :---------: | :----------: | ----- | 18 | | `BARRIER` | ` ` | Up | Source | -- | The thread at the source enters the barrier. | 19 | | `LAUNCH` | ` ` | Up | Source | `WAITLAUNCH` | The thread at the source requests to execute a task at the destination. | 20 | | `LOCK` | ` ` | Up | Source | `UNLOCK` | The thread at the source requests to lock the mutex. | 21 | | `RECEIVE` | ` ` | Up | Destination | `SEND` | Receive data from the source to the destination. | 22 | | `RESULT` | ` [ ...]` | Down | -- | -- | Response to functional protocol commands with a variable number of results. | 23 | | `SEND` | ` ` | Up | Source | `RECEIVE` | Send data from the source to the destination. | 24 | | `UNLOCK` | ` ` | Up | Source | `LOCK` | The thread at the source unlocks the mutex. | 25 | | `WAITLAUNCH` | ` ` | Up | Destination | `LAUNCH` | The thread at the destination waits to launch a task. | 26 | 27 | > "Up" means from simulation process to interchiplet. "Down" means from interchiplet to simulation process. 28 | 29 | Every functional protocol command needs one RESULT command as the response. RESULT commands provide a list of result fields whose number is specified by `res_num`. 30 | 31 | As the response to BARRIER, LOCK, UNLOCK, and LAUNCH commands, RESULT commands provide no result. As the response to SEND and RECEIVE commands, RESULT commands should provide the path of the Named Pipe. As the response to WAITLAUNCH commands, RESULT commands should provide the source of LAUNCH commands.
32 | 33 | > TODO: More flexible format for address 34 | 35 | ### Timing Protocol Commands 36 | 37 | The timing protocol synchronizes the execution cycles of simulators. It is used in the timing model of simulators. Timing protocol commands and their syntax are listed below: 38 | 39 | | Command | Arguments | Direction | Location | Pair with | Usage | 40 | | :-----: | --------------------------------------------------------- | :-------: | :---------: | :-------: | ----- | 41 | | `CYCLE` | `` | Up | Any | -- | Report execution time of simulation processes. | 42 | | `READ` | ` ` | Up | Source | `WRITE` | Read transaction request from the source to the destination. | 43 | | `SYNC` | `` | Down | -- | -- | Response to timing protocol commands. | 44 | | `WRITE` | ` ` | Up | Destination | `READ` | Write transaction request from the source to the destination. | 45 | 46 | As described in the [Overview](../index.md), the sequence to perform the communication between processes has been abstracted as the transaction. Hence, the essential read/write operations to lock flags are abstracted into READ/WRITE commands. READ/WRITE commands can be used for both communication and synchronization, such as barrier, lock, and launch. Therefore, **one READ or WRITE command presents a sequence of communication rather than one single package.** `desc` field defines the transaction's behavior. 47 | 48 | Bit [19:16] presents the category of communication. 49 | 50 | | Bit [19:16] | Behavior flag | Transaction's behavior | 51 | | :---------: | :-----------: | ---------------------- | 52 | | `0x0` | | Controlled by the bit [15:0] of `desc`. | 53 | | `0x1` | launch | One request package and one acknowledgment package. | 54 | | `0x2` | barrier | One request package and one acknowledgment package.
Bit [15:0] presents the number of processes when the barrier overflows. | 55 | | `0x4` | lock | One request package and one acknowledgment package. | 56 | | `0x8` | unlock | One request package and one acknowledgment package. | 57 | 58 | READ/WRITE commands need SYNC commands as the response. SYNC commands provide a cycle to specify the end cycle of transactions. 59 | 60 | > TODO: More flexible format for address 61 | 62 | ## APIs vs. Synchronization Protocol 63 | 64 | Each API in benchmarks needs one command from the functional protocol and one from the timing protocol. Some trace-based simulators, like SniperSim, provide separated timing and function models. Hence, the functional protocol should not merge with the timing protocol. 65 | 66 | | Benchmark API | Command from functional protocol | Command from timing protocol | 67 | | :--------------: | :------------------------------: | :---------------------------: | 68 | | `sendMessage` | `SEND` | `WRITE` | 69 | | `receiveMessage` | `RECEIVE` | `READ` | 70 | | `barrier` | `BARRIER` | `WRITE` with the barrier flag | 71 | | `lock` | `LOCK` | `WRITE` with the lock flag | 72 | | `unlock` | `UNLOCK` | `WRITE` with the unlock flag | 73 | | `launch` | `LAUNCH` | `WRITE` with the launch flag | 74 | | `waitLaunch` | `WAITLAUNCH` | `READ` with the launch flag | 75 | 76 | The basic flow for APIs is shown below: 77 | 78 | ```mermaid 79 | flowchart TB 80 | 81 | A[Start] 82 | B[Issue one functional protocol command] 83 | C[Wait RESULT command] 84 | D[Issue one timing protocol command] 85 | E[Wait SYNC command] 86 | F[End] 87 | 88 | A-->B-->C-->D-->E-->F 89 | ``` 90 | 91 | It is not necessary to implement the above flow in one single function. For those simulators that provide one separate function model and timing model, functional protocol commands are handled in the function model, and the timing protocol commands are handled in the timing model. 
92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /docs/docs/04-import-sim/041-snipersim.md: -------------------------------------------------------------------------------- 1 | 2 | # Importing SniperSim 3 | 4 | [SniperSim](http://snipersim.org//w/The_Sniper_Multi-Core_Simulator) is a trace-based CPU simulator that can achieve high speed and reasonable accuracy. 5 | 6 | ## APIs 7 | 8 | APIs are implemented by System Calls. The following system call numbers are assigned to these APIs. 9 | 10 | ```c++ 11 | SYSCALL_LAUNCH = 501, // Launch request. 12 | SYSCALL_WAITLAUNCH = 502, // Wait for launch request. 13 | SYSCALL_BARRIER = 503, // Enter barrier. 14 | SYSCALL_LOCK = 504, // Lock mutex. 15 | SYSCALL_UNLOCK = 505, // Unlock mutex. 16 | SYSCALL_REMOTE_READ = 506, // Read cross chiplet 17 | SYSCALL_REMOTE_WRITE = 507, // Write cross chiplet 18 | ``` 19 | 20 | Each benchmark API corresponds to one system call. All arguments of the benchmark APIs are also the arguments for system calls. 21 | 22 | ## Handle Syscalls 23 | 24 | SniperSim provides separate functional and timing models. Hence, syscalls are handled separately in functional and timing models. 25 | 26 | In the functional model, system calls are handled in file *$SIMULATOR_ROOT/snipersim/sift/recorder/syscall_modeling.cc*. In the timing model, system calls are handled in file *\$SIMULATOR_ROOT/snipersim/common/core/syscall_model.cc*.
27 | 28 | ### Handle SYSCALL_REMOTE_WRITE/SYSCALL_REMOTE_READ 29 | 30 | The flow chart of the function model is as follows: 31 | 32 | ```mermaid 33 | flowchart TD 34 | 35 | subgraph Write Syscall 36 | A1[Issue SEND command] 37 | B1[Wait for RESULT command] 38 | C1[Open PIPE] 39 | D1[Write data to PIPE] 40 | end 41 | 42 | A1-->B1-->C1-->D1 43 | B1-->B1 44 | 45 | subgraph Read Syscall 46 | A2[Issue RECEIVE command] 47 | B2[Wait for RESULT command] 48 | C2[Open PIPE] 49 | D2[Read data from PIPE] 50 | end 51 | 52 | A2-->B2-->C2-->D2 53 | B2-->B2 54 | ``` 55 | 56 | The flow chart of the timing model is as follows: 57 | 58 | ```mermaid 59 | flowchart TD 60 | 61 | subgraph Write Syscall 62 | A1[Get current execution cycle] 63 | B1[Issue WRITE command] 64 | C1[Wait for SYNC command] 65 | D1[Sleep core until cycle specified by SYNC command] 66 | end 67 | 68 | A1-->B1-->C1-->D1 69 | C1-->C1 70 | 71 | subgraph Read Syscall 72 | A2[Get current execution cycle] 73 | B2[Issue READ command] 74 | C2[Wait for SYNC command] 75 | D2[Sleep core until cycle specified by SYNC command] 76 | end 77 | 78 | A2-->B2-->C2-->D2 79 | C2-->C2 80 | ``` 81 | 82 | SniperSim is not a cycle-driven simulator. Hence, the execution cycle cannot be changed by modifying the value of some variables. Instead, one Sleep instruction is injected into the timing model, and the duration of the Sleep instruction equals the gap from the cycle issue one READ/WRITE command to the cycle receiving the corresponding SYNC command. 83 | 84 | ```c++ 85 | // Update simulator time. 86 | ComponentPeriod time_wake_period = *(Sim()->getDvfsManager()->getGlobalDomain()) * end_time; 87 | SubsecondTime time_wake = time_wake_period.getPeriod(); 88 | SubsecondTime sleep_end_time; 89 | Sim()->getSyscallServer()->handleSleepCall(m_thread->getId(), time_wake, start_time, sleep_end_time); 90 | 91 | // Sleep core until specified time. 
92 | if (m_thread->reschedule(sleep_end_time, core)) 93 | core = m_thread->getCore(); 94 | 95 | core->getPerformanceModel()->queuePseudoInstruction(new SyncInstruction(sleep_end_time, SyncInstruction::SLEEP)); 96 | ``` 97 | 98 | ### Handle Other System Calls 99 | 100 | Unlike SYSCALL_REMOTE_READ and SYSCALL_REMOTE_WRITE, the other system calls only need to issue their functional and timing commands; it is not necessary to handle other functionality. 101 | 102 | The flow chart is as follows: 103 | 104 | ```mermaid 105 | flowchart TD 106 | 107 | subgraph Functional model 108 | A1[Issue functional command] 109 | B1[Wait for RESULT command] 110 | end 111 | 112 | A1-->B1 113 | B1-->B1 114 | 115 | subgraph Timing model 116 | A2[Issue timing command] 117 | B2[Wait for SYNC command] 118 | C2[Sleep core until cycle specified by SYNC command] 119 | end 120 | 121 | A2-->B2-->C2 122 | B2-->B2 123 | ``` 124 | 125 | The mapping between system calls and commands is shown below: 126 | 127 | | System call | Functional command | Timing command | 128 | | -------------------- | :----------------: | :------------: | 129 | | SYSCALL_LAUNCH | `LAUNCH` | `WRITE` | 130 | | SYSCALL_WAITLAUNCH | `WAITLAUNCH` | `READ` | 131 | | SYSCALL_BARRIER | `BARRIER` | `WRITE` | 132 | | SYSCALL_LOCK | `LOCK` | `WRITE` | 133 | | SYSCALL_UNLOCK | `UNLOCK` | `WRITE` | 134 | | SYSCALL_REMOTE_READ | `RECEIVE` | `READ` | 135 | | SYSCALL_REMOTE_WRITE | `SEND` | `WRITE` | 136 | 137 | ## Issue CYCLE command 138 | 139 | Because the CPU always controls the flow of benchmarks, the CPU's execution cycle plays a vital role in the execution cycle of the entire simulation. CYCLE command is issued in file *$SIMULATOR_ROOT/snipersim/common/core/core.cc*.
140 | -------------------------------------------------------------------------------- /docs/docs/04-import-sim/042-gpgpusim.md: -------------------------------------------------------------------------------- 1 | 2 | # Importing GPGPUSim 3 | 4 | [GPGPUSim](http://www.gpgpu-sim.org) is a cycle-accurate model that simulates the micro-architecture of Nvidia GPGPU. 5 | 6 | ## APIs 7 | 8 | In real GPGPU systems, benchmark APIs will be scheduled to particular APIs. For example, DMA units handle communication. The mailbox handles the barrier and lock. Therefore, benchmark APIs should be implemented by CUDA runtime APIs rather than kernels. 9 | 10 | CUDA runtime APIs are declared and implemented in the CUDA kit. GPGPUSim provides a different runtime library **libcudart.so** rather than the standard runtime library from the CUDA kit. When compiling CUDA executable files, nvcc will link the standard runtime library. 11 | 12 | Benchmark APIs are added to the runtime library provided by GPGPUSim. APIs are implemented in file *$SIMULATOR_ROOT/gpgpu-sim/libcuda/cuda_runtime_api.cc*. 
Hence, when compiling CUDA executable files, the path to search libraries must be redirected to GPGPUSim: 13 | 14 | ``` 15 | nvcc -L$(SIMULATOR_ROOT)/gpgpu-sim/lib/$(GPGPUSIM_CONFIG) --cudart shared $(CUDA_OBJS) -o $(CUDA_TARGET) 16 | ``` 17 | 18 | ### sendMessage and receiveMessage 19 | 20 | The flow chart of `sendMessage` and `receiveMessage` is as follows: 21 | 22 | ```mermaid 23 | flowchart TD 24 | 25 | subgraph Write Syscall 26 | O1(Start) 27 | A1[Copy data from device to host] 28 | B1[Issue SEND command] 29 | C1[Wait for RESULT command] 30 | D1[Open Pipe] 31 | E1[Write data to Pipe] 32 | F1[Get current simulation cycle] 33 | G1[Send WRITE command] 34 | H1[Wait for SYNC command] 35 | I1[Adjust simulation cycle] 36 | Z1(End) 37 | end 38 | 39 | O1-->A1-->B1-->C1-->D1-->E1-->F1-->G1-->H1---->I1-->Z1 40 | C1-->C1 41 | H1-->H1 42 | 43 | subgraph Read Syscall 44 | O2(Start) 45 | A2[Issue RECEIVE command] 46 | B2[Wait for RESULT command] 47 | C2[Open Pipe] 48 | D2[Read data from Pipe] 49 | E2[Get current simulation cycle] 50 | F2[Send READ command] 51 | G2[Wait for SYNC command] 52 | H2[Write data from host to device] 53 | I2[Adjust simulation cycle] 54 | Z2(End) 55 | end 56 | 57 | O2---->A2-->B2-->C2-->D2-->E2-->F2-->G2-->H2-->I2-->Z2 58 | B2-->B2 59 | G2-->G2 60 | ``` 61 | 62 | CUDA provides memory copy APIs to transfer data between the host and the device. 63 | 64 | ```C++ 65 | // From the device to the host, used by sendMessage 66 | cudaMemcpy(interdata, __addr, __nbyte, cudaMemcpyDeviceToHost); 67 | 68 | // From the host to the device, used by receiveMessage 69 | cudaMemcpy(__addr, interdata, __nbyte, cudaMemcpyHostToDevice); 70 | ``` 71 | 72 | GPGPUSim is a cycle-driven simulator whose cycle loop can be found in file *\$SIMULATOR_ROOT/gpgpu-sim/src/gpgpu-sim/gpu-sim.h* and *\$SIMULATOR_ROOT/gpgpu-sim/src/gpgpu-sim/gpu-sim.cc*. 73 | 74 | GPGPUSim applies two variables to record the execution cycles: `gpgpu_sim::gpu_sim_cycle` and `gpgpu_sim::gpu_tot_sim_cycle`. 
The sum of these two variables represents the actual elapsed cycle count, which should be replaced by the cycle value carried in the SYNC command. 75 | 76 | When GPGPUSim handles CUDA APIs, the cycle loop has not started. Hence, the cycle counters can be modified directly; the snippet below adjusts `gpgpu_sim::gpu_tot_sim_cycle` so that the sum of the two counters equals the new cycle value: 77 | 78 | ```c++ 79 | // Get the current cycle. 80 | gpgpu_sim *gpu = GPGPU_Context()->the_gpgpusim->the_context->get_device()->get_gpgpu(); 81 | long long unsigned int timeNow = gpu->gpu_sim_cycle + gpu->gpu_tot_sim_cycle; 82 | 83 | long long int timeEnd = ... 84 | 85 | // Update the current cycle. 86 | gpu->gpu_tot_sim_cycle = timeEnd - gpu->gpu_sim_cycle; 87 | ``` 88 | 89 | ### Other APIs 90 | 91 | Unlike `sendMessage` and `receiveMessage`, the other APIs only need to issue a functional command and a timing command; no additional functionality (such as pipe access or memory copies) has to be handled. 92 | 93 | The flow chart is as follows: 94 | 95 | ```mermaid 96 | flowchart TD 97 | 98 | A1[Issue functional command] 99 | B1[Wait for RESULT command] 100 | 101 | A2[Issue timing command] 102 | B2[Wait for SYNC command] 103 | C2[Change the simulator cycle] 104 | 105 | A1-->B1-->A2-->B2-->C2 106 | B1-->B1 107 | B2-->B2 108 | ``` 109 | 110 | The mapping between APIs and commands is shown below: 111 | 112 | | System call | Functional command | Timing command | 113 | | ---------------- | :----------------: | :------------: | 114 | | `launch` | `LAUNCH` | `WRITE` | 115 | | `waitLaunch` | `WAITLAUNCH` | `READ` | 116 | | `barrier` | `BARRIER` | `WRITE` | 117 | | `lock` | `LOCK` | `WRITE` | 118 | | `unlock` | `UNLOCK` | `WRITE` | 119 | | `receiveMessage` | `RECEIVE` | `READ` | 120 | | `sendMessage` | `SEND` | `WRITE` | 121 | 122 | ## Issue CYCLE command 123 | 124 | Tasks on the GPU are triggered by CPUs in the system. CPUs prepare the data required by the tasks and receive the generated results. Because data transmission synchronizes the two sides, the execution cycle of the CPUs already reflects the execution cycle of the GPUs. Therefore, GPGPUSim does not issue CYCLE commands. 
125 | -------------------------------------------------------------------------------- /docs/docs/images/Chiplet Simulator.drawio: -------------------------------------------------------------------------------- 1 | 7Vtbc5s4FP41zLQP6YAkiP2Y2G3azmY3U0+m6aMWFKNdjFxZvu2vrzACjIRTbxJArvtio4ME4jsXnfMJHDiabW44nse3LCKJA9xo48CxA8DQ8+VvJtjmgmDo5oIpp1Eu8irBhP5HlLDotqQRWdQ6CsYSQed1YcjSlISiJsOcs3W92yNL6ned4ykxBJMQJ6b0K41EnEsHvlvJPxI6jYs7e646M8NFZyVYxDhi6z0RfO/AEWdM5EezzYgkGXYFLvm4DwfOlhPjJBXHDKCf5+EQP36Eq/vFzThKvpDp/YUXqMmJbfHEnC3TiGSDXAdeMy5iNmUpTv5gbC6FnhT+Q4TYKl3hpWBSFItZos6SDRUPe8ffsku981VrvFFX3jW2RSMVfPuw38hHAb9oV+N2rWLgQmAurjJFS0GY4MWChoX4A02KOeVPSSJD2RV6SrRgSx6SpyBTVoj5lIin+oFSydI5CJsROW85kJMEC7qqTwQrM52W/SpNygOlzP+j2Py6K5ws1Z1Gd/dSMKbEAUEiJ379N68pPvi+zEzx+pGl4mKxU+6V7OCB+WYHYHFeHk2z/zdrKi1cGjqZMb59+xM7WsdUkMkc76Bdy1BRt5nspsqgJG6FvlaEC7J5WmMmwMWAwvFU5PEC1V5Xflz2ifd8GLkt6QQ066TSRg7siHHyMjRfAT2ooYeAiR7qEjx4SuAFloGHTgg85FoGnn9K4CHLwAtOCDxwRMwLugTv8oTA846IeZ2CNzDAm8TyaaIyZclummbNEIexrDGsQ7D3hGV4ahAaWUsThJ1aYVHH7mN42ztOem586Zo4dWppnlmv3HG6woJYa2s6hkHv7updGqBkFe9ENVOWyr/rlup78LwC38L6XpVpPy/wUbN9dFTgD3pU9uX5KdvvVdnDTpXtPkvZ3q+jbNirsk2e6NNfzqtTd2wpHHvZOzDQ2Dt05OpapnevrxeTgro5c04VHKuV9nIek9o6N6XonE//RLdnUmbnrpT+PQXBbnOIcsfv296Z5hSi+1Rgbwf2eSu8GnrHaCqcgwyiry9GeeqhRmkKLafxgh0mk234k4g14/9K4adUEP6Y+UDvpbPGdPkN9EOjc4C2nAOY/IMC7oKlF6OYzvsH7ZBt1aj9TiOK/zuiVBYEWoko+m5Y5xHFLEZsjCg68dsYUZqI3/YiirnzNf5yddu0eyPLYpYkhPeOol50BQMTxUEDiK2VXKBhE8cC9lzL7Xpnz0HDRo0FMAWWwQTt3IsZ2AaTmQtZABNwbYPJXBltgMm2nT2EDEy6SxLLvNCSJBG+lFhuThL1EBK43SaJ0GRGbUwSjR3bYzmZ1pJEaLKXFsQQveboPYZAk0+0/e0AHcP+3w6AZkFig63Ztl5B86U7G2CyreSAVlZmyLqSw8rKDNlWchQXtgsm37Z1EJlfa51vLo0OKO+lhKsWQrrOpdHBnQircmkjv+k9l0bdvv1ouXOAVpxDp9k7dw6Tc7HROQwevcE5mnj0Zywcsll9QpzDXH2HDd//AA==7VnbctowEP0aHtvxBRvyGCCknWkzmTLTlqeMgoWtVLYYWdz69V1hyXcoCQanU14ArXYt6eye1a7p2MNwc8/RIvjKPEw7luFtOvaoY1k3pgOfUrBNBO6NkQh8TrxEZGaCCfmNlVCrLYmH44KiYIwKsigKZyyK8EwUZIhzti6qzRktrrpAPq4IJjNEq9IfxBNBIu07Rib/hIkf6JVNQ82ESCsrQRwgj61zIvuuYw85YyL5FW6GmErsNC6J3XjPbLoxjiNxjAGd
Pgb4/uXp6cGeoM338Qh/iz/01d7EVh8Ye3B+NYxYBF8DzpaRh+VjDBgxLgLmswjRL4wtQGiC8AULsVXeQ0vBQBSIkKpZvCHipzT/6KjRNDcz2qgn7wZbPYgE3+aM5HCan8vMdiNtN2eRUBsxAeNBLBAXtzISsgPtZGNCqbJJQJAn34utEsVsyWf4AKA6RhH3sTigZ6cRAMzBLMRwBrDjmCJBVsV9IBXDfqqXuRl+KE+/wuvquStEl2qlHSKVWMgcL321DojAkwXanX8NbC86uQw84jM1dDKMV5gLvDmMchUVZWB1FZVULjE1tdY5ZipRkCOlNmscR6uC413kvXcUu0YRRcdtGcWbaw5qNgfZR+Ygp80cZFe48xigGIPIlGck4VLugkUH6GS8nk5NZKESf1Ji5PhjupckkK6VrgxqikHOkQzqtXqL9y7qdjPn9CwE6t3+Fve9NVQadHvvSLdbJ7pdmT4yAjvMLmanmFjsfilhJPtSVlnwAK5om1NbSIX4wDqlMqrbN0qxmDwxi8z0jCcE64U7jTMHa9O5pvFgSku4cpXXcDClNbgOWrfUhf5F33EvEHxOpdb4PJeFRiw3BvUDTyoNe1wtNgIWPi/jVgqNtIjQ0Na0O2lRkS80ynmjuRvHuhYa7dw45p5QuUyl0avnjwhkuY4BLK63Am6XL+gkGhyvCAPqyKh1KRxu8Axqri9/ZZyDlaUKxXG8eyKKdl8cQ9TQml76HfGxW1P4X5iP9rvlY9uv4U7ly3G3atoLNnyrltdx+4dv1dPU9SnOWwBWXzrqht/6txp+12i94e9WsJxWYIMDiyI2seDsFx4yCvk6JeEc+FcSIUp8qIZGMwAIg3wg4SMzRG/VREg8Ty5T64yiu86Rh0v9i12Xh2vcYZ3NHdX68uH/cYdVvhZr6HFZd7hXd+QarLquoRl3wDD7+zK5J7L/gO27Pw==7V1bd6M2EP41eUwO4u7HTTa73dOkzWl6TncfCRCbFiMX403cX19hC2NGYGRuEg55iRHiNvq+uUgzcKXdLd+/xs5q8Yg9P7xSFe/9Svt8papImdnkX9qy3bfYKm2Yx4FHO+UNz8F/fnYkbd0Enr8udEwwDpNgVWx0cRT5blJoc+IYvxW7veKweNWVM/eZhmfXCdnWvwIvWdCnMJS8/Rc/mC+yKyOF7lk6WWfasF44Hn47atLur7S7GONk/2v5fueHqfAyueyP+1Kx93BjsR8lPAd8/T0y439n+ta7/vPeDu9/ffi0vNbpzf10wg19Ynq3yTYTQYw3keenZ1GutNu3RZD4zyvHTfe+kUEnbYtkGZItRH6+4iiho4jUdDsIwzsc4pg0RDgi7bf0in6c+O+Vz4IOEiLQ8vHST+It6UIPMOltb4tgectHyMq6LI5G59DRoaiYH86cC478oLI7Q46IEeNzsNyETkIeXDVDcv3bl/TXPP31FGPXX6+7FXQHUrWLUj1I+UisdolUzd6EajMi8j3CTrpJ4VSUGo6TBZ7jyAkfMF5RWf3tJ8mWCsvZJLgoSf89SL6nh98YdOvH0Z7P7/TMu41tthGRB/x+vPEjP0O6mR+228qOg6O2Tpw4+ZTqqfyBdm1fglRWu2PWSYz/OWiffKxTWZweaaIrnXjuJ6dkbJVDIvYJeIOfxQuUjTA99AkH5NIHKEEszRQAkjXexK5Pj8pxQkThbI+6rdIO6zOuYyoAdvsz5iA8PGNzXKqXQHZVl4zt2iVIVVckk6p+CVI1TMmkalyCVC1VMqnqM0asd7HvJP5JNyC3+Pd56627iX/uZIvEuwi5V/Cj4BR05yLwOwR7u3tiEExex6Gl39AKKSbLv83LdbIgaPGk4xlwURAnzfTelBcb8000G5pmFifNVJE0s8ZEM+jQiucZOykw8WxontmcPNNE8sweE89giCOeZ2w8PvFsaJ7NOHmmi+QZG19IzDMY9IrnGTtDM/FsaJ5lKxi1RDNEEg2xIYbETIMTIcKZhljH+zdnSSSk
Kk/BiuXbeuGs0p8edjfL3UPXSfFlL/KHlwEdhZkuWKzqsOtJ6EgX5ZrpPG2EetBGbdaT6vUTb2Br2C0VVPmCkGYB0CHlRjn6AyesWIZiTmuC4JJZz9o/b+v1LBNyZoj1LJ0NVA+z2Nkk9rco8WN3EaxCMqp0J7nYYT/DLKI3kiIv9rgDyQAl+QFOGMwjsun66TVJQ6qFAtcJP9Edy8Dzdl5EmZIr0rcHtVafe1Cm1LTeJs2rvTLFiVKTscRRsFuYGFD3neOJCfao9LZTbOVM1uF0lMqnMTojtX5ywMvd8vbjP6BXXYgh+AFRa5n6yYioxUOFKeoMD8aEh2Z46MdREY4Hc8JDMzzMLhMP1oSHhil0ymUCgp1Wf3SCSNZJHDjbIHqyQWcXdViCRV6GWTd01uvALYrpdCzTlDtnzs52yBSzfASPhsgoGaKsTRCf2JxTGAFCDPXs2RsXkX0GsmJsTr72l9JXklM0PqnCJAjxYi3JIRmfWKFxES/WkpSB8YkVLnGKF2vJCvH4xArXs4SLNTvxkVifvj3dM4L7sOvGufd3HLTkk+Y9LdaYvI6bIXQ12WKV3R++F8RpvaeImLXB+l6TeLUp0uSAjFUxTUJ1lHKjUje2pVuPgCNpGXzeeH6irCN+fV37vXjsM/kzQQfDcgHJKQhOg1kEdKtqIinirsltI00toA51E6NW+UOdrzIXKaOAyvHT3WfG6e7IAP2LhenFNewhaWiyNJQ4KwnoNdPmc+J6m9Ay1cmJk9KJ03iduIrpt4Fyb1n4TDZweBvIC5YaG6jcKKZp9GEC1argtWMbCK9TZwRh/zorqM5Af00WK1hSaC+vFYQTmuLNIFtRP5lBGcygwWsG2+b3tDOD8hdRfAQzyAuWWjNoGln5WdvE5aoFsY7tHrxOnd2D/YHdE2jHSpZB5bVjcAVJvB1j1zsnOyaDHRusgKIdfEb1bgC40CiefeySxsQ+GdjHW8dstM0CbQefUVUyw/Vo4ezLasSmgiee4dPA8OkG3/D1VvJkqQ30pJypytVqrqDXPWe96CUhU+PUi3Y/RRAIagZI+Z5znC1tQlJHSDI+OJIup9xONJKsD46kyynUE42k2QdHEhujs9AaQwlKASCcCOsSR9wvm6vA0TAFLk3hVlvgYsLE4J4LXCw2un3Acwa6cr71BsHkqpKXCelDBrs2G+yOR5ow9hQvTTaHbDzS1OHaj3BpqiOWJnwnkHhpsmvL45GmrcgmTTbRo7Lq4OJrCuodNjlyHO2KWvyuqw4Ok5sZWi0+F4k9EQIngp+oqnXa+l9pt9mV9okJopjAm7lin1a3nTFBm3XEBPjmPimZwCZNTEwQxQTeRIm9k9k/EwyjIyYY0LjIyAR2bmligigm8CYt2BVfOeiaCRZ0apoywYTGRUYmsKk8j5s0Dsrev0nirXj37mbPSZwdM+KNm2zikpTfMAxW6x1eaIzmhnjj1QdoEOwlSQYlk+ztZmbAXIJREq8dZiSPAza7r4ANKdLmVrdSRqigiurzpJskcovQRvVltoqqmAWQXXdUZGTA1yzDXBVuBQVPBKvPO/rOqQVnR2qytmH/umolE9bsIkmyvJHCBp8TqYcnNa+zXVswoRt6sW6wK07rsOCOc9WKVQ4wEIUWqyNOG3D+uIbTsL80lRhIOf1hBVkTlwsuURM2d0gwpHCHs0Lz/hE6nZ4ix0c0ispVto+ydwgGpAjNQz/c6FhfEis6E52OX0GAf8apRFSFBHFj+dQPzPAXL1d0Oh9zUlNdqCnuj5FRPSHOaLE5MJ/JldM7+Ba94nGRzZaOa+y8C0u+PM+QQrVBkuE5c7L8OK9/lV3FovIwaX2V5Tdnp/VB4HC+Ke/suMYsv07Vfdkw3VA/r79V079dd3A3V2d+gYtsxhgnx92J0lg8Ys9Pe/wP -------------------------------------------------------------------------------- 
/docs/javascripts/mathjax.js: -------------------------------------------------------------------------------- 1 | window.MathJax = { 2 | tex: { 3 | inlineMath: [["\\(", "\\)"]], 4 | displayMath: [["\\[", "\\]"]], 5 | processEscapes: true, 6 | processEnvironments: true 7 | }, 8 | options: { 9 | ignoreHtmlClass: ".*|", 10 | processHtmlClass: "arithmatex" 11 | } 12 | }; 13 | 14 | document$.subscribe(() => { 15 | MathJax.startup.output.clearCache() 16 | MathJax.typesetClear() 17 | MathJax.texReset() 18 | MathJax.typesetPromise() 19 | }) 20 | -------------------------------------------------------------------------------- /docs/mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Documents for LegoSim 2 | 3 | nav: 4 | - index.md 5 | - 01-quick-start.md 6 | - Benchmarks: 7 | - APIs: 8 | - 02-benchmarks/021-apis/index.md 9 | - APIs for CPU: apiProject1/group__apis__for__cpu.md 10 | - APIs for GPU: apiProject1/group__apis__for__cuda.md 11 | - 02-benchmarks/022-yaml.md 12 | - Synchronization Protocol: 13 | - 03-sync-proto/index.md 14 | - 03-sync-proto/030-net-proto.md 15 | - 03-sync-proto/031-communication.md 16 | - 03-sync-proto/032-barrier.md 17 | - 03-sync-proto/033-lock.md 18 | - 03-sync-proto/034-launch.md 19 | - 03-sync-proto/035-cycle.md 20 | - Importing Simulators: 21 | - 04-import-sim/index.md 22 | - 04-import-sim/041-snipersim.md 23 | - 04-import-sim/043-gem5.md 24 | - 04-import-sim/042-gpgpusim.md 25 | - Synchronization protocol interface: apiProject1/group__sync__proto.md 26 | - Pipe communication interface: apiProject1/group__pipe__comm.md 27 | - interchiplet Documents: 28 | - Command Line Parser: apiProject1/group__cmdline.md 29 | - Yaml Configuration Interface: apiProject1/group__benchmark__yaml.md 30 | - Network benchmark interface: apiProject1/group__net__bench.md 31 | - Network latency information interface: apiProject1/group__net__delay.md 32 | - Command handler structures: 
apiProject1/group__cmd__handler__struct.md 33 | - Command handler functions: apiProject1/group__cmd__handler__func.md 34 | - Source Codes: 35 | - pages list: apiProject1/pages.md 36 | - modules: 37 | - modules list: apiProject1/modules.md 38 | - namespace: 39 | - namespaces list: apiProject1/namespaces.md 40 | - namespace_members: apiProject1/namespace_members.md 41 | - namespace_member_functions: apiProject1/namespace_member_functions.md 42 | - namespace_member_variables: apiProject1/namespace_member_variables.md 43 | - namespace_member_typedefs: apiProject1/namespace_member_typedefs.md 44 | - namespace_member_enums: apiProject1/namespace_member_enums.md 45 | - InterChiplet: apiProject1/namespaceInterChiplet.md 46 | - classes: 47 | - classes list: apiProject1/annotated.md 48 | - classes index: apiProject1/classes.md 49 | - classes hierarchy: apiProject1/hierarchy.md 50 | - class_members: apiProject1/class_members.md 51 | - class_member_functions: apiProject1/class_member_functions.md 52 | - class_member_variables: apiProject1/class_member_variables.md 53 | - class_member_typedefs: apiProject1/class_member_typedefs.md 54 | - class_member_enums: apiProject1/class_member_enums.md 55 | - links: apiProject1/links.md 56 | - BenchmarkConfig: apiProject1/classBenchmarkConfig.md 57 | - CmdLineOptions: apiProject1/classCmdLineOptions.md 58 | - NetworkBenchItem: apiProject1/classNetworkBenchItem.md 59 | - NetworkBenchList: apiProject1/classNetworkBenchList.md 60 | - NetworkDelayItem: apiProject1/classNetworkDelayItem.md 61 | - NetworkDelayMap: apiProject1/classNetworkDelayMap.md 62 | - NetworkDelayStruct: apiProject1/classNetworkDelayStruct.md 63 | - PipeComm: apiProject1/classInterChiplet_1_1PipeComm.md 64 | - PipeCommUnit: apiProject1/classInterChiplet_1_1PipeCommUnit.md 65 | - ProcessConfig: apiProject1/classProcessConfig.md 66 | - ProcessStruct: apiProject1/classProcessStruct.md 67 | - SyncBarrierStruct: apiProject1/classSyncBarrierStruct.md 68 | - SyncClockStruct: 
apiProject1/classSyncClockStruct.md 69 | - SyncCommand: apiProject1/classInterChiplet_1_1SyncCommand.md 70 | - SyncCommStruct: apiProject1/classSyncCommStruct.md 71 | - SyncLaunchStruct: apiProject1/classSyncLaunchStruct.md 72 | - SyncLockStruct: apiProject1/classSyncLockStruct.md 73 | - SyncPipeStruct: apiProject1/classSyncPipeStruct.md 74 | - SyncStruct: apiProject1/classSyncStruct.md 75 | - files: 76 | - file list: apiProject1/files.md 77 | - functions: apiProject1/functions.md 78 | - macros: apiProject1/macros.md 79 | - variables: apiProject1/variables.md 80 | - interchiplet: apiProject1/dir_a2025b34133129e5724d121abe9a4a4a.md 81 | - interchiplet/includes: apiProject1/dir_943fa6db2bfb09b7dcf1f02346dde40e.md 82 | - interchiplet/srcs: apiProject1/dir_b94c70d771af9f161858c2c4e7b3d1c5.md 83 | - apis_c.h: apiProject1/apis__c_8h.md 84 | - apis_c.cpp: apiProject1/apis__c_8cpp.md 85 | - apis_cu.h: apiProject1/apis__cu_8h.md 86 | - benchmark_yaml.h: apiProject1/benchmark__yaml_8h.md 87 | - cmd_handler.h: apiProject1/cmd__handler_8h.md 88 | - cmd_handler.cpp: apiProject1/cmd__handler_8cpp.md 89 | - cmdline_options.h: apiProject1/cmdline__options_8h.md 90 | - interchiplet.cpp: apiProject1/interchiplet_8cpp.md 91 | - global_define.h: apiProject1/global__define_8h.md 92 | - net_bench.h: apiProject1/net__bench_8h.md 93 | - net_delay.h: apiProject1/net__delay_8h.md 94 | - pipe_comm.h: apiProject1/pipe__comm_8h.md 95 | - sync_protocol.h: apiProject1/sync__protocol_8h.md 96 | - sources: 97 | - apis_c.h: apiProject1/apis__c_8h_source.md 98 | - apis_c.cpp: apiProject1/apis__c_8cpp_source.md 99 | - apis_cu.h: apiProject1/apis__cu_8h_source.md 100 | - benchmark_yaml.h: apiProject1/benchmark__yaml_8h_source.md 101 | - cmd_handler.h: apiProject1/cmd__handler_8h_source.md 102 | - cmdline_options.h: apiProject1/cmdline__options_8h_source.md 103 | - interchiplet.cpp: apiProject1/interchiplet_8cpp_source.md 104 | - global_define.h: apiProject1/global__define_8h_source.md 105 | - 
net_bench.h: apiProject1/net__bench_8h_source.md 106 | - net_delay.h: apiProject1/net__delay_8h_source.md 107 | - pipe_comm.h: apiProject1/pipe__comm_8h_source.md 108 | - sync_protocol.h: apiProject1/sync__protocol_8h_source.md 109 | - cmd_handler.cpp: apiProject1/cmd__handler_8cpp_source.md 110 | 111 | theme: 112 | name: "material" 113 | features: 114 | - navigation.indexes 115 | 116 | plugins: 117 | - search 118 | - autorefs 119 | - mkdoxy: 120 | projects: 121 | apiProject1: 122 | src-dirs: 123 | ../interchiplet/includes 124 | ../interchiplet/srcs 125 | full-doc: True 126 | doxy-cfg: 127 | FILE_PATTERNS: "*.cpp *.h*" 128 | EXAMPLE_PATH: "" 129 | RECURSIVE: True 130 | 131 | markdown_extensions: 132 | - pymdownx.highlight 133 | - pymdownx.superfences: 134 | custom_fences: 135 | - name: mermaid 136 | class: mermaid 137 | format: !!python/name:pymdownx.superfences.fence_code_format 138 | - pymdownx.arithmatex: 139 | generic: true 140 | 141 | extra_javascript: 142 | - javascripts/mathjax.js 143 | - https://polyfill.io/v3/polyfill.min.js?features=es6 144 | - https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js 145 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | mkdocs 2 | mkdocstrings[python] 3 | mkdocs-material 4 | mkdoxy -------------------------------------------------------------------------------- /global_manager_test.cpp: -------------------------------------------------------------------------------- 1 | #include "global_manager.h" 2 | #include 3 | 4 | class GlobalManagerTest : public ::testing::Test { 5 | protected: 6 | void SetUp() override { 7 | gm = std::make_unique(); 8 | } 9 | 10 | void TearDown() override { 11 | gm.reset(); 12 | } 13 | 14 | std::unique_ptr gm; 15 | }; 16 | 17 | TEST_F(GlobalManagerTest, TestIsFinish) { 18 | EXPECT_TRUE(gm->IsFinish()); 19 | } 20 | 21 | TEST_F(GlobalManagerTest, TestAddRequest) { 22 | 
InterChiplet::AddrType sender = {0, 0}; 23 | InterChiplet::AddrType receiver = {1, 1}; 24 | std::string data = "Hello"; 25 | double senderClock = 100.0; 26 | double frequency = 1.0; 27 | InterChiplet::SyncCommType behavior = InterChiplet::SC_SEND; 28 | 29 | gm->AddRequest(sender, receiver, data, senderClock, frequency, behavior); 30 | EXPECT_FALSE(gm->IsFinish()); 31 | } 32 | 33 | TEST_F(GlobalManagerTest, TestCheckPair) { 34 | InterChiplet::AddrType sender = {0, 0}; 35 | InterChiplet::AddrType receiver = {1, 1}; 36 | std::string data = "Hello"; 37 | double senderClock = 100.0; 38 | double frequency = 1.0; 39 | 40 | gm->AddRequest(sender, receiver, data, senderClock, frequency, InterChiplet::SC_SEND); 41 | gm->AddRequest(receiver, sender, data, senderClock, frequency, InterChiplet::SC_RECEIVE); 42 | 43 | EXPECT_TRUE(gm->CheckPair()); 44 | } 45 | 46 | int main(int argc, char **argv) { 47 | ::testing::InitGoogleTest(&argc, argv); 48 | return RUN_ALL_TESTS(); 49 | } -------------------------------------------------------------------------------- /interchiplet/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | bin/ 3 | lib/ 4 | build/ 5 | -------------------------------------------------------------------------------- /interchiplet/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | enable_language(CUDA) 4 | 5 | # Project name. 6 | project(interchiplet LANGUAGES CUDA CXX) 7 | 8 | # Third-party library 9 | add_subdirectory(thirdparty/yaml-cpp) 10 | add_subdirectory(thirdparty/CLI11) 11 | add_subdirectory(thirdparty/spdlog) 12 | 13 | # Parameter set. 
14 | set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin) 15 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/lib) 16 | include_directories(includes) 17 | 18 | # Import library 19 | # find cppzmq wrapper, installed by make of cppzmq 20 | find_package(Boost REQUIRED COMPONENTS 21 | system 22 | filesystem 23 | program_options 24 | ) 25 | 26 | # Static library: interchiplet_c 27 | add_library(interchiplet_c STATIC 28 | srcs/apis_c.cpp) 29 | 30 | # Static library: interchiplet_cu 31 | # add_library(interchiplet_cu STATIC 32 | # srcs/apis_cu.cu) 33 | 34 | # Executable: interchiplet 35 | add_executable(interchiplet srcs/interchiplet.cpp 36 | srcs/cmd_handler.cpp) 37 | target_include_directories(interchiplet PUBLIC thirdparty/CLI11/include) 38 | target_include_directories(interchiplet PUBLIC thirdparty/spdlog/include) 39 | target_compile_options(interchiplet PUBLIC "-pthread") 40 | target_link_options(interchiplet PUBLIC "-pthread") 41 | target_link_libraries(interchiplet ${Boost_LIBRARIES}) 42 | target_link_libraries(interchiplet yaml-cpp::yaml-cpp) # The library or executable that require yaml-cpp library 43 | -------------------------------------------------------------------------------- /interchiplet/depreciate/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | cmake_minimum_required(VERSION 3.10) 3 | enable_language(CUDA) 4 | 5 | # Project name. 6 | project(interchiplet LANGUAGES CUDA CXX) 7 | 8 | # Parameter set. 
9 | set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin) 10 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/lib) 11 | include_directories(includes) 12 | 13 | # Import library 14 | # find cppzmq wrapper, installed by make of cppzmq 15 | find_package(cppzmq) 16 | find_package(Boost REQUIRED COMPONENTS 17 | system 18 | filesystem 19 | ) 20 | 21 | # Static library: interchiplet_c 22 | add_library(interchiplet_c STATIC 23 | srcs/apis_c.cpp) 24 | 25 | # Static library: interchiplet_cu 26 | add_library(interchiplet_cu STATIC 27 | srcs/apis_cu.cu) 28 | 29 | # Executable: record_transfer 30 | add_executable(record_transfer srcs/record_transfer.cpp) 31 | target_link_libraries(record_transfer ${Boost_LIBRARIES}) 32 | 33 | # Executable: comm_bridge 34 | add_executable(comm_bridge srcs/comm_bridge.cpp) 35 | target_compile_options(comm_bridge PUBLIC "-pthread") 36 | target_link_options(comm_bridge PUBLIC "-pthread") 37 | #target_link_libraries(comm_bridge ${Boost_LIBRARIES}) 38 | 39 | # Executable: zmq_pro 40 | # Require cppzmq and Boost. If not found, skip. 
41 | if (CPPZMQ_FOUND AND Boost_FOUND) 42 | add_executable(zmq_pro 43 | srcs/zmq_pro.cpp 44 | srcs/interchiplet_client.cpp) 45 | 46 | include_directories(${Boost_INCLUDE_DIRS}) 47 | target_link_libraries(zmq_pro cppzmq) 48 | target_link_libraries(zmq_pro ${Boost_LIBRARIES}) 49 | else() 50 | if (CPPZMQ_FOUND) 51 | message(STATUS "Cannot find Boost library, skip target zmq_pro.") 52 | elseif(Boost_FOUND) 53 | message(STATUS "Cannot find cppzmq library, skip target zmq_pro.") 54 | endif() 55 | endif() 56 | -------------------------------------------------------------------------------- /interchiplet/depreciate/interchiplet_client.cpp: -------------------------------------------------------------------------------- 1 | #include "interchiplet_client.h" 2 | 3 | #include 4 | 5 | // Communication with popnet 6 | namespace nsInterchiplet 7 | { 8 | using namespace std; 9 | 10 | const int MSG_LEN = 1024; 11 | 12 | static zmq::context_t *zmqContext; 13 | static zmq::socket_t *zmqSocket; 14 | static deque msgBuf; 15 | int connectZmq(const string &addr) 16 | { 17 | zmqContext = new zmq::context_t(1); 18 | zmqSocket = new zmq::socket_t(*zmqContext, zmq::socket_type::pair); 19 | zmqSocket->connect(addr); 20 | zmqSocket->send(zmq::str_buffer("ready")); 21 | char buf[MSG_LEN + 1] = {0}; 22 | zmqSocket->recv(buf, MSG_LEN); 23 | return strcmp(buf, "start") == 0 ? 
0 : -1; 24 | } 25 | /* int connectZmq() 26 | { 27 | return connectZmq(popnetAddr); 28 | } 29 | int disconnectZmq() 30 | { 31 | zmqSocket->disconnect(popnetAddr); 32 | return 0; 33 | } */ 34 | int closeZmq() 35 | { 36 | zmqSocket->close(); 37 | delete zmqSocket; 38 | delete zmqContext; 39 | return 0; 40 | } 41 | int readAllMsg() 42 | { 43 | auto ret = zmq::recv_multipart(*zmqSocket, std::back_inserter(msgBuf)); 44 | return (int)*ret; 45 | } 46 | int readMsg(string &str) 47 | { 48 | if (msgBuf.empty()) 49 | readAllMsg(); 50 | //str = msgBuf.front().to_string() + '\0'; 51 | char buf[MSG_LEN+1]={0}; 52 | memcpy(buf,msgBuf.front().data(),msgBuf.front().size()); 53 | str=buf; 54 | msgBuf.pop_front(); 55 | return 0; 56 | } 57 | int writeMsg(const string &str) 58 | { 59 | char tmp[MSG_LEN] = {0}; 60 | strcpy(tmp, str.c_str()); 61 | auto r = zmqSocket->send(zmq::str_buffer(tmp)/* , zmq::send_flags::dontwait */); 62 | return *r; 63 | } 64 | } // namespace nsInterchiplet 65 | -------------------------------------------------------------------------------- /interchiplet/depreciate/interchiplet_client.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | namespace nsInterchiplet 7 | { 8 | extern const int MSG_LEN; 9 | 10 | int connectZmq(const std::string &addr); 11 | int closeZmq(); 12 | int readMsg(std::string &str); 13 | int writeMsg(const std::string &str); 14 | } -------------------------------------------------------------------------------- /interchiplet/depreciate/record_transfer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | using namespace std; 10 | 11 | const char CONFIG_PATH[]="changes/zmq_pro.ini"; 12 | const char SUBNET_ITEM[]="subnet_id"; 13 | 14 | int64_t subnet; 15 | void readConfig() 16 | { 17 | 
if(!boost::filesystem::exists(CONFIG_PATH)){ 18 | cout<<"no config file\n"; 19 | return; 20 | } 21 | boost::property_tree::ptree root,tag; 22 | boost::property_tree::ini_parser::read_ini(CONFIG_PATH,root); 23 | tag=root.get_child("config"); 24 | if(tag.count(SUBNET_ITEM)!=1)return; 25 | subnet=tag.get(SUBNET_ITEM); 26 | } 27 | 28 | int main() 29 | { 30 | readConfig(); 31 | ifstream ifs("message_record.txt"); 32 | ofstream ofs("record_"+to_string(subnet)+".txt"); 33 | string cmd; 34 | int64_t localPort,remotePort,remoteAddr,localCore; 35 | uint64_t t; 36 | while (ifs>>cmd>>localPort>>remoteAddr>>remotePort>>localCore>>t) 37 | { 38 | //命令 本地地址 本地端口 远程地址 远程端口 本地核心 时间(纳秒) 39 | ofs< 7 | #include 8 | #include 9 | std::atomic_int64_t socketNumber(0); 10 | using namespace std; 11 | namespace nsChange 12 | { 13 | // 14 | } 15 | 16 | typedef std::unordered_map> bufMap_t; 17 | bufMap_t buf; 18 | zmq::context_t zmqContext; 19 | zmq::socket_t zmqSocket; 20 | extern "C" int connectZmq(const char *addr) 21 | { 22 | zmqSocket = zmq::socket_t(zmqContext, zmq::socket_type::pair); 23 | zmqSocket.bind(addr); 24 | return 0; 25 | } 26 | extern "C" int disconnectZmq() 27 | { 28 | zmqSocket.close(); 29 | return 0; 30 | } 31 | extern "C" int readAllMsg() 32 | { 33 | std::vector msgBuf; 34 | auto ret = zmq::recv_multipart(zmqSocket, std::back_inserter(msgBuf)); 35 | for (auto &msg : msgBuf) 36 | { 37 | //将消息存入不同socket的buf上 38 | } 39 | return (int)*ret; 40 | } 41 | extern "C" const char *getBuf(int64_t socket) 42 | { 43 | bufMap_t::iterator it = buf.find(socket); 44 | if (it == buf.end()) 45 | return NULL; 46 | else if (it->second.empty()) 47 | return NULL; 48 | else 49 | return it->second.front().c_str(); 50 | } 51 | extern "C" int popBuf(int64_t socket) 52 | { 53 | bufMap_t::iterator it = buf.find(socket); 54 | if (it == buf.end()) 55 | return -1; 56 | else if (it->second.empty()) 57 | return -1; 58 | else 59 | { 60 | it->second.pop_front(); 61 | return 0; 62 | } 63 | } 64 | 
-------------------------------------------------------------------------------- /interchiplet/depreciate/zmq_pro.cpp: -------------------------------------------------------------------------------- 1 | #include"interchiplet_client.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | using namespace std; 26 | 27 | ofstream logfile("changes/zmq_pro.log"); 28 | 29 | //read config 30 | int64_t subnet; 31 | string popnetAddr; 32 | int readConfig() 33 | { 34 | const char CONFIG_PATH[]="changes/zmq_pro.ini"; 35 | const char SUBNET_ITEM[]="subnet_id"; 36 | const char INTER_ADDR_ITEM[]="inter_address"; 37 | if(!boost::filesystem::exists(CONFIG_PATH)){ 38 | return -1; 39 | } 40 | boost::property_tree::ptree root,tag; 41 | boost::property_tree::ini_parser::read_ini(CONFIG_PATH,root); 42 | tag=root.get_child("config"); 43 | if(tag.count(SUBNET_ITEM)!=1||tag.count(INTER_ADDR_ITEM)!=1)return -2; 44 | subnet=tag.get(SUBNET_ITEM); 45 | popnetAddr=tag.get(INTER_ADDR_ITEM); 46 | return 0; 47 | } 48 | 49 | // Communication with sniper 50 | namespace nsSniperConn 51 | { 52 | const size_t BUF_SIZE = 4096; 53 | const int SNIPER_PORT_BASE = 7000; 54 | int listenFd, connfd; 55 | queue lines; 56 | int openPort(int port) 57 | { 58 | const int LISTEN_QUEUE_LEN = 10; 59 | listenFd = socket(AF_INET, SOCK_STREAM, 0); 60 | if (listenFd < 0) 61 | return listenFd; 62 | sockaddr_in serverAddr; 63 | memset(&serverAddr, 0, sizeof(serverAddr)); 64 | serverAddr.sin_family = AF_INET; 65 | serverAddr.sin_addr.s_addr = htonl(INADDR_ANY); 66 | serverAddr.sin_port = htons(port); 67 | int r = bind(listenFd, (sockaddr *)&serverAddr, sizeof(serverAddr)); 68 | if (r < 0) 69 | return -1; 70 | r = listen(listenFd, LISTEN_QUEUE_LEN); 71 | if(r<0)return -2; 72 | return 0; 
73 | } 74 | int getConnection() 75 | { 76 | connfd = accept(listenFd, (sockaddr *)NULL, NULL); 77 | return connfd; 78 | } 79 | int receiveLine() 80 | { 81 | static int remain = 0; 82 | static char buf[BUF_SIZE+1] = {0}; 83 | if (lines.empty()) 84 | { 85 | char *end = buf + remain; 86 | char *pos = end; 87 | int r; 88 | while (pos == end) 89 | { 90 | r = recv(connfd, end, BUF_SIZE - remain, 0); 91 | if (r <= 0) 92 | return r; 93 | remain += r; 94 | end = buf + remain; 95 | pos = find(buf, end, '\n'); 96 | //assert(remain < BUF_SIZE); 97 | if(remain>=BUF_SIZE)throw "Message too long"; 98 | } 99 | *pos = '\0'; 100 | lines.push(buf); 101 | char *pos0 = pos + 1; 102 | for (;;) 103 | { 104 | pos = find(pos0, end, '\n'); 105 | if (pos == end) 106 | break; 107 | *pos = '\0'; 108 | lines.push(pos0); 109 | pos0 = pos + 1; 110 | } 111 | remain = distance(pos0, end); 112 | copy(pos0, end, buf); 113 | return lines.size(); 114 | } 115 | return 0; 116 | } 117 | int disconnect() 118 | { 119 | return close(connfd) | close(listenFd); 120 | } 121 | } // namespace nsSniperConn 122 | 123 | int main() 124 | { 125 | int r; 126 | r=readConfig(); 127 | logfile<<"readConfig: "<>cmd; 152 | if(cmd=="send"){ 153 | getline(ss,d); 154 | nsInterchiplet::writeMsg("send "+d); 155 | //控制发送速率,避免丢消息 156 | this_thread::sleep_for(1ms); 157 | cout<<"send_ret"< 4 | 5 | #include 6 | 7 | namespace InterChiplet { 8 | typedef decltype(syscall(0)) syscall_return_t; 9 | 10 | /** 11 | * @defgroup apis_for_cpu APIs for CPU 12 | * @brief APIs for CPU. 13 | * @{ 14 | */ 15 | /** 16 | * @brief Launch application to remote chiplet. 17 | * @param __dst_x Destination address in X-axis. 18 | * @param __dst_y Destination address in Y-axis. 19 | * @param __src_x Source address in X-axis. 20 | * @param __src_y Source address in Y-axis. 21 | */ 22 | syscall_return_t launch(int64_t __dst_x, int64_t __dst_y, int64_t __src_x, int64_t __src_y); 23 | 24 | /** 25 | * @brief Wait launch from remote chiplet. 
26 | * @param __dst_x Destination address in X-axis. 27 | * @param __dst_y Destination address in Y-axis. 28 | * @param __src_x Source address in X-axis. Return value. 29 | * @param __src_y Source address in Y-axis. Return value. 30 | */ 31 | syscall_return_t waitLaunch(int64_t __dst_x, int64_t __dst_y, int64_t* __src_x, int64_t* __src_y); 32 | 33 | /** 34 | * @brief Barrier. 35 | * @param __uid Barrier ID. 36 | * @param __src_x Source address in X-axis. 37 | * @param __src_y Source address in Y-axis. 38 | * @param __count Number of threads to barrier. 39 | */ 40 | syscall_return_t barrier(int64_t __uid, int64_t __src_x, int64_t __src_y, int64_t __count = 0); 41 | 42 | /** 43 | * @brief Lock mutex. 44 | * @param __uid Mutex ID. 45 | * @param __src_x Source address in X-axis. 46 | * @param __src_y Source address in Y-axis. 47 | */ 48 | syscall_return_t lock(int64_t __uid, int64_t __src_x, int64_t __src_y); 49 | 50 | /** 51 | * @brief Unlock mutex. 52 | * @param __uid Mutex ID. 53 | * @param __src_x Source address in X-axis. 54 | * @param __src_y Source address in Y-axis. 55 | */ 56 | syscall_return_t unlock(int64_t __uid, int64_t __src_x, int64_t __src_y); 57 | 58 | /** 59 | * @brief Send data to remote chiplet. 60 | * @param __dst_x Destination address in X-axis. 61 | * @param __dst_y Destination address in Y-axis. 62 | * @param __src_x Source address in X-axis. 63 | * @param __src_y Source address in Y-axis. 64 | * @param __addr Data address. 65 | * @param __nbyte Number of bytes. 66 | */ 67 | syscall_return_t sendMessage(int64_t __dst_x, int64_t __dst_y, int64_t __src_x, int64_t __src_y, 68 | void* __addr, int64_t __nbyte); 69 | 70 | /** 71 | * @brief Read data from remote chiplet. 72 | * @param __dst_x Destination address in X-axis. 73 | * @param __dst_y Destination address in Y-axis. 74 | * @param __src_x Source address in X-axis. 75 | * @param __src_y Source address in Y-axis. 76 | * @param __addr Data address. 77 | * @param __nbyte Number of bytes. 
78 | */ 79 | syscall_return_t receiveMessage(int64_t __dst_x, int64_t __dst_y, int64_t __src_x, int64_t __src_y, 80 | void* __addr, int64_t __nbyte); 81 | 82 | /** 83 | * @} 84 | */ 85 | } // namespace InterChiplet 86 | -------------------------------------------------------------------------------- /interchiplet/includes/apis_cu.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cuda_runtime_api.h" 4 | 5 | /** 6 | * @defgroup apis_for_cuda 7 | * @brief APIs for CUDA. 8 | * @{ 9 | */ 10 | /** 11 | * @brief Launch application to remote chiplet. 12 | * @param __dst_x Destination address in X-axis. 13 | * @param __dst_y Destination address in Y-axis. 14 | * @param __src_x Source address in X-axis. 15 | * @param __src_y Source address in Y-axis. 16 | */ 17 | extern __host__ cudaError_t CUDARTAPI launch(int __dst_x, int __dst_y, int __src_x, int __src_y); 18 | 19 | /** 20 | * @brief Wait launch from remote chiplet. 21 | * @param __dst_x Destination address in X-axis. 22 | * @param __dst_y Destination address in Y-axis. 23 | * @param __src_x Source address in X-axis. 24 | * @param __src_y Source address in Y-axis. 25 | */ 26 | extern __host__ cudaError_t CUDARTAPI waitLaunch(int __dst_x, int __dst_y, int* __src_x, 27 | int* __src_y); 28 | 29 | /** 30 | * @brief Barrier. 31 | * @param __uid Barrier ID. 32 | * @param __src_x Source address in X-axis. 33 | * @param __src_y Source address in Y-axis. 34 | * @param __count Number of threads to barrier. 35 | */ 36 | extern __host__ cudaError_t CUDARTAPI barrier(int __uid, int __src_x, int __src_y, int __count = 0); 37 | 38 | /** 39 | * @brief Lock mutex. 40 | * @param __uid Mutex ID. 41 | * @param __src_x Source address in X-axis. 42 | * @param __src_y Source address in Y-axis. 43 | */ 44 | extern __host__ cudaError_t CUDARTAPI lock(int __uid, int __src_x, int __src_y); 45 | 46 | /** 47 | * @brief Unlock mutex. 48 | * @param __uid Mutex ID. 
49 | * @param __src_x Source address in X-axis. 50 | * @param __src_y Source address in Y-axis. 51 | */ 52 | extern __host__ cudaError_t CUDARTAPI unlock(int __uid, int __src_x, int __src_y); 53 | 54 | /** 55 | * @brief Send data to remote chiplet. 56 | * @param __dst_x Destination address in X-axis. 57 | * @param __dst_y Destination address in Y-axis. 58 | * @param __src_x Source address in X-axis. 59 | * @param __src_y Source address in Y-axis. 60 | * @param __addr Data address. 61 | * @param __nbyte Number of bytes. 62 | */ 63 | extern __host__ cudaError_t CUDARTAPI sendMessage(int __dst_x, int __dst_y, int __src_x, 64 | int __src_y, void* __addr, int __nbyte); 65 | 66 | /** 67 | * @brief Read data from remote chiplet. 68 | * @param __dst_x Destination address in X-axis. 69 | * @param __dst_y Destination address in Y-axis. 70 | * @param __src_x Source address in X-axis. 71 | * @param __src_y Source address in Y-axis. 72 | * @param __addr Data address. 73 | * @param __nbyte Number of bytes. 74 | */ 75 | extern __host__ cudaError_t CUDARTAPI receiveMessage(int __dst_x, int __dst_y, int __src_x, 76 | int __src_y, void* __addr, int __nbyte); 77 | /** 78 | * @} 79 | */ -------------------------------------------------------------------------------- /interchiplet/includes/benchmark_yaml.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "yaml-cpp/yaml.h" 7 | namespace fs = boost::filesystem; 8 | 9 | /** 10 | * @defgroup benchmark_yaml 11 | * @brief YAML configuration file interface. 12 | * @{ 13 | */ 14 | /** 15 | * @brief Data structure to configure one simulation process. 16 | */ 17 | class ProcessConfig { 18 | public: 19 | /** 20 | * @brief Construct ProcessConfig. 21 | * @param __cmd Command of simulation process. 22 | * @param __args Arguments of simulation process. 23 | * @param __log Path of logging name.
24 | * @param __to_stdout True means redirect output of this process to standard output. 25 | * @param __clock_rate the rate of inter-simulator cycle convert. 26 | * @param __pre_copy Files copy to sub-directory of simulator before executing. 27 | */ 28 | ProcessConfig(const std::string& __cmd, const std::vector& __args, 29 | const std::string& __log, bool __to_stdout, double __clock_rate, 30 | const std::string& __pre_copy) 31 | : m_command(__cmd), 32 | m_args(__args), 33 | m_log_file(__log), 34 | m_to_stdout(__to_stdout), 35 | m_clock_rate(__clock_rate), 36 | m_pre_copy(__pre_copy) {} 37 | 38 | public: 39 | /** 40 | * @brief Command of simulation process. 41 | */ 42 | std::string m_command; 43 | /** 44 | * @brief Arguments of simulation process. 45 | */ 46 | std::vector m_args; 47 | /** 48 | * @brief Path of logging name. 49 | */ 50 | std::string m_log_file; 51 | /** 52 | * @brief True means redirect output of this process to standard output. 53 | */ 54 | bool m_to_stdout; 55 | /** 56 | * @brief the rate of inter-simulator cycle convert. 57 | */ 58 | double m_clock_rate; 59 | /** 60 | * @brief Files copy to sub-directory of simulator before executing. 61 | */ 62 | std::string m_pre_copy; 63 | }; 64 | 65 | /** 66 | * @brief Benchmark configuration structure. 67 | */ 68 | class BenchmarkConfig { 69 | public: 70 | /** 71 | * @brief Parse YAML configuration file to get benchmark configuration. 72 | * @param file_name Path of YAML configuration file. 73 | */ 74 | BenchmarkConfig(const std::string& file_name) { 75 | // Get environment variables. 76 | m_benchmark_root = fs::canonical(fs::path(file_name)).parent_path().string(); 77 | if (getenv("SIMULATOR_ROOT") == NULL) { 78 | std::cerr << "The environment variable SIMULATOR_ROOT is not defined.\n"; 79 | exit(EXIT_FAILURE); 80 | } else { 81 | m_simulator_root = getenv("SIMULATOR_ROOT"); 82 | } 83 | 84 | // Parse YAML file. 
85 | YAML::Node config; 86 | try { 87 | config = YAML::LoadFile(file_name); 88 | } catch (const std::exception& e) { 89 | std::cerr << "Error: " << e.what() << "\n"; 90 | exit(EXIT_FAILURE); 91 | } 92 | 93 | // Parse YAML Tree. 94 | yaml_parse(config); 95 | 96 | // Extend environment variables. 97 | extend_env_var(); 98 | } 99 | 100 | /** 101 | * @brief Parse YAML configuration tree. 102 | * @param config Top node of YAML Tree. 103 | */ 104 | void yaml_parse(const YAML::Node& config) { 105 | m_phase1_proc_cfg_list = yaml_parse_phase(config["phase1"]); 106 | m_phase2_proc_cfg_list = yaml_parse_phase(config["phase2"]); 107 | } 108 | 109 | private: 110 | /** 111 | * @brief Parse YAML configuration tree from "phase1" or "phase2". 112 | * @param config "phase1" or "phase2" node of YAML. 113 | */ 114 | std::vector yaml_parse_phase(const YAML::Node& config) { 115 | std::vector proc_list; 116 | for (YAML::const_iterator it = config.begin(); it != config.end(); it++) { 117 | proc_list.push_back(yaml_parse_process(*it)); 118 | } 119 | return proc_list; 120 | } 121 | 122 | /** 123 | * @brief Parse YAML configuration tree below "phase1" or "phase2". 124 | * @param config node below "phase1" or "phase2" of YAML. 125 | */ 126 | ProcessConfig yaml_parse_process(const YAML::Node& config) { 127 | std::string pre_copy; 128 | if (config["pre_copy"]) { 129 | pre_copy = config["pre_copy"].as(); 130 | } 131 | return ProcessConfig(config["cmd"].as(), 132 | config["args"].as >(), 133 | config["log"].as(), config["is_to_stdout"].as(), 134 | config["clock_rate"].as(), pre_copy); 135 | } 136 | 137 | /** 138 | * @brief Extend Environment Variables. 139 | * 140 | * Replace $SIMULATOR_ROOT and $BENCHMARK_ROOT with absoluate address. 
141 | */ 142 | void extend_env_var() { 143 | for (ProcessConfig& config : m_phase1_proc_cfg_list) { 144 | extend_env_var_proc(config); 145 | } 146 | for (ProcessConfig& config : m_phase2_proc_cfg_list) { 147 | extend_env_var_proc(config); 148 | } 149 | } 150 | 151 | /** 152 | * @brief Extend Environment Variables in one process configuration. 153 | * 154 | * Replace $SIMULATOR_ROOT and $BENCHMARK_ROOT with absoluate address. 155 | */ 156 | void extend_env_var_proc(ProcessConfig& proc_config) { 157 | extend_env_var_string(proc_config.m_command); 158 | extend_env_var_string(proc_config.m_log_file); 159 | for (std::string& arg : proc_config.m_args) { 160 | extend_env_var_string(arg); 161 | } 162 | } 163 | 164 | /** 165 | * @brief Extend Environment Variables in one string. 166 | * 167 | * Replace $SIMULATOR_ROOT and $BENCHMARK_ROOT with absoluate address. 168 | */ 169 | void extend_env_var_string(std::string& __str) { 170 | std::size_t find_pos; 171 | while ((find_pos = __str.find("$SIMULATOR_ROOT")) != std::string::npos) { 172 | __str = __str.replace(find_pos, 15, m_simulator_root); 173 | } 174 | while ((find_pos = __str.find("$BENCHMARK_ROOT")) != std::string::npos) { 175 | __str = __str.replace(find_pos, 15, m_benchmark_root); 176 | } 177 | } 178 | 179 | public: 180 | /** 181 | * @brief Environments. 182 | */ 183 | std::string m_benchmark_root; 184 | std::string m_simulator_root; 185 | /** 186 | * @brief List of configuration structures of phase 1. 187 | */ 188 | std::vector m_phase1_proc_cfg_list; 189 | /** 190 | * @brief List of configuration structures of phase 2. 
191 | */ 192 | std::vector m_phase2_proc_cfg_list; 193 | }; 194 | /** 195 | * @} 196 | */ 197 | -------------------------------------------------------------------------------- /interchiplet/includes/cmdline_options.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include "CLI/CLI.hpp" 6 | 7 | /** 8 | * @defgroup cmdline 9 | * @brief Command line parser. 10 | * @{ 11 | */ 12 | /** 13 | * @brief Options from command line. 14 | */ 15 | class CmdLineOptions { 16 | public: 17 | /** 18 | * @brief Constructor. 19 | */ 20 | CmdLineOptions() 21 | : m_bench(), m_cwd(), m_timeout_threshold(5), m_err_rate_threshold(0.005), m_debug(false) {} 22 | 23 | /** 24 | * @brief Read options from command line. 25 | * @param argc Number of argument. 26 | * @param argv String of argument. 27 | */ 28 | int parse(int argc, const char* argv[]) { 29 | CLI::App app{"Lego Chiplet Simulator"}; 30 | app.add_option("bench", m_bench, "Benchmark configuration file (.yml)") 31 | ->required() 32 | ->check(CLI::ExistingFile); 33 | app.add_option("-t,--timeout", m_timeout_threshold, "Time out threshold, in time of round.") 34 | ->check(CLI::PositiveNumber); 35 | app.add_option("-e,--error", m_err_rate_threshold, "Error rate when quit simulation."); 36 | app.add_option("--cwd", m_cwd, "Woring directory for simulation.") 37 | ->check(CLI::ExistingPath); 38 | app.add_flag("--debug", m_debug, "Print debug information."); 39 | 40 | try { 41 | app.parse(argc, argv); 42 | } catch (const CLI::ParseError& e) { 43 | int ret = app.exit(e); 44 | exit(ret); 45 | } 46 | 47 | return 0; 48 | } 49 | 50 | public: 51 | /** 52 | * @brief Path of benchmark configuration yaml. 53 | */ 54 | std::string m_bench; 55 | /** 56 | * @brief New working directory. 57 | */ 58 | std::string m_cwd; 59 | 60 | /** 61 | * @brief Timeout threshold, in term of round. 
62 | */ 63 | long m_timeout_threshold; 64 | /** 65 | * @brief Error rate threshold, used to quit iteration. 66 | */ 67 | double m_err_rate_threshold; 68 | 69 | /** 70 | * @brief Print debug information. 71 | */ 72 | bool m_debug; 73 | }; 74 | /** 75 | * @} 76 | */ 77 | -------------------------------------------------------------------------------- /interchiplet/includes/global_define.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace InterChiplet { 8 | /** 9 | * @brief Syscall ID used in CPU/GPU. 10 | */ 11 | enum SysCallID { 12 | // SYSCALL_TEST_CHANGE = 500, // Test 13 | // SYSCALL_REG_FUNC = 503, // Send register function to pin (deprecated) 14 | // SYSCALL_CONNECT = 504, // Setup connection. 15 | // SYSCALL_DISCONNECT = 505, // Stop connection. 16 | // SYSCALL_GET_LOCAL_ADDR = 506, // Get address of current processor. 17 | // SYSCALL_CHECK_REMOTE_READ = 507, // Check remote read 18 | 19 | SYSCALL_LAUNCH = 501, // Launch request. 20 | SYSCALL_WAITLAUNCH = 502, // Wait launch request. 21 | SYSCALL_BARRIER = 503, // Enter barrier. 22 | SYSCALL_LOCK = 504, // Lock mutex. 23 | SYSCALL_UNLOCK = 505, // Unlock mutex. 24 | SYSCALL_REMOTE_READ = 506, // Read cross chiplet 25 | SYSCALL_REMOTE_WRITE = 507, // Write cross chiplet 26 | }; 27 | 28 | /** 29 | * @brief Time type used between simulators. 30 | */ 31 | typedef unsigned long long TimeType; 32 | 33 | /** 34 | * @brief Time type used by interchiplet module. 35 | */ 36 | typedef double InnerTimeType; 37 | 38 | /** 39 | * @brief Address type. 40 | */ 41 | typedef std::vector AddrType; 42 | 43 | #define DIM_X(addr) (addr[0]) 44 | #define DIM_Y(addr) (addr[1]) 45 | #define UNSPECIFIED_ADDR(addr) ((addr[0]) < 0 && (addr[1]) < 0) 46 | 47 | /** 48 | * @brief Type of synchronization command between simulators.
49 | */ 50 | enum SyncCommType { 51 | SC_CYCLE, 52 | SC_SEND, 53 | SC_RECEIVE, 54 | SC_BARRIER, 55 | SC_LOCK, 56 | SC_UNLOCK, 57 | SC_LAUNCH, 58 | SC_WAITLAUNCH, 59 | SC_READ, 60 | SC_WRITE, 61 | SC_SYNC, 62 | SC_RESULT, 63 | }; 64 | 65 | /** 66 | * @brief Behavior descriptor of synchronization protocol. 67 | */ 68 | enum SyncProtocolDesc { 69 | /** 70 | * @brief Acknowledge. bit 0. 71 | */ 72 | SPD_ACK = 0x01, 73 | /** 74 | * @brief Synchronization before data transmission. bit 1. 75 | */ 76 | SPD_PRE_SYNC = 0x02, 77 | /** 78 | * @brief Synchronization after data transmission. bit 2. 79 | */ 80 | SPD_POST_SYNC = 0x04, 81 | /** 82 | * @brief Launch behavior. bit 16. 83 | */ 84 | SPD_LAUNCH = 0x10000, 85 | /** 86 | * @brief Barrier behavior. bit 17. 87 | */ 88 | SPD_BARRIER = 0x20000, 89 | /** 90 | * @brief Lock behavior. bit 18. 91 | */ 92 | SPD_LOCK = 0x40000, 93 | /** 94 | * @brief Unlock behavior. bit 19. 95 | */ 96 | SPD_UNLOCK = 0x80000, 97 | }; 98 | 99 | /** 100 | * @brief Structure of synchronization command. 101 | */ 102 | class SyncCommand { 103 | public: 104 | /** 105 | * @brief Type of synchronization command. 106 | */ 107 | SyncCommType m_type; 108 | /** 109 | * @brief Cycle to send/receive command. 110 | */ 111 | InnerTimeType m_cycle; 112 | /** 113 | * @brief Cycle convert rate. 114 | */ 115 | double m_clock_rate; 116 | /** 117 | * @brief Source address. 118 | */ 119 | AddrType m_src; 120 | /** 121 | * @brief Destination address. 122 | */ 123 | AddrType m_dst; 124 | /** 125 | * @brief Number of bytes to write. 126 | */ 127 | int m_nbytes; 128 | /** 129 | * @brief Descriptor of synchronization behavior. 130 | */ 131 | long m_desc; 132 | 133 | /** 134 | * @brief List of result strings. 135 | */ 136 | std::vector m_res_list; 137 | 138 | /** 139 | * @brief File descriptor to write response of this command.
140 | * 141 | * For example, if one entity presents a READ command, the SYNC command in response to this READ 142 | * command should be sent to this file descriptor. 143 | */ 144 | int m_stdin_fd; 145 | }; 146 | } // namespace InterChiplet 147 | -------------------------------------------------------------------------------- /interchiplet/includes/net_bench.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | #include "global_define.h" 7 | 8 | #define PAC_PAYLOAD_BIT 512 9 | #define PAC_PAYLOAD_BYTE (PAC_PAYLOAD_BIT / 8) 10 | 11 | /** 12 | * @defgroup net_bench 13 | * @brief Network benchmark interface. 14 | * @{ 15 | */ 16 | /** 17 | * @brief Structure of one package in network. 18 | */ 19 | class NetworkBenchItem { 20 | public: 21 | /** 22 | * @brief Package injection cycle from the source side. 23 | */ 24 | InterChiplet::InnerTimeType m_src_cycle; 25 | /** 26 | * @brief Package injection cycle from the destination side. 27 | */ 28 | InterChiplet::InnerTimeType m_dst_cycle; 29 | /** 30 | * @brief Packet id. (Not used yet.) 31 | */ 32 | uint64_t m_id; 33 | /** 34 | * @brief Source address. 35 | */ 36 | InterChiplet::AddrType m_src; 37 | /** 38 | * @brief Destination address. 39 | */ 40 | InterChiplet::AddrType m_dst; 41 | /** 42 | * @brief Size of package in bytes. 43 | */ 44 | int m_pac_size; 45 | /** 46 | * @brief Synchronization protocol descriptor. 47 | */ 48 | long m_desc; 49 | 50 | public: 51 | /** 52 | * @brief Construct an empty NetworkBenchItem. 53 | */ 54 | NetworkBenchItem() {} 55 | 56 | /** 57 | * @brief Construct NetworkBenchItem from SyncCommand. 58 | * @param __src_cmd Structure of source command. 59 | * @param __dst_cmd Structure of destination command.
60 | */ 61 | NetworkBenchItem(const InterChiplet::SyncCommand& __src_cmd, 62 | const InterChiplet::SyncCommand& __dst_cmd) 63 | : m_src_cycle(__src_cmd.m_cycle), 64 | m_dst_cycle(__dst_cmd.m_cycle), 65 | m_dst(__src_cmd.m_dst), 66 | m_src(__src_cmd.m_src), 67 | m_pac_size(1), 68 | m_desc(__src_cmd.m_desc | __dst_cmd.m_desc) { 69 | // Calculate the number of flit. 70 | // One head flit is required any way. 71 | m_pac_size = __src_cmd.m_nbytes / PAC_PAYLOAD_BYTE + 72 | ((__src_cmd.m_nbytes % PAC_PAYLOAD_BYTE) > 0 ? 1 : 0) + 1; 73 | } 74 | 75 | /** 76 | * @brief Construct NetworkBenchItem from SyncCommand. 77 | * @param __src_cmd Structure of source command. 78 | */ 79 | NetworkBenchItem(const InterChiplet::SyncCommand& __src_cmd) 80 | : m_src_cycle(__src_cmd.m_cycle), 81 | m_dst_cycle(__src_cmd.m_cycle), 82 | m_dst(__src_cmd.m_dst), 83 | m_src(__src_cmd.m_src), 84 | m_pac_size(1), 85 | m_desc(__src_cmd.m_desc) { 86 | // Calculate the number of flit. 87 | // One head flit is required any way. 88 | m_pac_size = __src_cmd.m_nbytes / PAC_PAYLOAD_BYTE + 89 | ((__src_cmd.m_nbytes % PAC_PAYLOAD_BYTE) > 0 ? 1 : 0) + 1; 90 | } 91 | 92 | /** 93 | * @brief Overloading operator <<. 94 | * 95 | * Write NetworkBenchItem to output stream. 96 | */ 97 | friend std::ostream& operator<<(std::ostream& os, const NetworkBenchItem& __item) { 98 | os << __item.m_src_cycle << " " << __item.m_dst_cycle << " " << DIM_X(__item.m_src) << " " 99 | << DIM_Y(__item.m_src) << " " << DIM_X(__item.m_dst) << " " << DIM_Y(__item.m_dst) << " " 100 | << __item.m_pac_size << " " << __item.m_desc; 101 | return os; 102 | } 103 | 104 | /** 105 | * @brief Overloading operator >>. 106 | * 107 | * Read NetworkBenchItem from input stream. 
108 | */ 109 | friend std::istream& operator>>(std::istream& os, NetworkBenchItem& __item) { 110 | os >> __item.m_src_cycle >> __item.m_dst_cycle; 111 | long src_x = 0, src_y = 0, dst_x = 0, dst_y = 0; /* Defined values even when the stream has already failed. */ 112 | os >> src_x >> src_y >> dst_x >> dst_y; 113 | __item.m_src.assign(1, src_x); /* assign() overwrites, so a reused item never keeps stale coordinates. */ 114 | __item.m_src.push_back(src_y); 115 | __item.m_dst.assign(1, dst_x); 116 | __item.m_dst.push_back(dst_y); 117 | os >> __item.m_pac_size >> __item.m_desc; 118 | return os; 119 | } 120 | }; 121 | 122 | /** 123 | * @brief List of network benchmark item. 124 | */ 125 | class NetworkBenchList : public std::multimap { 126 | public: 127 | /** 128 | * @brief Construct NetworkBenchList. 129 | */ 130 | NetworkBenchList() : std::multimap() {} 131 | 132 | /** 133 | * @brief Insert item into list. 134 | * 135 | * Take the start cycle on source side as ordering key. 136 | */ 137 | void insert(const NetworkBenchItem& __item) { 138 | std::multimap::insert( 139 | std::pair(__item.m_src_cycle, __item)); 140 | } 141 | 142 | /** 143 | * @brief Dump benchmark list to specified file. 144 | * @param __file_name Path to benchmark file. 145 | * @param __clock_rate Clock ratio (Simulator clock/Interchiplet clock).
146 | */ 147 | void dumpBench(const std::string& __file_name, double __clock_rate) { 148 | std::ofstream bench_of(__file_name, std::ios::out); 149 | for (auto& it : *this) { 150 | bench_of << static_cast(it.second.m_src_cycle * __clock_rate) 151 | << " " 152 | << static_cast(it.second.m_dst_cycle * __clock_rate) 153 | << " " << DIM_X(it.second.m_src) << " " << DIM_Y(it.second.m_src) << " " 154 | << DIM_X(it.second.m_dst) << " " << DIM_Y(it.second.m_dst) << " " 155 | << it.second.m_pac_size << " " << it.second.m_desc << std::endl; 156 | } 157 | bench_of.flush(); 158 | bench_of.close(); 159 | } 160 | }; 161 | /** 162 | * @} 163 | */ 164 | -------------------------------------------------------------------------------- /interchiplet/srcs/apis_c.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "apis_c.h" 3 | 4 | #include "global_define.h" 5 | /* Each wrapper forwards its arguments to the matching SysCallID and keeps the result in syscall_return_t, because an int temporary would narrow the long returned by syscall(). */ 6 | namespace InterChiplet { 7 | syscall_return_t barrier(int64_t __uid, int64_t __src_x, int64_t __src_y, int64_t __count) { 8 | syscall_return_t ret_code = syscall(SYSCALL_BARRIER, __uid, __src_x, __src_y, __count); 9 | return ret_code; 10 | } 11 | 12 | syscall_return_t lock(int64_t __uid, int64_t __src_x, int64_t __src_y) { 13 | syscall_return_t ret_code = syscall(SYSCALL_LOCK, __uid, __src_x, __src_y); 14 | return ret_code; 15 | } 16 | 17 | syscall_return_t unlock(int64_t __uid, int64_t __src_x, int64_t __src_y) { 18 | syscall_return_t ret_code = syscall(SYSCALL_UNLOCK, __uid, __src_x, __src_y); 19 | return ret_code; 20 | } 21 | 22 | syscall_return_t launch(int64_t __dst_x, int64_t __dst_y, int64_t __src_x, 23 | int64_t __src_y) { 24 | syscall_return_t ret_code = syscall(SYSCALL_LAUNCH, __dst_x, __dst_y, __src_x, __src_y); 25 | return ret_code; 26 | } 27 | 28 | syscall_return_t waitLaunch(int64_t __dst_x, int64_t __dst_y, int64_t* __src_x, 29 | int64_t* __src_y) { 30 | syscall_return_t ret_code = syscall(SYSCALL_WAITLAUNCH, __dst_x, __dst_y, __src_x, __src_y); 31 | return ret_code; 32 | } 33 | 34 | syscall_return_t sendMessage(int64_t
__dst_x, int64_t __dst_y, int64_t __src_x, int64_t __src_y, 35 | void* __addr, int64_t __nbyte) { 36 | syscall_return_t ret_code = 37 | syscall(SYSCALL_REMOTE_WRITE, __dst_x, __dst_y, __src_x, __src_y, __addr, __nbyte); 38 | return ret_code; 39 | } 40 | syscall_return_t receiveMessage(int64_t __dst_x, int64_t __dst_y, int64_t __src_x, int64_t __src_y, 41 | void* __addr, int64_t __nbyte) { 42 | syscall_return_t ret_code = 43 | syscall(SYSCALL_REMOTE_READ, __dst_x, __dst_y, __src_x, __src_y, __addr, __nbyte); 44 | return ret_code; 45 | } 46 | } // namespace InterChiplet 47 | -------------------------------------------------------------------------------- /patch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ -n "${SIMULATOR_ROOT}" ]]; then 4 | echo "SIMULATOR_ROOT is: ${SIMULATOR_ROOT}" 5 | else 6 | echo "The environment variable SIMULATOR_ROOT is not defined." 7 | exit 1 8 | fi 9 | 10 | cd ${SIMULATOR_ROOT} 11 | rm -rf .changed_files 12 | mkdir .changed_files 13 | 14 | # Patch for Sniper 15 | cd ${SIMULATOR_ROOT}/snipersim 16 | git diff > ../interchiplet/patch/snipersim.diff 17 | snipersim_changed_file_list="$(git diff --name-only)" 18 | 19 | cd ${SIMULATOR_ROOT} 20 | file_list=($snipersim_changed_file_list) 21 | echo ${#file_list[@]} "Files has changed." 22 | for item in "${file_list[@]}"; do 23 | echo $item 24 | cp --parent snipersim/$item .changed_files/ 25 | done 26 | 27 | # Patch for GPGPUSim 28 | cd ${SIMULATOR_ROOT}/gpgpu-sim 29 | git diff > ../interchiplet/patch/gpgpu-sim.diff 30 | gpgpu_sim_changed_file_list="$(git diff --name-only)" 31 | 32 | cd ${SIMULATOR_ROOT} 33 | file_list=($gpgpu_sim_changed_file_list) 34 | echo ${#file_list[@]} "Files has changed."
35 | for item in "${file_list[@]}"; do 36 | echo $item 37 | cp --parent gpgpu-sim/$item .changed_files/ 38 | done 39 | 40 | # Patch for GEM5 41 | cd ${SIMULATOR_ROOT}/gem5 42 | git diff > ../interchiplet/patch/gem5.diff 43 | gem5_changed_file_list="$(git diff --name-only)" 44 | 45 | cd ${SIMULATOR_ROOT} 46 | file_list=($gem5_changed_file_list) 47 | echo ${#file_list[@]} "Files has changed." 48 | for item in "${file_list[@]}"; do 49 | echo $item 50 | cp --parent gem5/$item .changed_files/ 51 | done 52 | -------------------------------------------------------------------------------- /setup_env.sh: -------------------------------------------------------------------------------- 1 | export PATH=$PATH:/usr/local/cuda/bin 2 | #export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib:/usr/local/cuda/lib64 3 | export CUDA_INSTALL_PATH=/usr/local/cuda 4 | export SIMULATOR_ROOT="$(pwd)" 5 | 6 | source gpgpu-sim/setup_environment 7 | -------------------------------------------------------------------------------- /test/test_benchmark_config.cpp: -------------------------------------------------------------------------------- 1 | #include "C:\Users\win10\Desktop\Code\scheme3\includes\benchmark_yaml.h" 2 | #include 3 | #include 4 | 5 | TEST(BenchmarkConfigTest, TestParseConfig) { 6 | std::ofstream file("test_config.yaml"); 7 | file << "cmd: sim_command\n" 8 | << "args: [arg1, arg2]\n" 9 | << "log: log.txt\n" 10 | << "is_to_stdout: true\n" 11 | << "clock_rate: 1.0\n" 12 | << "pre_copy: pre_copy\n" 13 | << "chip_frequency: 1000"; 14 | file.close(); 15 | 16 | BenchmarkConfig config("test_config.yaml"); 17 | EXPECT_EQ(config.m_proc_cfg_list.size(), 1); 18 | EXPECT_EQ(config.m_proc_cfg_list[0].m_command, "sim_command"); 19 | EXPECT_EQ(config.m_proc_cfg_list[0].m_args.size(), 2); 20 | EXPECT_EQ(config.m_proc_cfg_list[0].m_log_file, "log.txt"); 21 | EXPECT_EQ(config.m_proc_cfg_list[0].m_to_stdout, true); 22 | EXPECT_EQ(config.m_proc_cfg_list[0].m_clock_rate, 1.0); 23 | 
EXPECT_EQ(config.m_proc_cfg_list[0].m_pre_copy, "pre_copy"); 24 | EXPECT_EQ(config.m_proc_cfg_list[0].m_chip_frequency, 1000); 25 | } -------------------------------------------------------------------------------- /test/test_cmd_handler.cpp: -------------------------------------------------------------------------------- 1 | #include "cmd_handler.h" 2 | #include "global_manager.h" 3 | #include 4 | 5 | class CmdHandlerTest : public ::testing::Test { 6 | protected: 7 | GlobalManager gm; 8 | 9 | void SetUp() override { 10 | gm = GlobalManager(); 11 | } 12 | }; 13 | 14 | TEST_F(CmdHandlerTest, TestHandleSendCmd) { 15 | InterChiplet::SyncCommand cmd; 16 | cmd.m_type = InterChiplet::SC_SEND; 17 | cmd.m_src = {"Sim1"}; 18 | cmd.m_dst = {"Sim2"}; 19 | cmd.m_cycle = 10.0; 20 | cmd.m_clock_rate = 1.0; 21 | cmd.m_nbytes = 100; 22 | handle_send_cmd(cmd, &gm); 23 | EXPECT_EQ(gm.requestList.size(), 1); 24 | } 25 | 26 | TEST_F(CmdHandlerTest, TestHandleReceiveCmd) { 27 | InterChiplet::SyncCommand cmd; 28 | cmd.m_type = InterChiplet::SC_RECEIVE; 29 | cmd.m_src = {"Sim2"}; 30 | cmd.m_dst = {"Sim1"}; 31 | cmd.m_cycle = 10.0; 32 | cmd.m_clock_rate = 1.0; 33 | cmd.m_nbytes = 100; 34 | handle_receive_cmd(cmd, &gm); 35 | EXPECT_EQ(gm.requestList.size(), 1); 36 | } -------------------------------------------------------------------------------- /test/test_cmdline_options: -------------------------------------------------------------------------------- 1 | #include "cmdline_options.h" 2 | #include 3 | 4 | TEST(CmdLineOptionsTest, TestParseOptions) { 5 | const char* argv[] = {"program", "--bench", "bench.yaml", "--cwd", "/tmp", "--timeout", "10", "--error", "0.01", "--debug"}; 6 | int argc = sizeof(argv) / sizeof(argv[0]); 7 | 8 | CmdLineOptions options; 9 | options.parse(argc, argv); 10 | 11 | EXPECT_EQ(options.m_bench, "bench.yaml"); 12 | EXPECT_EQ(options.m_cwd, "/tmp"); 13 | EXPECT_EQ(options.m_timeout_threshold, 10); 14 | EXPECT_EQ(options.m_err_rate_threshold, 0.01); 15 | 
EXPECT_EQ(options.m_debug, true); 16 | } 17 | -------------------------------------------------------------------------------- /test/test_global_manager.cpp: -------------------------------------------------------------------------------- 1 | #include "C:\Users\win10\Desktop\Code\scheme3\includes\global_manager.h" 2 | #include 3 | 4 | class GlobalManagerTest : public ::testing::Test { 5 | protected: 6 | GlobalManager gm; 7 | 8 | void SetUp() override { 9 | gm = GlobalManager(); 10 | } 11 | }; 12 | 13 | TEST_F(GlobalManagerTest, TestAddRequest) { 14 | gm.AddRequest("Sim1", "Sim2", "Data", 10.0, 1.0, Behavior::SEND); 15 | gm.AddRequest("Sim2", "Sim1", "Data", 10.0, 1.0, Behavior::RECEIVE); 16 | EXPECT_EQ(gm.requestList.size(), 2); 17 | } 18 | 19 | TEST_F(GlobalManagerTest, TestCheckPair) { 20 | gm.AddRequest("Sim1", "Sim2", "Data", 10.0, 1.0, Behavior::SEND); 21 | gm.AddRequest("Sim2", "Sim1", "Data", 10.0, 1.0, Behavior::RECEIVE); 22 | EXPECT_TRUE(gm.CheckPair()); 23 | } 24 | 25 | TEST_F(GlobalManagerTest, TestUpdateWaterline) { 26 | gm.processClocks["Sim1"] = 5.0; 27 | gm.processClocks["Sim2"] = 10.0; 28 | gm.updateWaterline(); 29 | EXPECT_EQ(gm.waterLine, 5.0); 30 | } 31 | 32 | TEST_F(GlobalManagerTest, TestCalculateNetworkDelay) { 33 | gm.processClocks["Sim1"] = 0.0; 34 | gm.processClocks["Sim2"] = 0.0; 35 | gm.calculateNetworkDelay("Sim1", "Sim2", 100); 36 | EXPECT_GT(gm.processClocks["Sim1"], 0.0); 37 | EXPECT_GT(gm.processClocks["Sim2"], 0.0); 38 | } -------------------------------------------------------------------------------- /test/test_network_lists.cpp: -------------------------------------------------------------------------------- 1 | #include "net_bench.h" 2 | #include "net_delay.h" 3 | #include 4 | 5 | TEST(NetworkBenchListTest, TestInsertAndDump) { 6 | InterChiplet::NetworkBenchList bench_list; 7 | InterChiplet::NetworkBenchItem item(10.0, {1, 2}, {3, 4}, 100, 1); 8 | bench_list.insert(item); 9 | bench_list.dumpBench("bench.txt", 1.0); 10 | 11 | 
std::ifstream file("bench.txt"); 12 | std::string content((std::istreambuf_iterator(file)), std::istreambuf_iterator()); 13 | EXPECT_NE(content.find("10 10 1 3 100 1"), std::string::npos); 14 | } 15 | 16 | TEST(NetworkDelayListTest, TestInsertAndDump) { 17 | InterChiplet::NetworkDelayList delay_list; 18 | InterChiplet::NetworkDelayItem item(10.0, {1, 2}, {3, 4}, 1, {1.0, 2.0}); 19 | delay_list.insert(10.0, item); 20 | delay_list.dumpDelay("delay.txt"); 21 | 22 | std::ifstream file("delay.txt"); 23 | std::string content((std::istreambuf_iterator(file)), std::istreambuf_iterator()); 24 | EXPECT_NE(content.find("10 1 3 1 2 1.0 2.0"), std::string::npos); 25 | } -------------------------------------------------------------------------------- /test/test_pipe_comm.cpp: -------------------------------------------------------------------------------- 1 | #include "pipe_comm.h" 2 | #include 3 | #include 4 | 5 | TEST(PipeCommTest, TestPipeCommunication) { 6 | const char* pipe_name = "/tmp/test_pipe"; 7 | InterChiplet::PipeComm pipe_comm; 8 | 9 | // Write data to pipe 10 | std::string data = "Hello, World!"; 11 | pipe_comm.write_data(pipe_name, data.c_str(), data.size()); 12 | 13 | // Read data from pipe 14 | char buffer[1024]; 15 | int bytes_read = pipe_comm.read_data(pipe_name, buffer, sizeof(buffer) - 1); 16 | buffer[bytes_read] = '\0'; 17 | EXPECT_EQ(std::string(buffer), data); 18 | } -------------------------------------------------------------------------------- /test/test_process_struct.cpp: -------------------------------------------------------------------------------- 1 | #include "interchiplets.cpp" 2 | #include 3 | 4 | TEST(ProcessStructTest, TestProcessStruct) { 5 | ProcessConfig config("sim_command", {"arg1", "arg2"}, "log.txt", true, 1.0, "pre_copy", 1000); 6 | ProcessStruct proc_struct(config); 7 | EXPECT_EQ(proc_struct.m_command, "sim_command"); 8 | EXPECT_EQ(proc_struct.m_args.size(), 2); 9 | EXPECT_EQ(proc_struct.m_log_file, "log.txt"); 10 | 
EXPECT_EQ(proc_struct.m_to_stdout, true); 11 | EXPECT_EQ(proc_struct.m_clock_rate, 1.0); 12 | EXPECT_EQ(proc_struct.m_pre_copy, "pre_copy"); 13 | EXPECT_EQ(proc_struct.m_chip_frequency, 1000); 14 | } -------------------------------------------------------------------------------- /test_benchmark_config.cpp: -------------------------------------------------------------------------------- 1 | #include "C:\Users\win10\Desktop\Code\scheme3\includes\benchmark_yaml.h" 2 | #include 3 | #include 4 | 5 | TEST(BenchmarkConfigTest, TestParseConfig) { 6 | std::ofstream file("test_config.yaml"); 7 | file << "cmd: sim_command\n" 8 | << "args: [arg1, arg2]\n" 9 | << "log: log.txt\n" 10 | << "is_to_stdout: true\n" 11 | << "clock_rate: 1.0\n" 12 | << "pre_copy: pre_copy\n" 13 | << "chip_frequency: 1000"; 14 | file.close(); 15 | 16 | BenchmarkConfig config("test_config.yaml"); 17 | EXPECT_EQ(config.m_proc_cfg_list.size(), 1); 18 | EXPECT_EQ(config.m_proc_cfg_list[0].m_command, "sim_command"); 19 | EXPECT_EQ(config.m_proc_cfg_list[0].m_args.size(), 2); 20 | EXPECT_EQ(config.m_proc_cfg_list[0].m_log_file, "log.txt"); 21 | EXPECT_EQ(config.m_proc_cfg_list[0].m_to_stdout, true); 22 | EXPECT_EQ(config.m_proc_cfg_list[0].m_clock_rate, 1.0); 23 | EXPECT_EQ(config.m_proc_cfg_list[0].m_pre_copy, "pre_copy"); 24 | EXPECT_EQ(config.m_proc_cfg_list[0].m_chip_frequency, 1000); 25 | } -------------------------------------------------------------------------------- /test_cmd_handler.cpp: -------------------------------------------------------------------------------- 1 | #include "cmd_handler.h" 2 | #include "global_manager.h" 3 | #include 4 | 5 | class CmdHandlerTest : public ::testing::Test { 6 | protected: 7 | GlobalManager gm; 8 | 9 | void SetUp() override { 10 | gm = GlobalManager(); 11 | } 12 | }; 13 | 14 | TEST_F(CmdHandlerTest, TestHandleSendCmd) { 15 | InterChiplet::SyncCommand cmd; 16 | cmd.m_type = InterChiplet::SC_SEND; 17 | cmd.m_src = {"Sim1"}; 18 | cmd.m_dst = {"Sim2"}; 19 | 
cmd.m_cycle = 10.0; 20 | cmd.m_clock_rate = 1.0; 21 | cmd.m_nbytes = 100; 22 | handle_send_cmd(cmd, &gm); 23 | EXPECT_EQ(gm.requestList.size(), 1); 24 | } 25 | 26 | TEST_F(CmdHandlerTest, TestHandleReceiveCmd) { 27 | InterChiplet::SyncCommand cmd; 28 | cmd.m_type = InterChiplet::SC_RECEIVE; 29 | cmd.m_src = {"Sim2"}; 30 | cmd.m_dst = {"Sim1"}; 31 | cmd.m_cycle = 10.0; 32 | cmd.m_clock_rate = 1.0; 33 | cmd.m_nbytes = 100; 34 | handle_receive_cmd(cmd, &gm); 35 | EXPECT_EQ(gm.requestList.size(), 1); 36 | } --------------------------------------------------------------------------------