├── .changed_files
    ├── gem5
    │   └── src
    │   │   ├── arch
    │   │       ├── arm
    │   │       │   └── linux
    │   │       │   │   └── se_workload.cc
    │   │       └── x86
    │   │       │   └── linux
    │   │       │       ├── syscall_tbl32.cc
    │   │       │       └── syscall_tbl64.cc
    │   │   ├── cpu
    │   │       ├── o3
    │   │       │   └── cpu.cc
    │   │       └── simple
    │   │       │   ├── atomic.cc
    │   │       │   └── timing.cc
    │   │   └── sim
    │   │       ├── eventq.cc
    │   │       ├── eventq.hh
    │   │       ├── sim_events.cc
    │   │       ├── syscall_emul.cc
    │   │       └── syscall_emul.hh
    ├── gpgpu-sim
    │   ├── libcuda
    │   │   ├── Makefile
    │   │   └── cuda_runtime_api.cc
    │   └── src
    │   │   └── gpgpu-sim
    │   │       ├── gpu-sim.cc
    │   │       └── gpu-sim.h
    └── snipersim
    │   ├── common
    │       └── core
    │       │   ├── core.cc
    │       │   └── syscall_model.cc
    │   └── sift
    │       └── recorder
    │           └── syscall_modeling.cc
├── .gitignore
├── .gitmodules
├── README.md
├── apply_patch.sh
├── benchmark
    ├── .gitignore
    ├── MLP
    │   ├── config_fermi_islip.icnt
    │   ├── gpuwattch_gtx480.xml
    │   ├── makefile
    │   ├── mlp.cpp
    │   ├── mlp.cu
    │   ├── mlp.yml
    │   ├── readData.py
    │   ├── temp_data.json
    │   └── wine.csv
    ├── matmul
    │   ├── makefile
    │   ├── matmul.cpp
    │   ├── matmul.cu
    │   └── matmul.yml
    └── test
    │   ├── gem5-arm
    │       ├── barrier
    │       │   ├── makefile
    │       │   └── test.yml
    │       ├── launch
    │       │   ├── makefile
    │       │   └── test.yml
    │       ├── lock
    │       │   ├── makefile
    │       │   └── test.yml
    │       └── pipe
    │       │   ├── makefile
    │       │   └── test.yml
    │   ├── gem5
    │       ├── barrier
    │       │   ├── makefile
    │       │   ├── test_atomic.yml
    │       │   ├── test_o3.yml
    │       │   └── test_timing.yml
    │       ├── launch
    │       │   ├── makefile
    │       │   ├── test_atomic.yml
    │       │   ├── test_o3.yml
    │       │   └── test_timing.yml
    │       ├── lock
    │       │   ├── makefile
    │       │   ├── test_atomic.yml
    │       │   ├── test_o3.yml
    │       │   └── test_timing.yml
    │       └── pipe
    │       │   ├── makefile
    │       │   ├── test_atomic.yml
    │       │   ├── test_o3.yml
    │       │   └── test_timing.yml
    │   ├── gpgpu-sim
    │       ├── barrier
    │       │   ├── makefile
    │       │   ├── matmul.cpp
    │       │   ├── matmul.cu
    │       │   └── matmul.yml
    │       ├── launch
    │       │   ├── makefile
    │       │   ├── matmul.cpp
    │       │   ├── matmul.cu
    │       │   └── matmul.yml
    │       ├── lock
    │       │   ├── makefile
    │       │   ├── matmul.cpp
    │       │   ├── matmul.cu
    │       │   └── matmul.yml
    │       └── pipe
    │       │   ├── makefile
    │       │   ├── matmul.cpp
    │       │   ├── matmul.cu
    │       │   └── matmul.yml
    │   └── snipersim
    │       ├── barrier
    │           ├── makefile
    │           ├── test.cpp
    │           └── test.yml
    │       ├── launch
    │           ├── makefile
    │           ├── test.cpp
    │           └── test.yml
    │       ├── lock
    │           ├── makefile
    │           ├── test.cpp
    │           └── test.yml
    │       └── pipe
    │           ├── makefile
    │           ├── test.cpp
    │           └── test.yml
├── docs
    ├── .gitignore
    ├── docs
    │   ├── 01-quick-start.md
    │   ├── 02-benchmarks
    │   │   ├── 021-apis
    │   │   │   └── index.md
    │   │   └── 022-yaml.md
    │   ├── 03-sync-proto
    │   │   ├── 030-net-proto.md
    │   │   ├── 031-communication.md
    │   │   ├── 032-barrier.md
    │   │   ├── 033-lock.md
    │   │   ├── 034-launch.md
    │   │   ├── 035-cycle.md
    │   │   └── index.md
    │   ├── 04-import-sim
    │   │   ├── 041-snipersim.md
    │   │   ├── 042-gpgpusim.md
    │   │   ├── 043-gem5.md
    │   │   └── index.md
    │   ├── images
    │   │   ├── Chiplet Simulator.drawio
    │   │   ├── architect_example_1.drawio.svg
    │   │   ├── architect_example_2.drawio.svg
    │   │   ├── interchiplet_architect.drawio.svg
    │   │   ├── interchiplet_dataflow.drawio.svg
    │   │   └── iteration_flow.drawio.svg
    │   └── index.md
    ├── javascripts
    │   └── mathjax.js
    ├── mkdocs.yml
    └── requirements.txt
├── global_manager_test.cpp
├── interchiplet
    ├── .gitignore
    ├── CMakeLists.txt
    ├── depreciate
    │   ├── CMakeLists.txt
    │   ├── interchiplet_client.cpp
    │   ├── interchiplet_client.h
    │   ├── record_transfer.cpp
    │   ├── sniper_change.cpp
    │   └── zmq_pro.cpp
    ├── includes
    │   ├── apis_c.h
    │   ├── apis_cu.h
    │   ├── benchmark_yaml.h
    │   ├── cmd_handler.h
    │   ├── cmdline_options.h
    │   ├── global_define.h
    │   ├── net_bench.h
    │   ├── net_delay.h
    │   ├── pipe_comm.h
    │   └── sync_protocol.h
    ├── patch
    │   ├── gem5.diff
    │   ├── gpgpu-sim.diff
    │   └── snipersim.diff
    └── srcs
    │   ├── apis_c.cpp
    │   ├── cmd_handler.cpp
    │   └── interchiplet.cpp
├── patch.sh
├── setup_env.sh
├── test
    ├── test_benchmark_config.cpp
    ├── test_cmd_handler.cpp
    ├── test_cmdline_options
    ├── test_global_manager.cpp
    ├── test_network_lists.cpp
    ├── test_pipe_comm.cpp
    └── test_process_struct.cpp
├── test_benchmark_config.cpp
└── test_cmd_handler.cpp


/.changed_files/gem5/src/sim/sim_events.cc:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2013 ARM Limited
  3 |  * All rights reserved
  4 |  *
  5 |  * The license below extends only to copyright in the software and shall
  6 |  * not be construed as granting a license to any other intellectual
  7 |  * property including but not limited to intellectual property relating
  8 |  * to a hardware implementation of the functionality of the software
  9 |  * licensed hereunder.  You may use the software subject to the license
 10 |  * terms below provided that you ensure that this notice is replicated
 11 |  * unmodified and in its entirety in all distributions of the software,
 12 |  * modified or unmodified, in source code or in binary form.
 13 |  *
 14 |  * Copyright (c) 2002-2005 The Regents of The University of Michigan
 15 |  * Copyright (c) 2013 Advanced Micro Devices, Inc.
 16 |  * Copyright (c) 2013 Mark D. Hill and David A. Wood
 17 |  * All rights reserved.
 18 |  *
 19 |  * Redistribution and use in source and binary forms, with or without
 20 |  * modification, are permitted provided that the following conditions are
 21 |  * met: redistributions of source code must retain the above copyright
 22 |  * notice, this list of conditions and the following disclaimer;
 23 |  * redistributions in binary form must reproduce the above copyright
 24 |  * notice, this list of conditions and the following disclaimer in the
 25 |  * documentation and/or other materials provided with the distribution;
 26 |  * neither the name of the copyright holders nor the names of its
 27 |  * contributors may be used to endorse or promote products derived from
 28 |  * this software without specific prior written permission.
 29 |  *
 30 |  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 31 |  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 32 |  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 33 |  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 34 |  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 35 |  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 36 |  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 37 |  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 38 |  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 39 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 40 |  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 41 |  */
 42 | 
 43 | #include "sim/sim_events.hh"
 44 | 
 45 | #include <string>
 46 | 
 47 | #include "base/callback.hh"
 48 | #include "sim/eventq.hh"
 49 | #include "sim/sim_exit.hh"
 50 | #include "sim/stats.hh"
 51 | 
 52 | #include "../../../interchiplet/includes/sync_protocol.h"
 53 | 
 54 | namespace gem5
 55 | {
 56 | // Constructor taking an explicit tick: forwards `when`, Sim_Exit_Pri and IsExitEvent to the GlobalEvent base and stores cause/code/repeat.
 57 | GlobalSimLoopExitEvent::GlobalSimLoopExitEvent(Tick when,
 58 |                                                const std::string &_cause,
 59 |                                                int c, Tick r)
 60 |     : GlobalEvent(when, Sim_Exit_Pri, IsExitEvent),
 61 |       cause(_cause), code(c), repeat(r)
 62 | {
 63 | }
 64 | // Constructor without a tick argument: passes curTick() and Minimum_Pri (instead of Sim_Exit_Pri) to the GlobalEvent base, then stores cause/code/repeat.
 65 | GlobalSimLoopExitEvent::GlobalSimLoopExitEvent(const std::string &_cause,
 66 |                                                int c, Tick r)
 67 |     : GlobalEvent(curTick(), Minimum_Pri, IsExitEvent),
 68 |       cause(_cause), code(c), repeat(r)
 69 | {
 70 | }
 71 | // Returns the fixed description string for this event type.
 72 | const char *
 73 | GlobalSimLoopExitEvent::description() const
 74 | {
 75 |     return "global simulation loop exit";
 76 | }
 77 | 
 78 | //
 79 | // handle termination event: when `repeat` is non-zero, call schedule() for
 80 | // curTick() + repeat; when it is zero this function does nothing.
 81 | void
 82 | GlobalSimLoopExitEvent::process()
 83 | {
 84 |     if (repeat) {
 85 |         schedule(curTick() + repeat);
 86 |     }
 87 | }
 88 | // Creates a GlobalSimLoopExitEvent for tick `when + simQuantum`; warns when `serialize` is combined with a delay or a repeat interval.
 89 | void
 90 | exitSimLoop(const std::string &message, int exit_code, Tick when, Tick repeat,
 91 |             bool serialize)
 92 | {
 93 |     // Send exit cycle: pass the current tick to InterChiplet::sendCycleCmd() before the exit event is created.
 94 |     InterChiplet::sendCycleCmd(curTick());
 95 | 
 96 |     warn_if(serialize && (when != curTick() || repeat),
 97 |             "exitSimLoop called with a delay and auto serialization. This is "
 98 |             "currently unsupported.");
 99 | 
100 |     new GlobalSimLoopExitEvent(when + simQuantum, message, exit_code, repeat);  // pointer is not kept here; NOTE(review): presumably the event manages its own lifetime - confirm
101 | }
102 | // Creates a GlobalSimLoopExitEvent through the constructor that takes no tick; the `serialize` argument is not read in this function.
103 | void
104 | exitSimLoopNow(const std::string &message, int exit_code, Tick repeat,
105 |                bool serialize)
106 | {
107 |     // Send exit cycle: pass the current tick to InterChiplet::sendCycleCmd() before the exit event is created.
108 |     InterChiplet::sendCycleCmd(curTick());
109 | 
110 |     new GlobalSimLoopExitEvent(message, exit_code, repeat);
111 | }
112 | // Forwards Sim_Exit_Pri and IsExitEvent to the Event base and stores cause/code/repeat.
113 | LocalSimLoopExitEvent::LocalSimLoopExitEvent(const std::string &_cause, int c,
114 |                                    Tick r)
115 |     : Event(Sim_Exit_Pri, IsExitEvent),
116 |       cause(_cause), code(c), repeat(r)
117 | {
118 | }
119 | 
120 | //
121 | // handle termination event: calls exitSimLoop() with the stored cause and an
122 | // exit code of 0 (the stored `code` and `repeat` members are not used here).
123 | void
124 | LocalSimLoopExitEvent::process()
125 | {
126 |     exitSimLoop(cause, 0);
127 | }
128 | 
129 | // Returns the fixed description string for this event type.
130 | const char *
131 | LocalSimLoopExitEvent::description() const
132 | {
133 |     return "simulation loop exit";
134 | }
135 | // Calls Event::serialize() on `cp`, then applies SERIALIZE_SCALAR to cause, code and repeat (same order as unserialize()).
136 | void
137 | LocalSimLoopExitEvent::serialize(CheckpointOut &cp) const
138 | {
139 |     Event::serialize(cp);
140 | 
141 |     SERIALIZE_SCALAR(cause);
142 |     SERIALIZE_SCALAR(code);
143 |     SERIALIZE_SCALAR(repeat);
144 | }
145 | // Calls Event::unserialize() on `cp`, then applies UNSERIALIZE_SCALAR to cause, code and repeat (same order as serialize()).
146 | void
147 | LocalSimLoopExitEvent::unserialize(CheckpointIn &cp)
148 | {
149 |     Event::unserialize(cp);
150 | 
151 |     UNSERIALIZE_SCALAR(cause);
152 |     UNSERIALIZE_SCALAR(code);
153 |     UNSERIALIZE_SCALAR(repeat);
154 | }
155 | 
156 | //
157 | // constructor: stores the cause and initialises downCounter from `counter`.
158 | // NOTE(review): no scheduling happens in this body; confirm where it is done.
159 | CountedExitEvent::CountedExitEvent(const std::string &_cause, int &counter)
160 |     : Event(Sim_Exit_Pri), cause(_cause), downCounter(counter)
161 | {
162 |     // catch stupid mistakes: the counter must be positive on entry
163 |     assert(downCounter > 0);
164 | }
165 | 
166 | 
167 | //
168 | // handle termination event: decrements downCounter and, once it reaches zero,
169 | // calls exitSimLoop() with the stored cause and an exit code of 0.
170 | void
171 | CountedExitEvent::process()
172 | {
173 |     if (--downCounter == 0) {
174 |         exitSimLoop(cause, 0);
175 |     }
176 | }
177 | 
178 | // Returns the fixed description string for this event type.
179 | const char *
180 | CountedExitEvent::description() const
181 | {
182 |     return "counted exit";
183 | }
184 | 
185 | } // namespace gem5
186 | 


--------------------------------------------------------------------------------
/.changed_files/gpgpu-sim/libcuda/Makefile:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2009 by Tor M. Aamodt, Ali Bakhoda and the 
  2 | # University of British Columbia
  3 | # Vancouver, BC  V6T 1Z4
  4 | # All Rights Reserved.
  5 | # 
  6 | # THIS IS A LEGAL DOCUMENT BY DOWNLOADING GPGPU-SIM, YOU ARE AGREEING TO THESE
  7 | # TERMS AND CONDITIONS.
  8 | # 
  9 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 10 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 11 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 12 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNERS OR CONTRIBUTORS BE
 13 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 14 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 15 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 16 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 17 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 18 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 19 | # POSSIBILITY OF SUCH DAMAGE.
 20 | # 
 21 | # NOTE: The files libcuda/cuda_runtime_api.c and src/cuda-sim/cuda-math.h
 22 | # are derived from the CUDA Toolset available from http://www.nvidia.com/cuda
 23 | # (property of NVIDIA).  The files benchmarks/BlackScholes/ and 
 24 | # benchmarks/template/ are derived from the CUDA SDK available from 
 25 | # http://www.nvidia.com/cuda (also property of NVIDIA).  The files from 
 26 | # src/intersim/ are derived from Booksim (a simulator provided with the 
 27 | # textbook "Principles and Practices of Interconnection Networks" available 
 28 | # from http://cva.stanford.edu/books/ppin/). As such, those files are bound by 
 29 | # the corresponding legal terms and conditions set forth separately (original 
 30 | # copyright notices are left in files from these sources and where we have 
 31 | # modified a file our copyright notice appears before the original copyright 
 32 | # notice).  
 33 | # 
 34 | # Using this version of GPGPU-Sim requires a complete installation of CUDA 
 35 | # which is distributed seperately by NVIDIA under separate terms and 
 36 | # conditions.  To use this version of GPGPU-Sim with OpenCL requires a
 37 | # recent version of NVIDIA's drivers which support OpenCL.
 38 | # 
 39 | # Redistribution and use in source and binary forms, with or without
 40 | # modification, are permitted provided that the following conditions are met:
 41 | # 
 42 | # 1. Redistributions of source code must retain the above copyright notice,
 43 | # this list of conditions and the following disclaimer.
 44 | # 
 45 | # 2. Redistributions in binary form must reproduce the above copyright notice,
 46 | # this list of conditions and the following disclaimer in the documentation
 47 | # and/or other materials provided with the distribution.
 48 | # 
 49 | # 3. Neither the name of the University of British Columbia nor the names of
 50 | # its contributors may be used to endorse or promote products derived from
 51 | # this software without specific prior written permission.
 52 | # 
 53 | # 4. This version of GPGPU-SIM is distributed freely for non-commercial use only.  
 54 | #  
 55 | # 5. No nonprofit user may place any restrictions on the use of this software,
 56 | # including as modified by the user, by any other authorized user.
 57 | # 
 58 | # 6. GPGPU-SIM was developed primarily by Tor M. Aamodt, Wilson W. L. Fung, 
 59 | # Ali Bakhoda, George L. Yuan, at the University of British Columbia, 
 60 | # Vancouver, BC V6T 1Z4
 61 | 
 62 | 
 63 | include ../version_detection.mk
 64 | # Define GL (-DOPENGL_SUPPORT) only when OPENGL_SUPPORT is 1; GL is appended to CXXFLAGS below.
 65 | ifeq ($(OPENGL_SUPPORT),1) 
 66 | 	GL = -DOPENGL_SUPPORT
 67 | endif
 68 | # Start CXXFLAGS with -std=c++0x when GNUC_CPP0X is 1.
 69 | ifeq ($(GNUC_CPP0X), 1)
 70 |     CXXFLAGS = -std=c++0x
 71 | endif
 72 | 
 73 | # Compiler commands and flags: DEBUG=1 leaves -O3 out of CXXFLAGS; every variant keeps -fPIC.
 74 | CPP = g++ $(SNOW)
 75 | CC = gcc $(SNOW)
 76 | CREATELIBRARY    =  1
 77 | DEBUG ?= 0
 78 | ifeq ($(DEBUG),1)
 79 | 	CXXFLAGS +=  -Wall -Wno-unused-function -Wno-sign-compare -g -fPIC $(GL)
 80 | 	CCFLAGS += -Wall -Wno-unused-function -Wno-sign-compare -ggdb -fPIC
 81 | else
 82 | 	CXXFLAGS += -O3 -g -Wall -Wno-unused-function -Wno-sign-compare -fPIC $(GL)
 83 | 	CCFLAGS += -Wall -Wno-unused-function -Wno-sign-compare -fPIC
 84 | endif
 85 | # Library name and inputs: one C++ source plus the flex/bison inputs for cuobjdump.
 86 | PROG     =cuda
 87 | 
 88 | CXX_SRCS =  cuda_runtime_api.cc
 89 | LEX_SRCS = cuobjdump.l
 90 | YACC_SRCS = cuobjdump.y
 91 | CXXFLAGS += -DCUDART_VERSION=$(CUDART_VERSION)
 92 | LEX			= flex
 93 | LEXFLAGS	= -B 
 94 | YACC		= bison
 95 | YFLAGS		= -t -d -v --report=all
 96 | 
 97 | .PHONY: clean
 98 | # Object files and the archive are placed under $(SIM_OBJ_FILES_DIR)/libcuda.
 99 | OUTPUT_DIR=$(SIM_OBJ_FILES_DIR)/libcuda
100 | # One object per C++ source, plus the generated lexer and parser objects.
101 | OBJS = $(CXX_SRCS:%.cc=$(OUTPUT_DIR)/%.o)
102 | OBJS += $(LEX_SRCS:%.l=$(OUTPUT_DIR)/%_lexer.o)
103 | OBJS += $(YACC_SRCS:%.y=$(OUTPUT_DIR)/%_parser.o)
104 | 
105 | #--- Make rules ---
106 | lib$(PROG).a: $(OBJS)
107 | 	echo $(OBJS)
108 | 	ar rcs $(OUTPUT_DIR)/lib$(PROG).a $(OBJS)
109 | # C++ sources: the include path also covers $(SIMULATOR_ROOT)/interchiplet/includes.
110 | $(OUTPUT_DIR)/%.o: %.cc
111 | 	$(CPP) $(CXXFLAGS) -I./ -I$(OUTPUT_DIR) -I$(CUDA_INSTALL_PATH)/include -I$(SIMULATOR_ROOT)/interchiplet/includes -c $< -o $@
112 | # C sources and generated .c files are compiled with $(CPP) and $(CCFLAGS), without the interchiplet include path.
113 | $(OUTPUT_DIR)/%.o: %.c
114 | 	$(CPP) $(CCFLAGS) -I./ -I$(OUTPUT_DIR) -I$(CUDA_INSTALL_PATH)/include  -c $< -o $@
115 | 
116 | $(OUTPUT_DIR)/%.o: $(OUTPUT_DIR)/%.c
117 | 	$(CPP) $(CCFLAGS) -I./ -I$(OUTPUT_DIR) -I$(CUDA_INSTALL_PATH)/include  -c $< -o $@
118 | # bison writes the cuobjdump parser (symbol prefix cuobjdump_) into $(OUTPUT_DIR).
119 | $(OUTPUT_DIR)/cuobjdump_parser.c: cuobjdump.y
120 | 	$(YACC) $(YFLAGS) -p cuobjdump_  -o$@ $< --file-prefix=$(OUTPUT_DIR)/cuobjdump
121 | # flex writes the lexer; the parser header is listed as a prerequisite.
122 | $(OUTPUT_DIR)/cuobjdump_lexer.c: cuobjdump.l $(OUTPUT_DIR)/cuobjdump_parser.h
123 | 	$(LEX) $(LEXFLAGS) -P cuobjdump_ -o$@ $<
124 | 	
125 | # No-op rule: the parser header is produced together with the parser .c file,
126 | # so building %_parser.c is all that is needed to obtain %_parser.h.
127 | $(OUTPUT_DIR)/%_parser.h: $(OUTPUT_DIR)/%_parser.c
128 | 	:
129 | $(OUTPUT_DIR)/Makefile.makedepend: depend
130 | 
131 | depend:
132 | 	touch $(OUTPUT_DIR)/Makefile.makedepend
133 | 	makedepend -f$(OUTPUT_DIR)/Makefile.makedepend -p$(OUTPUT_DIR)/ $(CXX_SRCS) 2> /dev/null
134 | # NOTE(review): clean deletes from the current directory, while the rules above write into $(OUTPUT_DIR) - confirm this is intended.
135 | clean:
136 | 	rm -f $(PROG)
137 | 	rm -f *.o
138 | 	rm -f lib$(PROG).a
139 | 	rm -f *_parser.*
140 | 	rm -f *_lexer.*
141 | 	rm -f Makefile.makedepend Makefile.makedepend.bak
142 | 
143 | include $(OUTPUT_DIR)/Makefile.makedepend
144 | 
145 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode
2 | *.o
3 | 
4 | interchiplet/bin
5 | interchiplet/lib
6 | interchiplet/build
7 | 
8 | gpgpu-sim_distribution_backup
9 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
 1 | [submodule "snipersim"]
 2 | 	path = snipersim
 3 | 	url = https://github.com/snipersim/snipersim.git
 4 | [submodule "gpgpu-sim"]
 5 | 	path = gpgpu-sim
 6 | 	url = https://github.com/gpgpu-sim/gpgpu-sim_distribution.git
 7 | [submodule "popnet_chiplet"]
 8 | 	path = popnet_chiplet
 9 | 	url = https://github.com/baikeina/popnet_chiplet.git
10 | [submodule "interchiplet/thirdparty/spdlog"]
11 | 	path = interchiplet/thirdparty/spdlog
12 | 	url = https://github.com/gabime/spdlog.git
13 | [submodule "interchiplet/thirdparty/CLI11"]
14 | 	path = interchiplet/thirdparty/CLI11
15 | 	url = https://github.com/CLIUtils/CLI11.git
16 | [submodule "interchiplet/thirdparty/yaml-cpp"]
17 | 	path = interchiplet/thirdparty/yaml-cpp
18 | 	url = https://github.com/jbeder/yaml-cpp.git
19 | [submodule "gem5"]
20 | 	path = gem5
21 | 	url = https://github.com/gem5/gem5.git
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | For detailed documentation in English, please see [Document for LegoSim](https://fcas-zju.github.io/Chiplet_Heterogeneous_newVersion/).
  3 | 
  4 | # 安装
  5 | 
  6 | ## 下载仓库并设置环境
  7 | 
  8 | 1. 从github上下载仓库。
  9 | 
 10 |     ```
 11 |     git clone --single-branch --branch master_v2 https://github.com/FCAS-SCUT/Chiplet_Heterogeneous_newVersion.git
 12 |     ```
 13 | 
 14 |     进入仿真器根目录，以下的示例命令都假设从仿真器根目录开始执行。
 15 | 
 16 | 2. 初始化并更新submodule。
 17 | 
 18 |     ```
 19 |     git submodule init
 20 |     git submodule update
 21 |     ```
 22 | 
 23 | 3. 运行脚本，初始化环境变量
 24 | 
 25 |     ```
 26 |     source setup_env.sh
 27 |     ```
 28 | 
 29 |     运行成功应出现：setup_environment succeeded
 30 | 
 31 | 4. 对于snipersim、gpgpu-sim和gem5代码进行修改。
 32 | 
 33 |     ```
 34 |     ./apply_patch.sh
 35 |     ```
 36 | 
 37 |     更多细节参见下文“打包和应用Patch”章节。
 38 | 
 39 | 5. 编译安装snipersim。新版本的snipersim提供了非常自动化的编译脚本，直接执行make即可。
 40 | 
 41 |     ```
 42 |     cd snipersim
 43 |     make -j4
 44 |     ```
 45 | 
 46 | 6. 编译安装Gem5。请查看Gem5文档获取详细安装指南。LegoSim中可以运行X86和ARM架构仿真器：
 47 | 
 48 |     ```
 49 |     cd gem5
 50 |     scons build/X86/gem5.opt
 51 |     ```
 52 | 
 53 |     或者
 54 | 
 55 |     ```
 56 |     cd gem5
 57 |     scons build/ARM/gem5.opt
 58 |     ```
 59 | 
 60 | 7. 编译安装GPGPUSim。GPGPUsim安装有前置条件：
 61 | 
 62 |     1. GPGPUSim需要安装cuda。新版本的gpgpusim可以支持cuda4到cuda11的任意版本，详细信息请参见GPGPUSim的README。
 63 |     2. GPGPUSim对于编译版本有要求，建议使用GCC7。
 64 | 
 65 |     配置好Cuda和编译器，可以直接执行make。
 66 | 
 67 |     ```
 68 |     cd gpgpu-sim
 69 |     make -j4
 70 |     ```
 71 | 
 72 | 8. 编译安装popnet
 73 | 
 74 |     ```
 75 |     cd popnet_chiplet
 76 |     mkdir build
 77 |     cd build
 78 |     cmake ..
 79 |     make -j4
 80 |     ```
 81 | 
 82 | 9.  编译安装芯粒间通信程序。interchiplet提供了芯粒间通信所需要的API和实现代码。
 83 | 
 84 |     ```
 85 |     cd interchiplet
 86 |     mkdir build
 87 |     cd build
 88 |     cmake ..
 89 |     make
 90 |     ```
 91 | 
 92 |     编译完成后应在interchiplet/bin下找到record_transfer和zmq_pro，在interchiplet/lib下找到libinterchiplet_app.a。
 93 | 
 94 |     zmq_pro需要安装zmq环境。通常会在cmake步骤被忽略。
 95 | 
 96 | # 验证安装
 97 | 
 98 | 正确执行上述过程后，可以使用benchmark/matmul验证环境设置是否正确。
 99 | 
100 | 1. 设置仿真器环境
101 | 
102 |     ```
103 |     source setup_env.sh
104 |      ```
105 | 
106 | 2. 编译可执行文件
107 | 
108 |     ```
109 |     cd benchmark/matmul
110 |     make
111 |     ```
112 | 
113 | 3. 执行可执行文件。示例包含4个进程，分别是1个CPU进程和3个GPU进程。必须在benchmark/matmul路径下执行。
114 | 
115 |     ```
116 |     ../../interchiplet/bin/interchiplet ./matmul.yml
117 |     ```
118 | 
119 |     执行后，可以在benchmark/matmul文件夹下找到一组proc_r{R}_p{P}_t{T}的文件夹，对应于第R轮执行的第P阶段的第T个线程。
120 |     在文件夹中可以找到下列文件：
121 | 
122 |     1. GPGPUSim仿真的临时文件和日志文件gpgpusim_X_X.log。
123 |     2. Sniper仿真的临时文件和sniper仿真的日志文件sniper.log。
124 |     3. Popnet的日志文件popnet.log。
125 | 
126 | 4. 清理可执行文件和输出文件。
127 | 
128 |     ```
129 |     make clean
130 |     ```
131 | 
132 | # 打包和应用Patch
133 | 
134 | 由于sniper和GPGPUSim是用submodule方式引入的，对于snipersim和gpgpu-sim的修改不会通过常规的git流程追踪。因此，工程提供了patch.sh和apply_patch.sh两个脚本通过Patch管理sniper和gpgpu-sim的修改。
135 | 
136 | patch.sh脚本用来生成Patch：
137 | 
138 | ```
139 | ./patch.sh
140 | ```
141 | 
142 | 1. 使用patch.sh脚本将snipersim和gpgpu-sim的修改分别打包到snipersim.diff和gpgpu-sim.diff文件中。diff文件保存在interchiplet/patch下面。diff文件会被git追踪。
143 | 2. patch.sh脚本还会将被修改的文件按照文件层次结构保存到.changed_files文件夹中，用于在diff文件出错时进行查看和参考。
144 | 
145 | apply_patch.sh脚本用来应用Patch：
146 | 
147 | ```
148 | ./apply_patch.sh
149 | ```
150 | 
151 | 1. 使用apply_patch.sh脚本将snipersim.diff和gpgpu-sim.diff文件应用到snipersim和gpgpu-sim，重现对于文件的修改。
152 | 2. 当apply出错时，可以参考.changed_files中的文件手动修改snipersim和gpgpu-sim的文件。
153 | 
154 | 需要说明的是：不建议用.changed_files直接覆盖snipersim和gpgpu-sim文件夹。因为snipersim和gpgpu-sim本身的演进可能会与芯粒仿真器修改相同的文件。使用Patch的方式会报告修改的冲突。如果直接覆盖，则会导致不可预见的错误。
155 | 
156 | # 添加测试程序
157 | 
158 | 测试程序统一添加到benchmark路径下，每一个测试文件有独立的文件夹。
159 | 
160 | 测试程序的文件管理推荐按照matmul组织，并且使用类似的Makefile。但是并不绝对要求。
161 | 
162 | 运行测试程序需要编写YAML配置文件。
163 | 
164 | ## YAML配置文件格式
165 | 
166 | ```
167 | # Phase 1 configuration.
168 | phase1:
169 |   # Process 0
170 |   - cmd: "$BENCHMARK_ROOT/bin/matmul_cu"
171 |     args: ["0", "1"]
172 |     log: "gpgpusim.0.1.log"
173 |     is_to_stdout: false
174 |     pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*"
175 |   # Process 1
176 |   - cmd: "$BENCHMARK_ROOT/bin/matmul_cu"
177 |     args: ["1", "0"]
178 |     log: "gpgpusim.1.0.log"
179 |     is_to_stdout: false
180 |     pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*"
181 |   ......
182 | 
183 | # Phase 2 configuration.
184 | phase2:
185 |   # Process 0
186 |   - cmd: "$SIMULATOR_ROOT/popnet/popnet"
187 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0"]
188 |     log: "popnet.log"
189 |     is_to_stdout: false
190 | 
191 | ```
192 | 
193 | YAML配置文件的第一层支持的关键字是：
194 | 
195 | - `phase1`：配置第一阶段的仿真器进程。
196 | - `phase2`：配置第二阶段的仿真器进程。
197 | 
198 | 这两个关键字下面都是数组，每项对应于一个并发的仿真器进程。`phase1`和`phase2`都可以支持多个仿真进程。
199 | 
200 | 仿真器进程的配置支持如下关键字：
201 | 
202 | - `cmd`：表示仿真器的命令。字符串表示。支持环境变量`$BENCHMARK_ROOT`和`$SIMULATOR_ROOT`。
203 | - `args`：表示仿真器的参数。字符串数组表示。支持环境变量`$BENCHMARK_ROOT`和`$SIMULATOR_ROOT`。
204 | - `log`：表示日志的名称。不能使用相对路径或绝对路径。
205 | - `is_to_stdout`：表示是否将仿真器的标准输出/错误输出重定向到interchiplet的标准输出。
206 | - `pre_copy`：有些仿真器需要一些额外的文件才能启动仿真。这个关键字是字符串。如果需要复制多个文件，则用空格隔开，用引号包围。
207 | 
208 | 在YAML里面使用相对路径时，以当前路径作为基础。推荐使用环境变量构成绝对路径。
209 | 
210 | - `$BENCHMARK_ROOT`表示测试程序的路径，根据YAML文件的位置决定。
211 | - `$SIMULATOR_ROOT`表示仿真器的路径，通过setup_env.sh决定。
212 | 
213 | ## 运行InterChiplet
214 | 
215 | 仿真器的主程序是InterChiplet。在运行路径下执行下面的命令：
216 | 
217 | ```
218 | $SIMULATOR_ROOT/interchiplet/bin/interchiplet $BENCHMARK_ROOT/bench.yml
219 | ```
220 | 
221 | InterChiplet命令格式如下：
222 | 
223 | ```
224 | interchiplet <bench>.yml [--cwd <string>] [-t|--timeout <int>] [-e|--error <float>] [-h]
225 | ```
226 | 
227 | 命令参数如下：
228 | 
229 | - `<bench>.yml`指定测试程序的配置文件。
230 | - `--cwd <string>`指定执行仿真的路径。
231 | - `-t <int>`和`--timeout <int>`指定仿真退出的轮次。不论结果是否收敛，都会结束仿真。
232 | - `-e <float>`和`--error <float>`指定仿真退出的条件。当仿真误差小于这个比例时，结束仿真。
233 | 
234 | 


--------------------------------------------------------------------------------
/apply_patch.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Applies the InterChiplet patches to the snipersim, gpgpu-sim and gem5 checkouts under SIMULATOR_ROOT.
 3 | if [[ -n "${SIMULATOR_ROOT}" ]]; then
 4 |     echo "SIMULATOR_ROOT is: ${SIMULATOR_ROOT}"
 5 | else
 6 |     echo "The environment variable SIMULATOR_ROOT is not defined."
 7 |     exit 1  # report the failure instead of exiting with status 0
 8 | fi
 9 | 
10 | # Patch for Sniper (git apply runs only if the directory could be entered)
11 | cd "${SIMULATOR_ROOT}/snipersim" &&
12 |     git apply ../interchiplet/patch/snipersim.diff
13 | 
14 | # Patch for GPGPUSim
15 | cd "${SIMULATOR_ROOT}/gpgpu-sim" &&
16 |     git apply ../interchiplet/patch/gpgpu-sim.diff
17 | 
18 | # Patch for GEM5
19 | cd "${SIMULATOR_ROOT}/gem5" &&
20 |     git apply ../interchiplet/patch/gem5.diff
21 | 


--------------------------------------------------------------------------------
/benchmark/.gitignore:
--------------------------------------------------------------------------------
 1 | 
 2 | _app_cuda_version_*
 3 | _cuobjdump_list_ptx_*
 4 | *.ptx
 5 | *.ptxas
 6 | gpgpu_inst_stats.txt
 7 | 
 8 | bench.txt
 9 | delayInfo.txt
10 | buffer*
11 | *.log
12 | message_record.txt
13 | 
14 | sim.cfg
15 | sim.info
16 | sim.out
17 | sim.stats.sqlite3
18 | 
19 | checkpoint_files
20 | 
21 | obj
22 | cuobj
23 | bin
24 | proc_r*_t*
25 | 
26 | MLP/json
27 | 


--------------------------------------------------------------------------------
/benchmark/MLP/config_fermi_islip.icnt:
--------------------------------------------------------------------------------
 1 | //21*1 fly with 32 flits per packet under gpgpusim injection mode
 2 | use_map = 0;
 3 | flit_size = 40; 
 4 | 
 5 | // currently we do not use this, see subnets below
 6 | network_count = 2;
 7 | 
 8 | // Topology
 9 | topology = fly;
10 | k = 27;
11 | n = 1;
12 | 
13 | // Routing
14 | 
15 | routing_function = dest_tag;
16 | 
17 | // Flow control
18 | 
19 | num_vcs     = 1;
20 | vc_buf_size = 64;
21 | 
22 | wait_for_tail_credit = 0;
23 | 
24 | // Router architecture
25 | 
26 | vc_allocator = islip; //separable_input_first;
27 | sw_allocator = islip; //separable_input_first;
28 | alloc_iters  = 1;
29 | 
30 | credit_delay   = 0;
31 | routing_delay  = 0;
32 | vc_alloc_delay = 1;
33 | sw_alloc_delay = 1;
34 | 
35 | input_speedup     = 1;
36 | output_speedup    = 1;
37 | internal_speedup  = 2.0;
38 | 
39 | // Traffic, GPGPU-Sim does not use this
40 | 
41 | traffic                = uniform;
42 | packet_size ={{1,2,3,4},{10,20}};
43 | packet_size_rate={{1,1,1,1},{2,1}};
44 | 
45 | // Simulation - Don't change
46 | 
47 | sim_type       = gpgpusim;
48 | //sim_type = latency;
49 | injection_rate = 0.1;
50 | 
51 | subnets = 2;
52 | 
53 | // Always use read and write no matter following line
54 | //use_read_write = 1;
55 | 
56 | 
57 | read_request_subnet = 0;
58 | read_reply_subnet = 1;
59 | write_request_subnet = 0;
60 | write_reply_subnet = 1;
61 | 
62 | read_request_begin_vc = 0;
63 | read_request_end_vc = 0;
64 | write_request_begin_vc = 0;
65 | write_request_end_vc = 0;
66 | read_reply_begin_vc = 0;
67 | read_reply_end_vc = 0;
68 | write_reply_begin_vc = 0;
69 | write_reply_end_vc = 0;


--------------------------------------------------------------------------------
/benchmark/MLP/makefile:
--------------------------------------------------------------------------------
 1 | CC=g++
 2 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes -I$(SIMULATOR_ROOT)/benchmark/MLP/json/include
 3 | INTERCHIPLETLIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a
 4 | SNIPER_EXEC=$(SIMULATOR_ROOT)/snipersim/run-sniper
 5 | 
 6 | NVCC=nvcc
 7 | CUFLAGS=--compiler-options -Wall -I$(SIMULATOR_ROOT)/interchiplet/includes
 8 | 
 9 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/MLP
10 | 
11 | SNIPER_SRCS=mlp.cpp
12 | SNIPER_OBJS=mlp.o
13 | SNIPER_TARGET=bin/mlp_cpu
14 | 
15 | GPGPUSIM_SRCS=mlp.cu
16 | GPGPUSIM_TARGET=bin/mlp_cu
17 | REPO_DIR=$(BENCHMARK_ROOT)/json
18 | GIT_REPO=https://github.com/nlohmann/json.git
19 | 
20 | # None of these targets is the name of a file its recipe creates, so always run them.
21 | .PHONY: all sniper_target gpgpusim_target debug run run_cpu run_gpu gdb valgrind clean cleanall kill
22 | 
23 | all: sniper_target gpgpusim_target
24 | 
25 | # Link the CPU-side benchmark; the output directory may not exist on a fresh checkout.
26 | sniper_target: $(SNIPER_OBJS)
27 | 	mkdir -p $(dir $(SNIPER_TARGET))
28 | 	$(CC) -g $(SNIPER_OBJS) $(INTERCHIPLETLIB) -o $(SNIPER_TARGET) -lpthread
29 | 
30 | # Compile a C++ source; the nlohmann/json repository is cloned first if $(REPO_DIR) is missing.
31 | %.o: %.cpp
32 | 	if [ ! -d $(REPO_DIR) ]; then \
33 |         git clone $(GIT_REPO) $(REPO_DIR); \
34 |     fi
35 | 	$(CC) $(CFLAGS) -c $< -o $@
36 | 
37 | debug: CFLAGS += -DDEBUG -g
38 | debug: all
39 | 
40 | # Build the GPU-side benchmark with nvcc against the GPGPU-Sim libraries.
41 | gpgpusim_target:
42 | 	mkdir -p $(dir $(GPGPUSIM_TARGET))
43 | 	$(NVCC) -L$(SIMULATOR_ROOT)/gpgpu-sim/lib/$(GPGPUSIM_CONFIG) --cudart shared $(GPGPUSIM_SRCS) $(CUFLAGS) -o $(GPGPUSIM_TARGET) -g
44 | 
45 | # Start eight GPU processes in the background, then run the CPU process under Sniper.
46 | run:
47 | 	touch "start running" &
48 | 	./$(GPGPUSIM_TARGET) 0 1 > gpgpusim.0.1.log 2>&1 &
49 | 	./$(GPGPUSIM_TARGET) 0 2 > gpgpusim.0.2.log 2>&1 &
50 | 	./$(GPGPUSIM_TARGET) 0 3 > gpgpusim.0.3.log 2>&1 &
51 | 	./$(GPGPUSIM_TARGET) 0 4 > gpgpusim.0.4.log 2>&1 &
52 | 	./$(GPGPUSIM_TARGET) 1 1 > gpgpusim.1.1.log 2>&1 &
53 | 	./$(GPGPUSIM_TARGET) 1 2 > gpgpusim.1.2.log 2>&1 &
54 | 	./$(GPGPUSIM_TARGET) 1 3 > gpgpusim.1.3.log 2>&1 &
55 | 	./$(GPGPUSIM_TARGET) 1 4 > gpgpusim.1.4.log 2>&1 &
56 | 	$(SNIPER_EXEC) --curdir $(BENCHMARK_ROOT) -- $(BENCHMARK_ROOT)/$(SNIPER_TARGET) 0 0 > sniper.0.0.log 2>&1
57 | 
58 | run_cpu:
59 | 	$(SNIPER_EXEC) --curdir $(BENCHMARK_ROOT) -- $(BENCHMARK_ROOT)/$(SNIPER_TARGET) 0 0
60 | 
61 | run_gpu:
62 | 	./$(GPGPUSIM_TARGET) 0 1 > gpgpusim.0.1.log 2>&1
63 | 
64 | # The debugging targets depend on sniper_target because no rule is named $(SNIPER_TARGET).
65 | gdb: sniper_target
66 | 	cd $(BENCHMARK_ROOT) && gdb ./$(SNIPER_TARGET) -- $(BENCHMARK_ROOT)/$(SNIPER_TARGET) 0 0
67 | 
68 | valgrind: sniper_target
69 | 	cd $(BENCHMARK_ROOT) && valgrind --leak-check=full ./$(SNIPER_TARGET) -- $(BENCHMARK_ROOT)/$(SNIPER_TARGET) 0 0
70 | 
71 | clean:
72 | 	rm -rf bench* buffer* message_record.txt
73 | 	rm -rf _app_cuda_version_* _cuobjdump_list_ptx_* gpgpusim_power_report_* *.ptx *.ptxas gpgpusim.*.log gpgpu_inst_stats.txt gpuRead* cpuRead*
74 | 	rm -rf sim.cfg sim.info sim.out sim.stats.sqlite3
75 | 	rm -rf checkpoint_files
76 | 
77 | # $(MAKE) keeps the flags and jobserver of the invoking make.
78 | cleanall:
79 | 	$(MAKE) clean
80 | 	rm -rf $(SNIPER_OBJS) $(GPGPUSIM_TARGET) $(SNIPER_TARGET)
81 | 	rm -f start* sniper*
82 | 
83 | # pkill exits non-zero when nothing matched; the leading '-' lets make continue to the second pkill.
84 | kill:
85 | 	-pkill -f mlp_cu
86 | 	-pkill -f mlp_cpu
87 | 


--------------------------------------------------------------------------------
/benchmark/MLP/mlp.cu:
--------------------------------------------------------------------------------
 1 | #include <math.h>
 2 | #include <stdio.h>
 3 | #include <sys/time.h>
 4 | 
 5 | #include <fstream>
 6 | #include <iostream>
 7 | #include <string>
 8 | 
 9 | #include "apis_cu.h"
10 | #include "cuda_runtime.h"
11 | #include "device_launch_parameters.h"
12 | #define BLOCK_DIM 10
13 | 
14 | // One thread per element of P = M * N; M is heightA x widthA, N is widthA x widthB (row-major).
15 | __global__ void matrix_mul_gpu(int64_t *M, int64_t *N, int64_t *P, int64_t widthA, int64_t heightA,
16 |                                int64_t widthB) {
17 |     const int64_t col = threadIdx.x + blockDim.x * blockIdx.x;
18 |     const int64_t row = threadIdx.y + blockDim.y * blockIdx.y;
19 |     // Threads that fall outside the result matrix have nothing to write.
20 |     if (col >= widthB || row >= heightA) return;
21 |     const int64_t *lhsRow = M + row * widthA;
22 |     int64_t acc = 0;
23 |     for (int64_t k = 0; k < widthA; k++) {
24 |         acc += lhsRow[k] * N[k * widthB + col];
25 |     }
26 |     P[row * widthB + col] = acc;
27 | }
28 | 
29 | int Row_A = 0, Col_A = 0, Row_B = 0, Col_B = 0;
30 | 
31 | // Print `count` values from `data`, ending the line after the last value of every `cols`-wide row.
32 | static void print_matrix(const int64_t *data, int64_t count, int64_t cols) {
33 |     for (int64_t i = 0; i < count; i++) {
34 |         std::cout << data[i];
35 |         if ((i + 1) % cols == 0)
36 |             std::cout << std::endl;
37 |         else
38 |             std::cout << " ";
39 |     }
40 | }
41 | 
42 | // Matrix-multiplication worker for one GPU chiplet; argv[1] and argv[2] are its coordinates.
43 | // Each loop iteration receives two matrix shapes and the matrix contents, multiplies them on
44 | // the device, prints A and the product, and sends the product back. The loop never exits.
45 | int main(int argc, char **argv) {
46 |     // Without both coordinates, argv[1]/argv[2] would be read out of bounds.
47 |     if (argc < 3) {
48 |         std::cerr << "usage: " << (argc > 0 ? argv[0] : "mlp_cu") << " <x> <y>" << std::endl;
49 |         return 1;
50 |     }
51 |     const int srcX = atoi(argv[1]);
52 |     const int srcY = atoi(argv[2]);
53 |     while (1) {
54 |         // Shapes arrive first as {rows, cols}. receiveMessage is handed device pointers here, so
55 |         // each pair is staged in device memory, copied into a host array, and the staging
56 |         // buffer is released again before the next iteration.
57 |         int64_t size_A[2] = {0, 0};
58 |         int64_t size_B[2] = {0, 0};
59 |         int64_t *Size_A, *Size_B;
60 |         cudaMalloc((void **)&Size_A, sizeof(int64_t) * 2);
61 |         cudaMalloc((void **)&Size_B, sizeof(int64_t) * 2);
62 |         receiveMessage(srcX, srcY, 0, 0, Size_A, sizeof(int64_t) * 2);
63 |         receiveMessage(srcX, srcY, 0, 0, Size_B, sizeof(int64_t) * 2);
64 |         cudaMemcpy(size_A, Size_A, sizeof(int64_t) * 2, cudaMemcpyDeviceToHost);
65 |         cudaMemcpy(size_B, Size_B, sizeof(int64_t) * 2, cudaMemcpyDeviceToHost);
66 |         cudaFree(Size_A);
67 |         cudaFree(Size_B);
68 |         Row_A = size_A[0];
69 |         Col_A = size_A[1];
70 |         Row_B = size_B[0];
71 |         Col_B = size_B[1];
72 |         int64_t *C = (int64_t *)malloc(sizeof(int64_t) * Col_B * Row_A);
73 |         int64_t *A = (int64_t *)malloc(sizeof(int64_t) * Row_A * Col_A);
74 | 
75 |         int64_t *d_dataA, *d_dataB, *d_dataC;
76 |         cudaMalloc((void **)&d_dataA, sizeof(int64_t) * Row_A * Col_A);
77 |         cudaMalloc((void **)&d_dataB, sizeof(int64_t) * Row_B * Col_B);
78 |         cudaMalloc((void **)&d_dataC, sizeof(int64_t) * Col_B * Row_A);
79 |         receiveMessage(srcX, srcY, 0, 0, d_dataA, Col_A * Row_A * sizeof(int64_t));
80 |         receiveMessage(srcX, srcY, 0, 0, d_dataB, Col_B * Row_B * sizeof(int64_t));
81 | 
82 |         cudaMemcpy(A, d_dataA, sizeof(int64_t) * Col_A * Row_A, cudaMemcpyDeviceToHost);
83 |         print_matrix(A, static_cast<int64_t>(Row_A) * Col_A, Col_A);
84 |         // One thread per result element; the grid is rounded up so partial tiles are covered.
85 |         dim3 threadPerBlock(BLOCK_DIM, BLOCK_DIM);
86 |         dim3 blockNumber((Col_B + threadPerBlock.x - 1) / threadPerBlock.x,
87 |                          (Row_A + threadPerBlock.y - 1) / threadPerBlock.y);
88 |         matrix_mul_gpu<<<blockNumber, threadPerBlock>>>(d_dataA, d_dataB, d_dataC, Col_A, Row_A,
89 |                                                         Col_B);
90 |         cudaMemcpy(C, d_dataC, sizeof(int64_t) * Row_A * Col_B, cudaMemcpyDeviceToHost);
91 |         print_matrix(C, static_cast<int64_t>(Row_A) * Col_B, Col_B);
92 |         sendMessage(0, 0, srcX, srcY, d_dataC, Row_A * Col_B * sizeof(int64_t));
93 |         // Release everything allocated in this iteration, device and host.
94 |         cudaFree(d_dataA);
95 |         cudaFree(d_dataB);
96 |         cudaFree(d_dataC);
97 |         free(A);
98 |         free(C);
99 |     }
100 |     return 0;
101 | }


--------------------------------------------------------------------------------
/benchmark/MLP/mlp.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$BENCHMARK_ROOT/bin/mlp_cu"
 5 |     args: ["0", "1"]
 6 |     log: "gpgpusim.0.1.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 1
 9 |     pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*"
10 |   # Process 1
11 |   - cmd: "$BENCHMARK_ROOT/bin/mlp_cu"
12 |     args: ["0", "2"]
13 |     log: "gpgpusim.0.2.log"
14 |     is_to_stdout: false
15 |     clock_rate: 1
16 |     pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*"
17 |   # Process 2
18 |   - cmd: "$BENCHMARK_ROOT/bin/mlp_cu"
19 |     args: ["0", "3"]
20 |     log: "gpgpusim.0.3.log"
21 |     is_to_stdout: false
22 |     clock_rate: 1
23 |     pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*"
24 |   # Process 3
25 |   - cmd: "$BENCHMARK_ROOT/bin/mlp_cu"
26 |     args: ["0", "4"]
27 |     log: "gpgpusim.0.4.log"
28 |     is_to_stdout: false
29 |     clock_rate: 1
30 |     pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*"
31 |   # Process 4
32 |   - cmd: "$BENCHMARK_ROOT/bin/mlp_cu"
33 |     args: ["1", "1"]
34 |     log: "gpgpusim.1.1.log"
35 |     is_to_stdout: false
36 |     clock_rate: 1
37 |     pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*"
38 |   # Process 5
39 |   - cmd: "$BENCHMARK_ROOT/bin/mlp_cu"
40 |     args: ["1", "2"]
41 |     log: "gpgpusim.1.2.log"
42 |     is_to_stdout: false
43 |     clock_rate: 1
44 |     pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*"
45 |   # Process 6
46 |   - cmd: "$BENCHMARK_ROOT/bin/mlp_cu"
47 |     args: ["1", "3"]
48 |     log: "gpgpusim.1.3.log"
49 |     is_to_stdout: false
50 |     clock_rate: 1
51 |     pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*"
52 |   # Process 7
53 |   - cmd: "$BENCHMARK_ROOT/bin/mlp_cu"
54 |     args: ["1", "4"]
55 |     log: "gpgpusim.1.4.log"
56 |     is_to_stdout: false
57 |     clock_rate: 1
58 |     pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*"
59 |   # Process 8
60 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
61 |     args: ["--", "$BENCHMARK_ROOT/bin/mlp_cpu", "0", "0"]
62 |     log: "sniper.0.0.log"
63 |     is_to_stdout: false
64 |     clock_rate: 1
65 | 
66 | # Phase 2 configuration.
67 | phase2:
68 |   # Process 0
69 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
70 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
71 |     log: "popnet.log"
72 |     is_to_stdout: false
73 |     clock_rate: 1
74 | 
75 | # File configuration. (Not used yet)
76 | bench_file: "./bench.txt"
77 | delayinfo_file: "./delayInfo.txt"
78 | 


--------------------------------------------------------------------------------
/benchmark/MLP/readData.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.datasets import load_breast_cancer,load_iris
 3 | from sklearn.model_selection import train_test_split
 4 | from sklearn.preprocessing import StandardScaler
 5 | from sklearn.metrics import accuracy_score,r2_score
 6 | from sklearn.preprocessing import OneHotEncoder
 7 | import matplotlib.pyplot as plt
 8 | import json
 9 | import pandas as pd
10 | import subprocess
11 | 
12 | data = pd.read_csv('./wine.csv')
13 | print(data)
14 | target_data = data['Class']
15 | feature_data = data.drop(columns='Class')
16 | scaler = StandardScaler()
17 | feature_data = scaler.fit_transform(feature_data)
18 | x_train, x_test, y_train, y_test = train_test_split(feature_data, target_data, test_size=0.2, random_state=2)
19 | y_train,y_test=np.array(y_train),np.array(y_test)
20 | data_dict = {'x_train': x_train.tolist(), 'x_test': x_test.tolist(), 'y_train': y_train.tolist(), 'y_test': y_test.tolist()}
21 | with open('temp_data.json', 'w') as f:
22 |     json.dump(data_dict, f)


--------------------------------------------------------------------------------
/benchmark/matmul/makefile:
--------------------------------------------------------------------------------
 1 | # Project environment
 2 | # SIMULATOR_ROOT, defined by setup_env.sh
 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/matmul
 4 | 
 5 | # Compiler environment of C/C++
 6 | CC=g++
 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes
 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a
 9 | 
10 | # C/C++ Source file
11 | C_SRCS=matmul.cpp
12 | C_OBJS=obj/matmul.o
13 | C_TARGET=bin/matmul_c
14 | 
15 | # Compiler environment of CUDA
16 | NVCC=nvcc
17 | CUFLAGS=--compiler-options -Wall -I$(SIMULATOR_ROOT)/interchiplet/includes
18 | 
19 | # CUDA Source file
20 | CUDA_SRCS=matmul.cu
21 | CUDA_OBJS=cuobj/matmul.o
22 | CUDA_TARGET=bin/matmul_cu
23 | 
24 | # These targets name actions, not files.
25 | .PHONY: all C_target CUDA_target bin_dir obj_dir cuobj_dir clean
26 | 
27 | all: bin_dir obj_dir cuobj_dir C_target CUDA_target
28 | 
29 | # The output directories below are order-only prerequisites (after `|`), so they
30 | # exist before use even under `make -j` or when a single target is requested.
31 | 
32 | # C language target
33 | C_target: $(C_OBJS) | bin_dir
34 | 	$(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET)
35 | 
36 | # CUDA language target
37 | CUDA_target: $(CUDA_OBJS) | bin_dir
38 | 	$(NVCC) -L$(SIMULATOR_ROOT)/gpgpu-sim/lib/$(GPGPUSIM_CONFIG) --cudart shared $(CUDA_OBJS) -o $(CUDA_TARGET)
39 | 
40 | # Rule for C object
41 | obj/%.o: %.cpp | obj_dir
42 | 	$(CC) $(CFLAGS) -c $< -o $@
43 | 
44 | # Rule for Cuda object
45 | cuobj/%.o: %.cu | cuobj_dir
46 | 	$(NVCC) $(CUFLAGS) -c $< -o $@
47 | 
48 | # Directory for binary files.
49 | bin_dir:
50 | 	mkdir -p bin
51 | 
52 | # Directory for object files for C.
53 | obj_dir:
54 | 	mkdir -p obj
55 | 
56 | # Directory for object files for CUDA.
57 | cuobj_dir:
58 | 	mkdir -p cuobj
59 | 
60 | # Clean generated files.
61 | clean:
62 | 	rm -rf bench.txt delayInfo.txt buffer* message_record.txt
63 | 	rm -rf proc_r*_t* *.log
64 | 	rm -rf obj cuobj bin
65 | 


--------------------------------------------------------------------------------
/benchmark/matmul/matmul.cpp:
--------------------------------------------------------------------------------
 1 | #include <fstream>
 2 | #include <iostream>
 3 | 
 4 | #include "apis_c.h"
 5 | 
 6 | #define Row 100
 7 | #define Col 100
 8 | 
 9 | int idX, idY;
10 | 
11 | // Host side of the matmul example; argv[1] and argv[2] are this process's coordinates.
12 | // Two random Row x Col matrices are exchanged with chiplets (0,1), (1,0) and (1,1); one
13 | // Col-element result is received from each and the three are summed element-wise into C1.
14 | int main(int argc, char **argv) {
15 |     // Without both coordinates, argv[1]/argv[2] would be read out of bounds.
16 |     if (argc < 3) {
17 |         std::cerr << "usage: " << (argc > 0 ? argv[0] : "matmul_c") << " <idX> <idY>" << std::endl;
18 |         return 1;
19 |     }
20 |     idX = atoi(argv[1]);
21 |     idY = atoi(argv[2]);
22 | 
23 |     // Message sizes are derived from Row/Col so they cannot drift from the buffer sizes.
24 |     const size_t matrixBytes = sizeof(int64_t) * Row * Col;
25 |     const size_t resultBytes = sizeof(int64_t) * Col;
26 | 
27 |     int64_t *A = (int64_t *)malloc(matrixBytes);
28 |     int64_t *B = (int64_t *)malloc(matrixBytes);
29 |     int64_t *C1 = (int64_t *)malloc(resultBytes);
30 |     int64_t *C2 = (int64_t *)malloc(resultBytes);
31 |     int64_t *C3 = (int64_t *)malloc(resultBytes);
32 |     if (!A || !B || !C1 || !C2 || !C3) {
33 |         std::cerr << "out of memory" << std::endl;
34 |         free(A); free(B); free(C1); free(C2); free(C3);
35 |         return 1;
36 |     }
37 | 
38 |     for (int i = 0; i < Row * Col; i++) {
39 |         A[i] = rand() % 51;
40 |         B[i] = rand() % 51;
41 |     }
42 | 
43 |     InterChiplet::sendMessage(0, 1, idX, idY, A, matrixBytes);
44 |     InterChiplet::sendMessage(1, 0, idX, idY, A, matrixBytes);
45 |     InterChiplet::sendMessage(1, 1, idX, idY, A, matrixBytes);
46 | 
47 |     InterChiplet::sendMessage(0, 1, idX, idY, B, matrixBytes);
48 |     InterChiplet::sendMessage(1, 0, idX, idY, B, matrixBytes);
49 |     InterChiplet::sendMessage(1, 1, idX, idY, B, matrixBytes);
50 | 
51 |     InterChiplet::receiveMessage(idX, idY, 0, 1, C1, resultBytes);
52 |     InterChiplet::receiveMessage(idX, idY, 1, 0, C2, resultBytes);
53 |     InterChiplet::receiveMessage(idX, idY, 1, 1, C3, resultBytes);
54 | 
55 |     for (int i = 0; i < Col; i++) {
56 |         C1[i] += C2[i];
57 |         C1[i] += C3[i];
58 |     }
59 | 
60 |     free(A);
61 |     free(B);
62 |     free(C1);
63 |     free(C2);
64 |     free(C3);
65 |     return 0;
66 | }
67 | 


--------------------------------------------------------------------------------
/benchmark/matmul/matmul.cu:
--------------------------------------------------------------------------------
 1 | #include <math.h>
 2 | #include <stdint.h>
 3 | #include <stdio.h>
 4 | #include <sys/time.h>
 5 | 
 6 | #include <fstream>
 7 | #include <iostream>
 8 | #include <string>
 9 | 
10 | #include "apis_cu.h"
11 | #include "cuda_runtime.h"
12 | #include "device_launch_parameters.h"
13 | 
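14 | /**
15 |  * This example program uses 4 GPU chiplets to compute the product of
16 |  * random matrix A (400 * 100) and random matrix B (100 * 400).
17 |  * By the rules of matrix multiplication, the task can be split into four 100*100 matrix products whose results are summed.
18 |  */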
19 | 
20 | #define Row 100
21 | #define Col 100
22 | 
23 | /**
24 |  * Core matrix-multiplication kernel: every thread runs this function once and, depending on
25 |  * its thread index, computes the value at a different position of the result matrix.
26 |  */
27 | 
28 | __global__ void matrix_mul_gpu(int64_t* M, int64_t* N, int64_t* P, int width) {
29 |     // Results are packed row by row; the row length follows the launched block width
30 |     // (blockDim.x) rather than a fixed 10, so other block shapes index P correctly too.
31 |     int sumNum = threadIdx.x + threadIdx.y * blockDim.x;
32 |     int i = threadIdx.x;
33 |     int j = threadIdx.y;
34 |     int64_t sum = 0;
35 |     for (int k = 0; k < width; k++) {
36 |         sum += M[j * width + k] * N[k * width + i];
37 |     }
38 |     P[sumNum] = sum;
39 | }
40 | 
41 | // Worker for one GPU chiplet; argv[1] and argv[2] are its coordinates. Receives two
42 | // Row x Col matrices, runs matrix_mul_gpu on a single 10 x 10 thread block, and sends the
43 | // Col result values back.
44 | int main(int argc, char** argv) {
45 |     // Without both coordinates, argv[1]/argv[2] would be read out of bounds.
46 |     if (argc < 3) {
47 |         fprintf(stderr, "usage: %s <idX> <idY>\n", argc > 0 ? argv[0] : "matmul_cu");
48 |         return 1;
49 |     }
50 |     // Read the coordinates of the chiplet this process represents.
51 |     int idX = atoi(argv[1]);
52 |     int idY = atoi(argv[2]);
53 |     int64_t *d_dataA, *d_dataB, *d_dataC;
54 |     cudaMalloc((void**)&d_dataA, sizeof(int64_t) * Row * Col);
55 |     cudaMalloc((void**)&d_dataB, sizeof(int64_t) * Row * Col);
56 |     cudaMalloc((void**)&d_dataC, sizeof(int64_t) * Col);
57 | 
58 |     receiveMessage(idX, idY, 0, 0, d_dataA, sizeof(int64_t) * Row * Col);
59 |     receiveMessage(idX, idY, 0, 0, d_dataB, sizeof(int64_t) * Row * Col);
60 | 
61 |     // calculate
62 |     dim3 threadPerBlock(10, 10);
63 |     dim3 blockNumber(1);
64 |     matrix_mul_gpu<<<blockNumber, threadPerBlock>>>(d_dataA, d_dataB, d_dataC, Col);
65 | 
66 |     // Send exactly as many values as d_dataC was allocated with.
67 |     sendMessage(0, 0, idX, idY, d_dataC, Col * sizeof(int64_t));
68 |     cudaFree(d_dataA);
69 |     cudaFree(d_dataB);
70 |     cudaFree(d_dataC);
71 |     return 0;
72 | }
73 | 


--------------------------------------------------------------------------------
/benchmark/matmul/matmul.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$BENCHMARK_ROOT/bin/matmul_cu"
 5 |     args: ["0", "1"]
 6 |     log: "gpgpusim.0.1.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 1
 9 |     pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*"
10 |   # Process 1
11 |   - cmd: "$BENCHMARK_ROOT/bin/matmul_cu"
12 |     args: ["1", "0"]
13 |     log: "gpgpusim.1.0.log"
14 |     is_to_stdout: false
15 |     clock_rate: 1
16 |     pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*"
17 |   # Process 2
18 |   - cmd: "$BENCHMARK_ROOT/bin/matmul_cu"
19 |     args: ["1", "1"]
20 |     log: "gpgpusim.1.1.log"
21 |     is_to_stdout: false
22 |     clock_rate: 1
23 |     pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*"
24 |   # Process 3
25 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
26 |     args: ["--", "$BENCHMARK_ROOT/bin/matmul_c", "0", "0"]
27 |     log: "sniper.0.0.log"
28 |     is_to_stdout: false
29 |     clock_rate: 1
30 | 
31 | # Phase 2 configuration.
32 | phase2:
33 |   # Process 0
34 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
35 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
36 |     log: "popnet_0.log"
37 |     is_to_stdout: false
38 |     clock_rate: 1
39 | 
40 | # File configuration. (Not used yet)
41 | bench_file: "./bench.txt"
42 | delayinfo_file: "./delayInfo.txt"
43 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5-arm/barrier/makefile:
--------------------------------------------------------------------------------
 1 | # Project environment
 2 | # SIMULATOR_ROOT, defined by setup_env.sh
 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/gem5-arm/barrier
 4 | 
 5 | # Compiler environment of C/C++
 6 | CC=aarch64-linux-gnu-g++
 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes
 8 | 
 9 | # C/C++ Source file
10 | C_SRCS=test.cpp
11 | C_OBJS=obj/test.o
12 | C_TARGET=bin/test_c
13 | 
14 | # These targets name actions, not files.
15 | .PHONY: all C_target bin_dir obj_dir clean
16 | 
17 | all: bin_dir obj_dir C_target
18 | 
19 | # C language target. The output directories are order-only prerequisites (after `|`),
20 | # so they exist before use even under `make -j` or when a single target is requested.
21 | C_target: $(C_OBJS) obj/interchiplet.o | bin_dir
22 | 	$(CC) $(C_OBJS) obj/interchiplet.o -o $(C_TARGET)
23 | 
24 | # Interchiplet library, compiled from source with $(CC).
25 | obj/interchiplet.o: ../../../../interchiplet/srcs/apis_c.cpp | obj_dir
26 | 	$(CC) $(CFLAGS) -c $< -o $@
27 | 
28 | # Rule for C object; the test source comes from the snipersim benchmark directory.
29 | obj/%.o: ../../snipersim/barrier/%.cpp | obj_dir
30 | 	$(CC) $(CFLAGS) -c $< -o $@
31 | 
32 | # Directory for binary files.
33 | bin_dir:
34 | 	mkdir -p bin
35 | 
36 | # Directory for object files for C.
37 | obj_dir:
38 | 	mkdir -p obj
39 | 
40 | # Clean generated files.
41 | clean:
42 | 	rm -rf bench.txt delayInfo.txt buffer* message_record.txt
43 | 	rm -rf proc_r*_t* *.log
44 | 	rm -rf obj bin
45 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5-arm/barrier/test.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt"
 5 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"]
 6 |     log: "gem5.0.0.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 1
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt"
11 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"]
12 |     log: "gem5.0.1.log"
13 |     is_to_stdout: false
14 |     clock_rate: 1
15 |   # Process 2
16 |   - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt"
17 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"]
18 |     log: "gem5.1.0.log"
19 |     is_to_stdout: false
20 |     clock_rate: 1
21 |   # Process 3
22 |   - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt"
23 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"]
24 |     log: "gem5.1.1.log"
25 |     is_to_stdout: false
26 |     clock_rate: 1
27 | 
28 | # Phase 2 configuration.
29 | phase2:
30 |   # Process 0
31 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
32 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
33 |     log: "popnet_0.log"
34 |     is_to_stdout: false
35 |     clock_rate: 1
36 | 
37 | # File configuration. (Not used yet)
38 | bench_file: "./bench.txt"
39 | delayinfo_file: "./delayInfo.txt"
40 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5-arm/launch/makefile:
--------------------------------------------------------------------------------
 1 | # Project environment
 2 | # SIMULATOR_ROOT, defined by setup_env.sh
 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/gem5-arm/launch
 4 | 
 5 | # Compiler environment of C/C++
 6 | CC=aarch64-linux-gnu-g++
 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes
 8 | 
 9 | # C/C++ Source file
10 | C_SRCS=test.cpp
11 | C_OBJS=obj/test.o
12 | C_TARGET=bin/test_c
13 | 
14 | # These targets name actions, not files.
15 | .PHONY: all C_target bin_dir obj_dir clean
16 | 
17 | all: bin_dir obj_dir C_target
18 | 
19 | # C language target. The output directories are order-only prerequisites (after `|`),
20 | # so they exist before use even under `make -j` or when a single target is requested.
21 | C_target: $(C_OBJS) obj/interchiplet.o | bin_dir
22 | 	$(CC) $(C_OBJS) obj/interchiplet.o -o $(C_TARGET)
23 | 
24 | # Interchiplet library, compiled from source with $(CC).
25 | obj/interchiplet.o: ../../../../interchiplet/srcs/apis_c.cpp | obj_dir
26 | 	$(CC) $(CFLAGS) -c $< -o $@
27 | 
28 | # Rule for C object; the test source comes from the snipersim benchmark directory.
29 | obj/%.o: ../../snipersim/launch/%.cpp | obj_dir
30 | 	$(CC) $(CFLAGS) -c $< -o $@
31 | 
32 | # Directory for binary files.
33 | bin_dir:
34 | 	mkdir -p bin
35 | 
36 | # Directory for object files for C.
37 | obj_dir:
38 | 	mkdir -p obj
39 | 
40 | # Clean generated files.
41 | clean:
42 | 	rm -rf bench.txt delayInfo.txt buffer* message_record.txt
43 | 	rm -rf proc_r*_t* *.log
44 | 	rm -rf obj bin
45 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5-arm/launch/test.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt"
 5 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"]
 6 |     log: "gem5.0.0.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 1
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt"
11 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"]
12 |     log: "gem5.0.1.log"
13 |     is_to_stdout: false
14 |     clock_rate: 1
15 |   # Process 2
16 |   - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt"
17 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"]
18 |     log: "gem5.1.0.log"
19 |     is_to_stdout: false
20 |     clock_rate: 1
21 |   # Process 3
22 |   - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt"
23 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"]
24 |     log: "gem5.1.1.log"
25 |     is_to_stdout: false
26 |     clock_rate: 1
27 | 
28 | # Phase 2 configuration.
29 | phase2:
30 |   # Process 0
31 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
32 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
33 |     log: "popnet_0.log"
34 |     is_to_stdout: false
35 |     clock_rate: 1
36 | 
37 | # File configuration. (Not used yet)
38 | bench_file: "./bench.txt"
39 | delayinfo_file: "./delayInfo.txt"
40 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5-arm/lock/makefile:
--------------------------------------------------------------------------------
 1 | # Project environment
 2 | # SIMULATOR_ROOT, defined by setup_env.sh
 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/gem5-arm/lock
 4 | 
 5 | # Compiler environment of C/C++
 6 | CC=aarch64-linux-gnu-g++
 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes
 8 | 
 9 | # C/C++ Source file
10 | C_SRCS=test.cpp
11 | C_OBJS=obj/test.o
12 | C_TARGET=bin/test_c
13 | 
14 | # These targets name actions, not files.
15 | .PHONY: all C_target bin_dir obj_dir clean
16 | 
17 | all: bin_dir obj_dir C_target
18 | 
19 | # C language target. The output directories are order-only prerequisites (after `|`),
20 | # so they exist before use even under `make -j` or when a single target is requested.
21 | C_target: $(C_OBJS) obj/interchiplet.o | bin_dir
22 | 	$(CC) $(C_OBJS) obj/interchiplet.o -o $(C_TARGET)
23 | 
24 | # Interchiplet library, compiled from source with $(CC).
25 | obj/interchiplet.o: ../../../../interchiplet/srcs/apis_c.cpp | obj_dir
26 | 	$(CC) $(CFLAGS) -c $< -o $@
27 | 
28 | # Rule for C object; the test source comes from the snipersim benchmark directory.
29 | obj/%.o: ../../snipersim/lock/%.cpp | obj_dir
30 | 	$(CC) $(CFLAGS) -c $< -o $@
31 | 
32 | # Directory for binary files.
33 | bin_dir:
34 | 	mkdir -p bin
35 | 
36 | # Directory for object files for C.
37 | obj_dir:
38 | 	mkdir -p obj
39 | 
40 | # Clean generated files.
41 | clean:
42 | 	rm -rf bench.txt delayInfo.txt buffer* message_record.txt
43 | 	rm -rf proc_r*_t* *.log
44 | 	rm -rf obj bin
45 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5-arm/lock/test.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt"
 5 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"]
 6 |     log: "gem5.0.0.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 1
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt"
11 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"]
12 |     log: "gem5.0.1.log"
13 |     is_to_stdout: false
14 |     clock_rate: 1
15 |   # Process 2
16 |   - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt"
17 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"]
18 |     log: "gem5.1.0.log"
19 |     is_to_stdout: false
20 |     clock_rate: 1
21 |   # Process 3
22 |   - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt"
23 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"]
24 |     log: "gem5.1.1.log"
25 |     is_to_stdout: false
26 |     clock_rate: 1
27 | 
28 | # Phase 2 configuration.
29 | phase2:
30 |   # Process 0
31 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
32 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
33 |     log: "popnet_0.log"
34 |     is_to_stdout: false
35 |     clock_rate: 1
36 | 
37 | # File configuration. (Not used yet)
38 | bench_file: "./bench.txt"
39 | delayinfo_file: "./delayInfo.txt"
40 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5-arm/pipe/makefile:
--------------------------------------------------------------------------------
 1 | # Project environment
 2 | # SIMULATOR_ROOT, defined by setup_env.sh
 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/gem5-arm/pipe
 4 | 
 5 | # Compiler environment of C/C++
 6 | CC=aarch64-linux-gnu-g++
 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes
 8 | 
 9 | # C/C++ Source file
10 | C_SRCS=test.cpp
11 | C_OBJS=obj/test.o
12 | C_TARGET=bin/test_c
13 | 
14 | # These targets name actions, not files.
15 | .PHONY: all C_target bin_dir obj_dir clean
16 | 
17 | all: bin_dir obj_dir C_target
18 | 
19 | # C language target. The output directories are order-only prerequisites (after `|`),
20 | # so they exist before use even under `make -j` or when a single target is requested.
21 | C_target: $(C_OBJS) obj/interchiplet.o | bin_dir
22 | 	$(CC) $(C_OBJS) obj/interchiplet.o -o $(C_TARGET)
23 | 
24 | # Interchiplet library, compiled from source with $(CC).
25 | obj/interchiplet.o: ../../../../interchiplet/srcs/apis_c.cpp | obj_dir
26 | 	$(CC) $(CFLAGS) -c $< -o $@
27 | 
28 | # Rule for C object; the test source comes from the snipersim benchmark directory.
29 | obj/%.o: ../../snipersim/pipe/%.cpp | obj_dir
30 | 	$(CC) $(CFLAGS) -c $< -o $@
31 | 
32 | # Directory for binary files.
33 | bin_dir:
34 | 	mkdir -p bin
35 | 
36 | # Directory for object files for C.
37 | obj_dir:
38 | 	mkdir -p obj
39 | 
40 | # Clean generated files.
41 | clean:
42 | 	rm -rf bench.txt delayInfo.txt buffer* message_record.txt
43 | 	rm -rf proc_r*_t* *.log
44 | 	rm -rf obj bin
45 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5-arm/pipe/test.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt"
 5 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"]
 6 |     log: "gem5.0.0.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 1
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt"
11 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"]
12 |     log: "gem5.0.1.log"
13 |     is_to_stdout: false
14 |     clock_rate: 1
15 |   # Process 2
16 |   - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt"
17 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"]
18 |     log: "gem5.1.0.log"
19 |     is_to_stdout: false
20 |     clock_rate: 1
21 |   # Process 3
22 |   - cmd: "$SIMULATOR_ROOT/gem5/build/ARM/gem5.opt"
23 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--interp-dir", "/usr/aarch64-linux-gnu", "--redirects", "/lib=/usr/aarch64-linux-gnu/lib", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"]
24 |     log: "gem5.1.1.log"
25 |     is_to_stdout: false
26 |     clock_rate: 1
27 | 
28 | # Phase 2 configuration.
29 | phase2:
30 |   # Process 0
31 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
32 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
33 |     log: "popnet_0.log"
34 |     is_to_stdout: false
35 |     clock_rate: 1
36 | 
37 | # File configuration. (Not used yet)
38 | bench_file: "./bench.txt"
39 | delayinfo_file: "./delayInfo.txt"
40 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5/barrier/makefile:
--------------------------------------------------------------------------------
 1 | # Project environment
 2 | # SIMULATOR_ROOT, defined by setup_env.sh
 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/gem5/barrier
 4 | 
 5 | # Compiler environment of C/C++
 6 | CC=g++
 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes
 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a
 9 | 
10 | # C/C++ Source file
11 | C_SRCS=test.cpp
12 | C_OBJS=obj/test.o
13 | C_TARGET=bin/test_c
14 | 
15 | # These targets name actions, not files.
16 | .PHONY: all C_target bin_dir obj_dir clean
17 | 
18 | all: bin_dir obj_dir C_target
19 | 
20 | # C language target. The output directories are order-only prerequisites (after `|`),
21 | # so they exist before use even under `make -j` or when a single target is requested.
22 | C_target: $(C_OBJS) | bin_dir
23 | 	$(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET)
24 | 
25 | # Rule for C object; the test source comes from the snipersim benchmark directory.
26 | obj/%.o: ../../snipersim/barrier/%.cpp | obj_dir
27 | 	$(CC) $(CFLAGS) -c $< -o $@
28 | 
29 | # Directory for binary files.
30 | bin_dir:
31 | 	mkdir -p bin
32 | 
33 | # Directory for object files for C.
34 | obj_dir:
35 | 	mkdir -p obj
36 | 
37 | # Clean generated files.
38 | clean:
39 | 	rm -rf bench.txt delayInfo.txt buffer* message_record.txt
40 | 	rm -rf proc_r*_t* *.log
41 | 	rm -rf obj bin
42 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5/barrier/test_atomic.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
 5 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"]
 6 |     log: "gem5.0.0.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 500
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
11 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"]
12 |     log: "gem5.0.1.log"
13 |     is_to_stdout: false
14 |     clock_rate: 500
15 |   # Process 2
16 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
17 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"]
18 |     log: "gem5.1.0.log"
19 |     is_to_stdout: false
20 |     clock_rate: 500
21 |   # Process 3
22 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
23 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"]
24 |     log: "gem5.1.1.log"
25 |     is_to_stdout: false
26 |     clock_rate: 500
27 | 
28 | # Phase 2 configuration.
29 | phase2:
30 |   # Process 0
31 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
32 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
33 |     log: "popnet_0.log"
34 |     is_to_stdout: false
35 |     clock_rate: 1
36 | 
37 | # File configuration. (Not used yet)
38 | bench_file: "./bench.txt"
39 | delayinfo_file: "./delayInfo.txt"
40 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5/barrier/test_o3.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
 5 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"]
 6 |     log: "gem5.0.0.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 500
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
11 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"]
12 |     log: "gem5.0.1.log"
13 |     is_to_stdout: false
14 |     clock_rate: 500
15 |   # Process 2
16 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
17 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"]
18 |     log: "gem5.1.0.log"
19 |     is_to_stdout: false
20 |     clock_rate: 500
21 |   # Process 3
22 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
23 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"]
24 |     log: "gem5.1.1.log"
25 |     is_to_stdout: false
26 |     clock_rate: 500
27 | 
28 | # Phase 2 configuration.
29 | phase2:
30 |   # Process 0
31 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
32 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
33 |     log: "popnet_0.log"
34 |     is_to_stdout: false
35 |     clock_rate: 1
36 | 
37 | # File configuration. (Not used yet)
38 | bench_file: "./bench.txt"
39 | delayinfo_file: "./delayInfo.txt"
40 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5/barrier/test_timing.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
 5 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"]
 6 |     log: "gem5.0.0.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 500
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
11 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"]
12 |     log: "gem5.0.1.log"
13 |     is_to_stdout: false
14 |     clock_rate: 500
15 |   # Process 2
16 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
17 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"]
18 |     log: "gem5.1.0.log"
19 |     is_to_stdout: false
20 |     clock_rate: 500
21 |   # Process 3
22 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
23 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"]
24 |     log: "gem5.1.1.log"
25 |     is_to_stdout: false
26 |     clock_rate: 500
27 | 
28 | # Phase 2 configuration.
29 | phase2:
30 |   # Process 0
31 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
32 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
33 |     log: "popnet_0.log"
34 |     is_to_stdout: false
35 |     clock_rate: 1
36 | 
37 | # File configuration. (Not used yet)
38 | bench_file: "./bench.txt"
39 | delayinfo_file: "./delayInfo.txt"
40 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5/launch/makefile:
--------------------------------------------------------------------------------
 1 | # Project environment
 2 | # SIMULATOR_ROOT, defined by setup_env.sh
 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/gem5/launch
 4 | 
 5 | # Compiler environment of C/C++
 6 | CC=g++
 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes
 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a
 9 | 
10 | # C/C++ Source file
11 | C_SRCS=test.cpp
12 | C_OBJS=obj/test.o
13 | C_TARGET=bin/test_c
14 | 
15 | all: bin_dir obj_dir C_target
16 | 
17 | # C language target
18 | C_target: $(C_OBJS)
19 | 	$(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET)
20 | 
21 | # Rule for C object
22 | obj/%.o: ../../snipersim/launch/%.cpp
23 | 	$(CC) $(CFLAGS) -c $< -o $@
24 | 
25 | # Directory for binary files.
26 | bin_dir:
27 | 	mkdir -p bin
28 | 
29 | # Directory for object files for C.
30 | obj_dir:
31 | 	mkdir -p obj
32 | 
33 | # Clean generated files.
34 | clean:
35 | 	rm -rf bench.txt delayInfo.txt buffer* message_record.txt
36 | 	rm -rf proc_r*_t* *.log
37 | 	rm -rf obj bin
38 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5/launch/test_atomic.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
 5 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"]
 6 |     log: "gem5.0.0.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 500
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
11 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"]
12 |     log: "gem5.0.1.log"
13 |     is_to_stdout: false
14 |     clock_rate: 500
15 |   # Process 2
16 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
17 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"]
18 |     log: "gem5.1.0.log"
19 |     is_to_stdout: false
20 |     clock_rate: 500
21 |   # Process 3
22 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
23 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"]
24 |     log: "gem5.1.1.log"
25 |     is_to_stdout: false
26 |     clock_rate: 500
27 | 
28 | # Phase 2 configuration.
29 | phase2:
30 |   # Process 0
31 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
32 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
33 |     log: "popnet_0.log"
34 |     is_to_stdout: false
35 |     clock_rate: 1
36 | 
37 | # File configuration. (Not used yet)
38 | bench_file: "./bench.txt"
39 | delayinfo_file: "./delayInfo.txt"
40 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5/launch/test_o3.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
 5 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"]
 6 |     log: "gem5.0.0.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 500
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
11 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"]
12 |     log: "gem5.0.1.log"
13 |     is_to_stdout: false
14 |     clock_rate: 500
15 |   # Process 2
16 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
17 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"]
18 |     log: "gem5.1.0.log"
19 |     is_to_stdout: false
20 |     clock_rate: 500
21 |   # Process 3
22 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
23 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"]
24 |     log: "gem5.1.1.log"
25 |     is_to_stdout: false
26 |     clock_rate: 500
27 | 
28 | # Phase 2 configuration.
29 | phase2:
30 |   # Process 0
31 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
32 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
33 |     log: "popnet_0.log"
34 |     is_to_stdout: false
35 |     clock_rate: 1
36 | 
37 | # File configuration. (Not used yet)
38 | bench_file: "./bench.txt"
39 | delayinfo_file: "./delayInfo.txt"
40 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5/launch/test_timing.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
 5 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"]
 6 |     log: "gem5.0.0.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 500
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
11 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"]
12 |     log: "gem5.0.1.log"
13 |     is_to_stdout: false
14 |     clock_rate: 500
15 |   # Process 2
16 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
17 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"]
18 |     log: "gem5.1.0.log"
19 |     is_to_stdout: false
20 |     clock_rate: 500
21 |   # Process 3
22 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
23 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"]
24 |     log: "gem5.1.1.log"
25 |     is_to_stdout: false
26 |     clock_rate: 500
27 | 
28 | # Phase 2 configuration.
29 | phase2:
30 |   # Process 0
31 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
32 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
33 |     log: "popnet_0.log"
34 |     is_to_stdout: false
35 |     clock_rate: 1
36 | 
37 | # File configuration. (Not used yet)
38 | bench_file: "./bench.txt"
39 | delayinfo_file: "./delayInfo.txt"
40 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5/lock/makefile:
--------------------------------------------------------------------------------
 1 | # Project environment
 2 | # SIMULATOR_ROOT, defined by setup_env.sh
 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/gem5/lock
 4 | 
 5 | # Compiler environment of C/C++
 6 | CC=g++
 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes
 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a
 9 | 
10 | # C/C++ Source file
11 | C_SRCS=test.cpp
12 | C_OBJS=obj/test.o
13 | C_TARGET=bin/test_c
14 | 
15 | all: bin_dir obj_dir C_target
16 | 
17 | # C language target
18 | C_target: $(C_OBJS)
19 | 	$(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET)
20 | 
21 | # Rule for C object
22 | obj/%.o: ../../snipersim/lock/%.cpp
23 | 	$(CC) $(CFLAGS) -c $< -o $@
24 | 
25 | # Directory for binary files.
26 | bin_dir:
27 | 	mkdir -p bin
28 | 
29 | # Directory for object files for C.
30 | obj_dir:
31 | 	mkdir -p obj
32 | 
33 | # Clean generated files.
34 | clean:
35 | 	rm -rf bench.txt delayInfo.txt buffer* message_record.txt
36 | 	rm -rf proc_r*_t* *.log
37 | 	rm -rf obj bin
38 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5/lock/test_atomic.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
 5 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"]
 6 |     log: "gem5.0.0.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 500
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
11 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"]
12 |     log: "gem5.0.1.log"
13 |     is_to_stdout: false
14 |     clock_rate: 500
15 |   # Process 2
16 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
17 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"]
18 |     log: "gem5.1.0.log"
19 |     is_to_stdout: false
20 |     clock_rate: 500
21 |   # Process 3
22 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
23 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"]
24 |     log: "gem5.1.1.log"
25 |     is_to_stdout: false
26 |     clock_rate: 500
27 | 
28 | # Phase 2 configuration.
29 | phase2:
30 |   # Process 0
31 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
32 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
33 |     log: "popnet_0.log"
34 |     is_to_stdout: false
35 |     clock_rate: 1
36 | 
37 | # File configuration. (Not used yet)
38 | bench_file: "./bench.txt"
39 | delayinfo_file: "./delayInfo.txt"
40 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5/lock/test_o3.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
 5 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"]
 6 |     log: "gem5.0.0.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 500
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
11 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"]
12 |     log: "gem5.0.1.log"
13 |     is_to_stdout: false
14 |     clock_rate: 500
15 |   # Process 2
16 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
17 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"]
18 |     log: "gem5.1.0.log"
19 |     is_to_stdout: false
20 |     clock_rate: 500
21 |   # Process 3
22 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
23 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"]
24 |     log: "gem5.1.1.log"
25 |     is_to_stdout: false
26 |     clock_rate: 500
27 | 
28 | # Phase 2 configuration.
29 | phase2:
30 |   # Process 0
31 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
32 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
33 |     log: "popnet_0.log"
34 |     is_to_stdout: false
35 |     clock_rate: 1
36 | 
37 | # File configuration. (Not used yet)
38 | bench_file: "./bench.txt"
39 | delayinfo_file: "./delayInfo.txt"
40 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5/lock/test_timing.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
 5 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"]
 6 |     log: "gem5.0.0.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 500
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
11 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"]
12 |     log: "gem5.0.1.log"
13 |     is_to_stdout: false
14 |     clock_rate: 500
15 |   # Process 2
16 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
17 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"]
18 |     log: "gem5.1.0.log"
19 |     is_to_stdout: false
20 |     clock_rate: 500
21 |   # Process 3
22 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
23 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"]
24 |     log: "gem5.1.1.log"
25 |     is_to_stdout: false
26 |     clock_rate: 500
27 | 
28 | # Phase 2 configuration.
29 | phase2:
30 |   # Process 0
31 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
32 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
33 |     log: "popnet_0.log"
34 |     is_to_stdout: false
35 |     clock_rate: 1
36 | 
37 | # File configuration. (Not used yet)
38 | bench_file: "./bench.txt"
39 | delayinfo_file: "./delayInfo.txt"
40 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5/pipe/makefile:
--------------------------------------------------------------------------------
 1 | # Project environment
 2 | # SIMULATOR_ROOT, defined by setup_env.sh
 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/gem5/pipe
 4 | 
 5 | # Compiler environment of C/C++
 6 | CC=g++
 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes
 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a
 9 | 
10 | # C/C++ Source file
11 | C_SRCS=test.cpp
12 | C_OBJS=obj/test.o
13 | C_TARGET=bin/test_c
14 | 
15 | all: bin_dir obj_dir C_target
16 | 
17 | # C language target
18 | C_target: $(C_OBJS)
19 | 	$(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET)
20 | 
21 | # Rule for C object
22 | obj/%.o: ../../snipersim/pipe/%.cpp
23 | 	$(CC) $(CFLAGS) -c $< -o $@
24 | 
25 | # Directory for binary files.
26 | bin_dir:
27 | 	mkdir -p bin
28 | 
29 | # Directory for object files for C.
30 | obj_dir:
31 | 	mkdir -p obj
32 | 
33 | # Clean generated files.
34 | clean:
35 | 	rm -rf bench.txt delayInfo.txt buffer* message_record.txt
36 | 	rm -rf proc_r*_t* *.log
37 | 	rm -rf obj bin
38 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5/pipe/test_atomic.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
 5 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"]
 6 |     log: "gem5.0.0.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 500
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
11 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"]
12 |     log: "gem5.0.1.log"
13 |     is_to_stdout: false
14 |     clock_rate: 500
15 |   # Process 2
16 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
17 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"]
18 |     log: "gem5.1.0.log"
19 |     is_to_stdout: false
20 |     clock_rate: 500
21 |   # Process 3
22 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
23 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"]
24 |     log: "gem5.1.1.log"
25 |     is_to_stdout: false
26 |     clock_rate: 500
27 | 
28 | # Phase 2 configuration.
29 | phase2:
30 |   # Process 0
31 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
32 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
33 |     log: "popnet_0.log"
34 |     is_to_stdout: false
35 |     clock_rate: 1
36 | 
37 | # File configuration. (Not used yet)
38 | bench_file: "./bench.txt"
39 | delayinfo_file: "./delayInfo.txt"
40 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5/pipe/test_o3.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
 5 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"]
 6 |     log: "gem5.0.0.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 500
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
11 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"]
12 |     log: "gem5.0.1.log"
13 |     is_to_stdout: false
14 |     clock_rate: 500
15 |   # Process 2
16 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
17 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"]
18 |     log: "gem5.1.0.log"
19 |     is_to_stdout: false
20 |     clock_rate: 500
21 |   # Process 3
22 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
23 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "O3CPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"]
24 |     log: "gem5.1.1.log"
25 |     is_to_stdout: false
26 |     clock_rate: 500
27 | 
28 | # Phase 2 configuration.
29 | phase2:
30 |   # Process 0
31 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
32 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
33 |     log: "popnet_0.log"
34 |     is_to_stdout: false
35 |     clock_rate: 1
36 | 
37 | # File configuration. (Not used yet)
38 | bench_file: "./bench.txt"
39 | delayinfo_file: "./delayInfo.txt"
40 | 


--------------------------------------------------------------------------------
/benchmark/test/gem5/pipe/test_timing.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
 5 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 0"]
 6 |     log: "gem5.0.0.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 500
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
11 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "0 1"]
12 |     log: "gem5.0.1.log"
13 |     is_to_stdout: false
14 |     clock_rate: 500
15 |   # Process 2
16 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
17 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 0"]
18 |     log: "gem5.1.0.log"
19 |     is_to_stdout: false
20 |     clock_rate: 500
21 |   # Process 3
22 |   - cmd: "$SIMULATOR_ROOT/gem5/build/X86/gem5.opt"
23 |     args: ["$SIMULATOR_ROOT/gem5/configs/deprecated/example/se.py", "--cpu-type", "TimingSimpleCPU", "--caches", "--cmd", "$BENCHMARK_ROOT/bin/test_c", "-o", "1 1"]
24 |     log: "gem5.1.1.log"
25 |     is_to_stdout: false
26 |     clock_rate: 500
27 | 
28 | # Phase 2 configuration.
29 | phase2:
30 |   # Process 0
31 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
32 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
33 |     log: "popnet_0.log"
34 |     is_to_stdout: false
35 |     clock_rate: 1
36 | 
37 | # File configuration. (Not used yet)
38 | bench_file: "./bench.txt"
39 | delayinfo_file: "./delayInfo.txt"
40 | 


--------------------------------------------------------------------------------
/benchmark/test/gpgpu-sim/barrier/makefile:
--------------------------------------------------------------------------------
 1 | # Project environment
 2 | # SIMULATOR_ROOT, defined by setup_env.sh
 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/barrier
 4 | 
 5 | # Compiler environment of C/C++
 6 | CC=g++
 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes
 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a
 9 | 
10 | # C/C++ Source file
11 | C_SRCS=msnip.cpp
12 | C_OBJS=obj/msnip.o
13 | C_TARGET=bin/msnip_c
14 | 
15 | # Compiler environment of CUDA
16 | NVCC=nvcc
17 | CUFLAGS=--compiler-options -Wall -I$(SIMULATOR_ROOT)/interchiplet/includes
18 | 
19 | # CUDA Source file
20 | CUDA_SRCS=msnip.cu
21 | CUDA_OBJS=cuobj/msnip.o
22 | CUDA_TARGET=bin/msnip_cu
23 | 
24 | all: bin_dir obj_dir cuobj_dir C_target CUDA_target
25 | 
26 | # C language target
27 | C_target: $(C_OBJS)
28 | 	$(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET)
29 | 
30 | # CUDA language target
31 | CUDA_target: $(CUDA_OBJS)
32 | 	$(NVCC) -L$(SIMULATOR_ROOT)/gpgpu-sim/lib/$(GPGPUSIM_CONFIG) --cudart shared $(CUDA_OBJS) -o $(CUDA_TARGET)
33 | 
34 | # Rule for C object
35 | obj/%.o: %.cpp
36 | 	$(CC) $(CFLAGS) -c $< -o $@
37 | 
38 | # Rule for Cuda object
39 | cuobj/%.o: %.cu
40 | 	$(NVCC) $(CUFLAGS) -c $< -o $@
41 | 
42 | # Directory for binary files.
43 | bin_dir:
44 | 	mkdir -p bin
45 | 
46 | # Directory for object files for C.
47 | obj_dir:
48 | 	mkdir -p obj
49 | 
50 | # Directory for object files for CUDA.
51 | cuobj_dir:
52 | 	mkdir -p cuobj
53 | 
54 | # Clean generated files.
55 | clean:
56 | 	rm -rf bench.txt delayInfo.txt buffer* message_record.txt
57 | 	rm -rf proc_r*_t* *.log
58 | 	rm -rf obj cuobj bin
59 | 


--------------------------------------------------------------------------------
/benchmark/test/gpgpu-sim/barrier/matmul.cpp:
--------------------------------------------------------------------------------
 1 | #include <fstream>
 2 | #include <iostream>
 3 | 
 4 | #include "apis_c.h"
 5 | #include "unistd.h"
 6 | 
 7 | #define Row 100
 8 | #define Col 100
 9 | 
10 | int idX, idY;
11 | 
12 | /**
13 |  * Host-side driver of the barrier test.
14 |  *
15 |  * Reads this process's coordinates from argv[1] (idX) and argv[2] (idY), fills
16 |  * two Row x Col matrices with pseudo-random values in [0, 50], and then, for
17 |  * two rounds, issues launch / sendMessage / receiveMessage calls that pair
18 |  * address (1, 1) with (idX, idY) before waiting on barrier 0xFF.
19 |  *
20 |  * Returns 0 on success and 1 on a usage or allocation error.
21 |  */
22 | int main(int argc, char **argv) {
23 |     // Both coordinates are mandatory; reading argv[1]/argv[2] without them is undefined.
24 |     if (argc < 3) {
25 |         std::cerr << "Usage: <program> <idX> <idY>" << std::endl;
26 |         return 1;
27 |     }
28 |     idX = atoi(argv[1]);
29 |     idY = atoi(argv[2]);
30 | 
31 |     // Derive the transfer sizes from Row/Col so they cannot drift from the buffers.
32 |     const size_t matrix_bytes = sizeof(int64_t) * Row * Col;
33 |     const size_t result_bytes = sizeof(int64_t) * Col;
34 | 
35 |     int64_t *A = (int64_t *)malloc(matrix_bytes);
36 |     int64_t *B = (int64_t *)malloc(matrix_bytes);
37 |     int64_t *C = (int64_t *)malloc(result_bytes);
38 |     if (!A || !B || !C) {
39 |         std::cerr << "Failed to allocate matrix buffers" << std::endl;
40 |         free(A);
41 |         free(B);
42 |         free(C);
43 |         return 1;
44 |     }
45 | 
46 |     for (int i = 0; i < Row * Col; i++) {
47 |         A[i] = rand() % 51;
48 |         B[i] = rand() % 51;
49 |     }
50 | 
51 |     for (int i = 0; i < 2; i++) {
52 |         // Create time gap between cycles.
53 |         usleep((idX * 2 + idY) * 100);
54 | 
55 |         InterChiplet::launch(1, 1, idX, idY);
56 | 
57 |         InterChiplet::sendMessage(1, 1, idX, idY, A, matrix_bytes);
58 |         InterChiplet::sendMessage(1, 1, idX, idY, B, matrix_bytes);
59 | 
60 |         InterChiplet::receiveMessage(idX, idY, 1, 1, C, result_bytes);
61 | 
62 |         InterChiplet::barrier(0xFF, idX, idY, 3);
63 |     }
64 | 
65 |     // Release the host buffers instead of leaking them until process exit.
66 |     free(A);
67 |     free(B);
68 |     free(C);
69 |     return 0;
70 | }
71 | 


--------------------------------------------------------------------------------
/benchmark/test/gpgpu-sim/barrier/matmul.cu:
--------------------------------------------------------------------------------
 1 | #include <math.h>
 2 | #include <stdint.h>
 3 | #include <stdio.h>
 4 | #include <sys/time.h>
 5 | 
 6 | #include <fstream>
 7 | #include <iostream>
 8 | #include <string>
 9 | 
10 | #include "apis_cu.h"
11 | #include "cuda_runtime.h"
12 | #include "device_launch_parameters.h"
13 | 
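14 | /**
15 |  * Example program: four GPU chiplets compute the product of a random matrix
16 |  * A (400 * 100) and a random matrix B (100 * 400). By the rules of matrix
17 |  * multiplication, the task can be split into four 100*100 products whose results are summed.
18 |  */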
19 | 
20 | #define Row 100
21 | #define Col 100
22 | 
23 | /**
24 |  * Matrix-multiplication kernel. Every thread runs it once and produces one
25 |  * element of the result, selected by its thread index:
26 |  * P[ty * 10 + tx] = sum over k < width of M[ty * width + k] * N[k * width + tx].
27 |  */
28 | __global__ void matrix_mul_gpu(int64_t* M, int64_t* N, int64_t* P, int width) {
29 |     const int col = threadIdx.x;
30 |     const int row = threadIdx.y;
31 |     // Dot product of row `row` of M with column `col` of N.
32 |     int64_t acc = 0;
33 |     int k = 0;
34 |     while (k < width) {
35 |         acc += M[row * width + k] * N[k * width + col];
36 |         ++k;
37 |     }
38 |     P[col + row * 10] = acc;
39 | }
40 | 
41 | /**
42 |  * GPU-side worker: serves six launch requests. In each round it waits for a
43 |  * launch, receives two Row x Col operand matrices into device memory, runs
44 |  * matrix_mul_gpu on them with a single 10 x 10 thread block and sends the
45 |  * result buffer back.
46 |  *
47 |  * Usage: <program> <idX> <idY>, the coordinates of this chiplet.
48 |  *
49 |  * @return 0 on success, 1 when the coordinates are missing.
50 |  */
51 | int main(int argc, char** argv) {
52 |     // argv[1] and argv[2] are dereferenced below, so both must be present.
53 |     if (argc < 3) {
54 |         std::cerr << "Usage: <program> <idX> <idY>" << std::endl;
55 |         return 1;
56 |     }
57 | 
58 |     // Read the coordinates of the chiplet this process represents.
59 |     int idX = atoi(argv[1]);
60 |     int idY = atoi(argv[2]);
61 | 
62 |     for (int round = 0; round < 6; round++) {
63 |         // Presumably waitLaunch reports the requesting chiplet through
64 |         // locker_x/locker_y -- confirm against apis_cu.h.
65 |         int locker_x = -1, locker_y = -1;
66 |         waitLaunch(idX, idY, &locker_x, &locker_y);
67 | 
68 |         int64_t *d_dataA, *d_dataB, *d_dataC;
69 |         cudaMalloc((void**)&d_dataA, sizeof(int64_t) * Row * Col);
70 |         cudaMalloc((void**)&d_dataB, sizeof(int64_t) * Row * Col);
71 |         cudaMalloc((void**)&d_dataC, sizeof(int64_t) * Col);
72 | 
73 |         receiveMessage(idX, idY, locker_x, locker_y, d_dataA, sizeof(int64_t) * Row * Col);
74 |         receiveMessage(idX, idY, locker_x, locker_y, d_dataB, sizeof(int64_t) * Row * Col);
75 | 
76 |         // calculate
77 |         dim3 threadPerBlock(10, 10);
78 |         dim3 blockNumber(1);
79 |         matrix_mul_gpu<<<blockNumber, threadPerBlock>>>(d_dataA, d_dataB, d_dataC, Col);
80 | 
81 |         sendMessage(locker_x, locker_y, idX, idY, d_dataC, Col * sizeof(int64_t));
82 |         cudaFree(d_dataA);
83 |         cudaFree(d_dataB);
84 |         cudaFree(d_dataC);
85 |     }
86 |     return 0;
87 | }
88 | 


--------------------------------------------------------------------------------
/benchmark/test/gpgpu-sim/barrier/matmul.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
 5 |     args: ["--", "$BENCHMARK_ROOT/bin/msnip_c", "0", "1"]
 6 |     log: "sniper.0.1.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 1
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
11 |     args: ["--", "$BENCHMARK_ROOT/bin/msnip_c", "1", "0"]
12 |     log: "sniper.1.0.log"
13 |     is_to_stdout: false
14 |     clock_rate: 1
15 |   # Process 2
16 |   - cmd: "$BENCHMARK_ROOT/bin/msnip_cu"
17 |     args: ["1", "1"]
18 |     log: "gpgpusim.1.1.log"
19 |     is_to_stdout: false
20 |     clock_rate: 1
21 |     pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*"
22 |   # Process 3
23 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
24 |     args: ["--", "$BENCHMARK_ROOT/bin/msnip_c", "0", "0"]
25 |     log: "sniper.0.0.log"
26 |     is_to_stdout: false
27 |     clock_rate: 1
28 | 
29 | # Phase 2 configuration.
30 | phase2:
31 |   # Process 0
32 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
33 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
34 |     log: "popnet_0.log"
35 |     is_to_stdout: false
36 |     clock_rate: 1
37 | 
38 | # File configuration. (Not used yet)
39 | bench_file: "./bench.txt"
40 | delayinfo_file: "./delayInfo.txt"
41 | 


--------------------------------------------------------------------------------
/benchmark/test/gpgpu-sim/launch/makefile:
--------------------------------------------------------------------------------
 1 | # Project environment
 2 | # SIMULATOR_ROOT, defined by setup_env.sh
 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/gpgpu-sim/launch
 4 | 
 5 | # Compiler environment of C/C++
 6 | CC=g++
 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes
 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a
 9 | 
10 | # C/C++ Source file
11 | # The sources in this directory are matmul.*; the binaries keep the msnip_*
12 | # names because matmul.yml starts them under those names.
13 | C_SRCS=matmul.cpp
14 | C_OBJS=obj/matmul.o
15 | C_TARGET=bin/msnip_c
16 | 
17 | # Compiler environment of CUDA
18 | NVCC=nvcc
19 | CUFLAGS=--compiler-options -Wall -I$(SIMULATOR_ROOT)/interchiplet/includes
20 | 
21 | # CUDA Source file
22 | CUDA_SRCS=matmul.cu
23 | CUDA_OBJS=cuobj/matmul.o
24 | CUDA_TARGET=bin/msnip_cu
25 | 
26 | # None of these targets produces a file of the same name.
27 | .PHONY: all C_target CUDA_target bin_dir obj_dir cuobj_dir clean
28 | 
29 | all: bin_dir obj_dir cuobj_dir C_target CUDA_target
30 | 
31 | # C language target
32 | C_target: $(C_OBJS)
33 | 	$(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET)
34 | 
35 | # CUDA language target
36 | CUDA_target: $(CUDA_OBJS)
37 | 	$(NVCC) -L$(SIMULATOR_ROOT)/gpgpu-sim/lib/$(GPGPUSIM_CONFIG) --cudart shared $(CUDA_OBJS) -o $(CUDA_TARGET)
38 | 
39 | # Rule for C object
40 | obj/%.o: %.cpp
41 | 	$(CC) $(CFLAGS) -c $< -o $@
42 | 
43 | # Rule for Cuda object
44 | cuobj/%.o: %.cu
45 | 	$(NVCC) $(CUFLAGS) -c $< -o $@
46 | 
47 | # Directory for binary files.
48 | bin_dir:
49 | 	mkdir -p bin
50 | 
51 | # Directory for object files for C.
52 | obj_dir:
53 | 	mkdir -p obj
54 | 
55 | # Directory for object files for CUDA.
56 | cuobj_dir:
57 | 	mkdir -p cuobj
58 | 
59 | # Clean generated files.
60 | clean:
61 | 	rm -rf bench.txt delayInfo.txt buffer* message_record.txt
62 | 	rm -rf proc_r*_t* *.log
63 | 	rm -rf obj cuobj bin
64 | 


--------------------------------------------------------------------------------
/benchmark/test/gpgpu-sim/launch/matmul.cpp:
--------------------------------------------------------------------------------
 1 | #include <fstream>
 2 | #include <iostream>
 3 | 
 4 | #include "apis_c.h"
 5 | #include "unistd.h"
 6 | 
 7 | #define Row 100
 8 | #define Col 100
 9 | 
10 | int idX, idY;
11 | 
12 | /**
13 |  * CPU-side driver of the test: twice launches the chiplet at (1, 1), sends it
14 |  * the operand matrices A and B, receives a result vector into C and then waits
15 |  * on a barrier.
16 |  *
17 |  * Usage: <program> <idX> <idY>, the coordinates of this chiplet.
18 |  * Argument order of the InterChiplet calls is presumably (dst, src, ...) --
19 |  * confirm against apis_c.h.
20 |  *
21 |  * @return 0 on success, 1 on missing arguments or allocation failure.
22 |  */
23 | int main(int argc, char **argv) {
24 |     // argv[1] and argv[2] are dereferenced below, so both must be present.
25 |     if (argc < 3) {
26 |         std::cerr << "Usage: <program> <idX> <idY>" << std::endl;
27 |         return 1;
28 |     }
29 |     idX = atoi(argv[1]);
30 |     idY = atoi(argv[2]);
31 | 
32 |     int64_t *A = (int64_t *)malloc(sizeof(int64_t) * Row * Col);
33 |     int64_t *B = (int64_t *)malloc(sizeof(int64_t) * Row * Col);
34 |     int64_t *C = (int64_t *)malloc(sizeof(int64_t) * Col);
35 |     if (A == NULL || B == NULL || C == NULL) {
36 |         std::cerr << "Failed to allocate matrix buffers" << std::endl;
37 |         free(A);
38 |         free(B);
39 |         free(C);
40 |         return 1;
41 |     }
42 | 
43 |     // Operands are pseudo-random values in [0, 50].
44 |     for (int i = 0; i < Row * Col; i++) {
45 |         A[i] = rand() % 51;
46 |         B[i] = rand() % 51;
47 |     }
48 | 
49 |     for (int i = 0; i < 2; i++) {
50 |         // Create time gap between cycles.
51 |         usleep((idX * 2 + idY) * 100);
52 | 
53 |         InterChiplet::launch(1, 1, idX, idY);
54 | 
55 |         // Message sizes follow the buffer dimensions instead of literals.
56 |         InterChiplet::sendMessage(1, 1, idX, idY, A, Row * Col * sizeof(int64_t));
57 |         InterChiplet::sendMessage(1, 1, idX, idY, B, Row * Col * sizeof(int64_t));
58 | 
59 |         InterChiplet::receiveMessage(idX, idY, 1, 1, C, Col * sizeof(int64_t));
60 | 
61 |         InterChiplet::barrier(0xFF, idX, idY, 3);
62 |     }
63 | 
64 |     free(A);
65 |     free(B);
66 |     free(C);
67 |     return 0;
68 | }
69 | 


--------------------------------------------------------------------------------
/benchmark/test/gpgpu-sim/launch/matmul.cu:
--------------------------------------------------------------------------------
 1 | #include <math.h>
 2 | #include <stdint.h>
 3 | #include <stdio.h>
 4 | #include <sys/time.h>
 5 | 
 6 | #include <fstream>
 7 | #include <iostream>
 8 | #include <string>
 9 | 
10 | #include "apis_cu.h"
11 | #include "cuda_runtime.h"
12 | #include "device_launch_parameters.h"
13 | 
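14 | /**
15 |  * Example program: four GPU chiplets compute the product of a random matrix
16 |  * A (400 * 100) and a random matrix B (100 * 400). By the rules of matrix
17 |  * multiplication, the task can be split into four 100*100 products whose results are summed.
18 |  */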
19 | 
20 | #define Row 100
21 | #define Col 100
22 | 
23 | /**
24 |  * Matrix-multiplication kernel. Every thread runs it once and produces one
25 |  * element of the result, selected by its thread index:
26 |  * P[ty * 10 + tx] = sum over k < width of M[ty * width + k] * N[k * width + tx].
27 |  */
28 | __global__ void matrix_mul_gpu(int64_t* M, int64_t* N, int64_t* P, int width) {
29 |     const int col = threadIdx.x;
30 |     const int row = threadIdx.y;
31 |     // Dot product of row `row` of M with column `col` of N.
32 |     int64_t acc = 0;
33 |     int k = 0;
34 |     while (k < width) {
35 |         acc += M[row * width + k] * N[k * width + col];
36 |         ++k;
37 |     }
38 |     P[col + row * 10] = acc;
39 | }
40 | 
41 | /**
42 |  * GPU-side worker: serves six launch requests. In each round it waits for a
43 |  * launch, receives two Row x Col operand matrices into device memory, runs
44 |  * matrix_mul_gpu on them with a single 10 x 10 thread block and sends the
45 |  * result buffer back.
46 |  *
47 |  * Usage: <program> <idX> <idY>, the coordinates of this chiplet.
48 |  *
49 |  * @return 0 on success, 1 when the coordinates are missing.
50 |  */
51 | int main(int argc, char** argv) {
52 |     // argv[1] and argv[2] are dereferenced below, so both must be present.
53 |     if (argc < 3) {
54 |         std::cerr << "Usage: <program> <idX> <idY>" << std::endl;
55 |         return 1;
56 |     }
57 | 
58 |     // Read the coordinates of the chiplet this process represents.
59 |     int idX = atoi(argv[1]);
60 |     int idY = atoi(argv[2]);
61 | 
62 |     for (int round = 0; round < 6; round++) {
63 |         // Presumably waitLaunch reports the requesting chiplet through
64 |         // locker_x/locker_y -- confirm against apis_cu.h.
65 |         int locker_x = -1, locker_y = -1;
66 |         waitLaunch(idX, idY, &locker_x, &locker_y);
67 | 
68 |         int64_t *d_dataA, *d_dataB, *d_dataC;
69 |         cudaMalloc((void**)&d_dataA, sizeof(int64_t) * Row * Col);
70 |         cudaMalloc((void**)&d_dataB, sizeof(int64_t) * Row * Col);
71 |         cudaMalloc((void**)&d_dataC, sizeof(int64_t) * Col);
72 | 
73 |         receiveMessage(idX, idY, locker_x, locker_y, d_dataA, sizeof(int64_t) * Row * Col);
74 |         receiveMessage(idX, idY, locker_x, locker_y, d_dataB, sizeof(int64_t) * Row * Col);
75 | 
76 |         // calculate
77 |         dim3 threadPerBlock(10, 10);
78 |         dim3 blockNumber(1);
79 |         matrix_mul_gpu<<<blockNumber, threadPerBlock>>>(d_dataA, d_dataB, d_dataC, Col);
80 | 
81 |         sendMessage(locker_x, locker_y, idX, idY, d_dataC, Col * sizeof(int64_t));
82 |         cudaFree(d_dataA);
83 |         cudaFree(d_dataB);
84 |         cudaFree(d_dataC);
85 |     }
86 |     return 0;
87 | }
88 | 


--------------------------------------------------------------------------------
/benchmark/test/gpgpu-sim/launch/matmul.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
 5 |     args: ["--", "$BENCHMARK_ROOT/bin/msnip_c", "0", "1"]
 6 |     log: "sniper.0.1.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 1
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
11 |     args: ["--", "$BENCHMARK_ROOT/bin/msnip_c", "1", "0"]
12 |     log: "sniper.1.0.log"
13 |     is_to_stdout: false
14 |     clock_rate: 1
15 |   # Process 2
16 |   - cmd: "$BENCHMARK_ROOT/bin/msnip_cu"
17 |     args: ["1", "1"]
18 |     log: "gpgpusim.1.1.log"
19 |     is_to_stdout: false
20 |     clock_rate: 1
21 |     pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*"
22 |   # Process 3
23 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
24 |     args: ["--", "$BENCHMARK_ROOT/bin/msnip_c", "0", "0"]
25 |     log: "sniper.0.0.log"
26 |     is_to_stdout: false
27 |     clock_rate: 1
28 | 
29 | # Phase 2 configuration.
30 | phase2:
31 |   # Process 0
32 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
33 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
34 |     log: "popnet_0.log"
35 |     is_to_stdout: false
36 |     clock_rate: 1
37 | 
38 | # File configuration. (Not used yet)
39 | bench_file: "./bench.txt"
40 | delayinfo_file: "./delayInfo.txt"
41 | 


--------------------------------------------------------------------------------
/benchmark/test/gpgpu-sim/lock/makefile:
--------------------------------------------------------------------------------
 1 | # Project environment
 2 | # SIMULATOR_ROOT, defined by setup_env.sh
 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/gpgpu-sim/lock
 4 | 
 5 | # Compiler environment of C/C++
 6 | CC=g++
 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes
 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a
 9 | 
10 | # C/C++ Source file
11 | # The sources in this directory are matmul.*; the binaries keep the msnip_*
12 | # names because matmul.yml starts them under those names.
13 | C_SRCS=matmul.cpp
14 | C_OBJS=obj/matmul.o
15 | C_TARGET=bin/msnip_c
16 | 
17 | # Compiler environment of CUDA
18 | NVCC=nvcc
19 | CUFLAGS=--compiler-options -Wall -I$(SIMULATOR_ROOT)/interchiplet/includes
20 | 
21 | # CUDA Source file
22 | CUDA_SRCS=matmul.cu
23 | CUDA_OBJS=cuobj/matmul.o
24 | CUDA_TARGET=bin/msnip_cu
25 | 
26 | # None of these targets produces a file of the same name.
27 | .PHONY: all C_target CUDA_target bin_dir obj_dir cuobj_dir clean
28 | 
29 | all: bin_dir obj_dir cuobj_dir C_target CUDA_target
30 | 
31 | # C language target
32 | C_target: $(C_OBJS)
33 | 	$(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET)
34 | 
35 | # CUDA language target
36 | CUDA_target: $(CUDA_OBJS)
37 | 	$(NVCC) -L$(SIMULATOR_ROOT)/gpgpu-sim/lib/$(GPGPUSIM_CONFIG) --cudart shared $(CUDA_OBJS) -o $(CUDA_TARGET)
38 | 
39 | # Rule for C object
40 | obj/%.o: %.cpp
41 | 	$(CC) $(CFLAGS) -c $< -o $@
42 | 
43 | # Rule for Cuda object
44 | cuobj/%.o: %.cu
45 | 	$(NVCC) $(CUFLAGS) -c $< -o $@
46 | 
47 | # Directory for binary files.
48 | bin_dir:
49 | 	mkdir -p bin
50 | 
51 | # Directory for object files for C.
52 | obj_dir:
53 | 	mkdir -p obj
54 | 
55 | # Directory for object files for CUDA.
56 | cuobj_dir:
57 | 	mkdir -p cuobj
58 | 
59 | # Clean generated files.
60 | clean:
61 | 	rm -rf bench.txt delayInfo.txt buffer* message_record.txt
62 | 	rm -rf proc_r*_t* *.log
63 | 	rm -rf obj cuobj bin
64 | 


--------------------------------------------------------------------------------
/benchmark/test/gpgpu-sim/lock/matmul.cpp:
--------------------------------------------------------------------------------
 1 | #include <fstream>
 2 | #include <iostream>
 3 | 
 4 | #include "apis_c.h"
 5 | #include "unistd.h"
 6 | 
 7 | #define Row 100
 8 | #define Col 100
 9 | 
10 | int idX, idY;
11 | 
12 | /**
13 |  * CPU-side driver of the test: twice launches the chiplet at (1, 1), sends it
14 |  * the operand matrices A and B, receives a result vector into C and then waits
15 |  * on a barrier.
16 |  *
17 |  * Usage: <program> <idX> <idY>, the coordinates of this chiplet.
18 |  * Argument order of the InterChiplet calls is presumably (dst, src, ...) --
19 |  * confirm against apis_c.h.
20 |  *
21 |  * @return 0 on success, 1 on missing arguments or allocation failure.
22 |  */
23 | int main(int argc, char **argv) {
24 |     // argv[1] and argv[2] are dereferenced below, so both must be present.
25 |     if (argc < 3) {
26 |         std::cerr << "Usage: <program> <idX> <idY>" << std::endl;
27 |         return 1;
28 |     }
29 |     idX = atoi(argv[1]);
30 |     idY = atoi(argv[2]);
31 | 
32 |     int64_t *A = (int64_t *)malloc(sizeof(int64_t) * Row * Col);
33 |     int64_t *B = (int64_t *)malloc(sizeof(int64_t) * Row * Col);
34 |     int64_t *C = (int64_t *)malloc(sizeof(int64_t) * Col);
35 |     if (A == NULL || B == NULL || C == NULL) {
36 |         std::cerr << "Failed to allocate matrix buffers" << std::endl;
37 |         free(A);
38 |         free(B);
39 |         free(C);
40 |         return 1;
41 |     }
42 | 
43 |     // Operands are pseudo-random values in [0, 50].
44 |     for (int i = 0; i < Row * Col; i++) {
45 |         A[i] = rand() % 51;
46 |         B[i] = rand() % 51;
47 |     }
48 | 
49 |     for (int i = 0; i < 2; i++) {
50 |         // Create time gap between cycles.
51 |         usleep((idX * 2 + idY) * 100);
52 | 
53 |         InterChiplet::launch(1, 1, idX, idY);
54 | 
55 |         // Message sizes follow the buffer dimensions instead of literals.
56 |         InterChiplet::sendMessage(1, 1, idX, idY, A, Row * Col * sizeof(int64_t));
57 |         InterChiplet::sendMessage(1, 1, idX, idY, B, Row * Col * sizeof(int64_t));
58 | 
59 |         InterChiplet::receiveMessage(idX, idY, 1, 1, C, Col * sizeof(int64_t));
60 | 
61 |         InterChiplet::barrier(0xFF, idX, idY, 3);
62 |     }
63 | 
64 |     free(A);
65 |     free(B);
66 |     free(C);
67 |     return 0;
68 | }
69 | 


--------------------------------------------------------------------------------
/benchmark/test/gpgpu-sim/lock/matmul.cu:
--------------------------------------------------------------------------------
 1 | #include <math.h>
 2 | #include <stdint.h>
 3 | #include <stdio.h>
 4 | #include <sys/time.h>
 5 | 
 6 | #include <fstream>
 7 | #include <iostream>
 8 | #include <string>
 9 | 
10 | #include "apis_cu.h"
11 | #include "cuda_runtime.h"
12 | #include "device_launch_parameters.h"
13 | 
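14 | /**
15 |  * Example program: four GPU chiplets compute the product of a random matrix
16 |  * A (400 * 100) and a random matrix B (100 * 400). By the rules of matrix
17 |  * multiplication, the task can be split into four 100*100 products whose results are summed.
18 |  */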
19 | 
20 | #define Row 100
21 | #define Col 100
22 | 
23 | /**
24 |  * Matrix-multiplication kernel. Every thread runs it once and produces one
25 |  * element of the result, selected by its thread index:
26 |  * P[ty * 10 + tx] = sum over k < width of M[ty * width + k] * N[k * width + tx].
27 |  */
28 | __global__ void matrix_mul_gpu(int64_t* M, int64_t* N, int64_t* P, int width) {
29 |     const int col = threadIdx.x;
30 |     const int row = threadIdx.y;
31 |     // Dot product of row `row` of M with column `col` of N.
32 |     int64_t acc = 0;
33 |     int k = 0;
34 |     while (k < width) {
35 |         acc += M[row * width + k] * N[k * width + col];
36 |         ++k;
37 |     }
38 |     P[col + row * 10] = acc;
39 | }
40 | 
41 | /**
42 |  * GPU-side worker: serves six launch requests. In each round it waits for a
43 |  * launch, receives two Row x Col operand matrices into device memory, runs
44 |  * matrix_mul_gpu on them with a single 10 x 10 thread block and sends the
45 |  * result buffer back.
46 |  *
47 |  * Usage: <program> <idX> <idY>, the coordinates of this chiplet.
48 |  *
49 |  * @return 0 on success, 1 when the coordinates are missing.
50 |  */
51 | int main(int argc, char** argv) {
52 |     // argv[1] and argv[2] are dereferenced below, so both must be present.
53 |     if (argc < 3) {
54 |         std::cerr << "Usage: <program> <idX> <idY>" << std::endl;
55 |         return 1;
56 |     }
57 | 
58 |     // Read the coordinates of the chiplet this process represents.
59 |     int idX = atoi(argv[1]);
60 |     int idY = atoi(argv[2]);
61 | 
62 |     for (int round = 0; round < 6; round++) {
63 |         // Presumably waitLaunch reports the requesting chiplet through
64 |         // locker_x/locker_y -- confirm against apis_cu.h.
65 |         int locker_x = -1, locker_y = -1;
66 |         waitLaunch(idX, idY, &locker_x, &locker_y);
67 | 
68 |         int64_t *d_dataA, *d_dataB, *d_dataC;
69 |         cudaMalloc((void**)&d_dataA, sizeof(int64_t) * Row * Col);
70 |         cudaMalloc((void**)&d_dataB, sizeof(int64_t) * Row * Col);
71 |         cudaMalloc((void**)&d_dataC, sizeof(int64_t) * Col);
72 | 
73 |         receiveMessage(idX, idY, locker_x, locker_y, d_dataA, sizeof(int64_t) * Row * Col);
74 |         receiveMessage(idX, idY, locker_x, locker_y, d_dataB, sizeof(int64_t) * Row * Col);
75 | 
76 |         // calculate
77 |         dim3 threadPerBlock(10, 10);
78 |         dim3 blockNumber(1);
79 |         matrix_mul_gpu<<<blockNumber, threadPerBlock>>>(d_dataA, d_dataB, d_dataC, Col);
80 | 
81 |         sendMessage(locker_x, locker_y, idX, idY, d_dataC, Col * sizeof(int64_t));
82 |         cudaFree(d_dataA);
83 |         cudaFree(d_dataB);
84 |         cudaFree(d_dataC);
85 |     }
86 |     return 0;
87 | }
88 | 


--------------------------------------------------------------------------------
/benchmark/test/gpgpu-sim/lock/matmul.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
 5 |     args: ["--", "$BENCHMARK_ROOT/bin/msnip_c", "0", "1"]
 6 |     log: "sniper.0.1.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 1
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
11 |     args: ["--", "$BENCHMARK_ROOT/bin/msnip_c", "1", "0"]
12 |     log: "sniper.1.0.log"
13 |     is_to_stdout: false
14 |     clock_rate: 1
15 |   # Process 2
16 |   - cmd: "$BENCHMARK_ROOT/bin/msnip_cu"
17 |     args: ["1", "1"]
18 |     log: "gpgpusim.1.1.log"
19 |     is_to_stdout: false
20 |     clock_rate: 1
21 |     pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*"
22 |   # Process 3
23 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
24 |     args: ["--", "$BENCHMARK_ROOT/bin/msnip_c", "0", "0"]
25 |     log: "sniper.0.0.log"
26 |     is_to_stdout: false
27 |     clock_rate: 1
28 | 
29 | # Phase 2 configuration.
30 | phase2:
31 |   # Process 0
32 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
33 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
34 |     log: "popnet_0.log"
35 |     is_to_stdout: false
36 |     clock_rate: 1
37 | 
38 | # File configuration. (Not used yet)
39 | bench_file: "./bench.txt"
40 | delayinfo_file: "./delayInfo.txt"
41 | 


--------------------------------------------------------------------------------
/benchmark/test/gpgpu-sim/pipe/makefile:
--------------------------------------------------------------------------------
 1 | # Project environment
 2 | # SIMULATOR_ROOT, defined by setup_env.sh
 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/gpgpu-sim/pipe
 4 | 
 5 | # Compiler environment of C/C++
 6 | CC=g++
 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes
 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a
 9 | 
10 | # C/C++ Source file
11 | C_SRCS=matmul.cpp
12 | C_OBJS=obj/matmul.o
13 | C_TARGET=bin/matmul_c
14 | 
15 | # Compiler environment of CUDA
16 | NVCC=nvcc
17 | CUFLAGS=--compiler-options -Wall -I$(SIMULATOR_ROOT)/interchiplet/includes
18 | 
19 | # CUDA Source file
20 | CUDA_SRCS=matmul.cu
21 | CUDA_OBJS=cuobj/matmul.o
22 | CUDA_TARGET=bin/matmul_cu
23 | 
24 | # None of these targets produces a file of the same name.
25 | .PHONY: all C_target CUDA_target bin_dir obj_dir cuobj_dir clean
26 | 
27 | all: bin_dir obj_dir cuobj_dir C_target CUDA_target
28 | 
29 | # C language target
30 | C_target: $(C_OBJS)
31 | 	$(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET)
32 | 
33 | # CUDA language target
34 | CUDA_target: $(CUDA_OBJS)
35 | 	$(NVCC) -L$(SIMULATOR_ROOT)/gpgpu-sim/lib/$(GPGPUSIM_CONFIG) --cudart shared $(CUDA_OBJS) -o $(CUDA_TARGET)
36 | 
37 | # Rule for C object
38 | obj/%.o: %.cpp
39 | 	$(CC) $(CFLAGS) -c $< -o $@
40 | 
41 | # Rule for Cuda object
42 | cuobj/%.o: %.cu
43 | 	$(NVCC) $(CUFLAGS) -c $< -o $@
44 | 
45 | # Directory for binary files.
46 | bin_dir:
47 | 	mkdir -p bin
48 | 
49 | # Directory for object files for C.
50 | obj_dir:
51 | 	mkdir -p obj
52 | 
53 | # Directory for object files for CUDA.
54 | cuobj_dir:
55 | 	mkdir -p cuobj
56 | 
57 | # Clean generated files.
58 | clean:
59 | 	rm -rf bench.txt delayInfo.txt buffer* message_record.txt
60 | 	rm -rf proc_r*_t* *.log
61 | 	rm -rf obj cuobj bin
62 | 


--------------------------------------------------------------------------------
/benchmark/test/gpgpu-sim/pipe/matmul.cpp:
--------------------------------------------------------------------------------
 1 | #include <fstream>
 2 | #include <iostream>
 3 | 
 4 | #include "apis_c.h"
 5 | 
 6 | #define Row 100
 7 | #define Col 100
 8 | 
 9 | int idX, idY;
10 | 
11 | /**
12 |  * CPU-side driver of the test: sends the operand matrices A and B to the
13 |  * chiplets (0, 1), (1, 0) and (1, 1), receives one result vector from each of
14 |  * them and accumulates the three vectors element-wise into C1.
15 |  *
16 |  * Usage: <program> <idX> <idY>, the coordinates of this chiplet.
17 |  * Argument order of the InterChiplet calls is presumably (dst, src, ...) --
18 |  * confirm against apis_c.h.
19 |  *
20 |  * @return 0 on success, 1 on missing arguments or allocation failure.
21 |  */
22 | int main(int argc, char **argv) {
23 |     // argv[1] and argv[2] are dereferenced below, so both must be present.
24 |     if (argc < 3) {
25 |         std::cerr << "Usage: <program> <idX> <idY>" << std::endl;
26 |         return 1;
27 |     }
28 |     idX = atoi(argv[1]);
29 |     idY = atoi(argv[2]);
30 | 
31 |     int64_t *A = (int64_t *)malloc(sizeof(int64_t) * Row * Col);
32 |     int64_t *B = (int64_t *)malloc(sizeof(int64_t) * Row * Col);
33 |     int64_t *C1 = (int64_t *)malloc(sizeof(int64_t) * Col);
34 |     int64_t *C2 = (int64_t *)malloc(sizeof(int64_t) * Col);
35 |     int64_t *C3 = (int64_t *)malloc(sizeof(int64_t) * Col);
36 |     if (A == NULL || B == NULL || C1 == NULL || C2 == NULL || C3 == NULL) {
37 |         std::cerr << "Failed to allocate matrix buffers" << std::endl;
38 |         free(A);
39 |         free(B);
40 |         free(C1);
41 |         free(C2);
42 |         free(C3);
43 |         return 1;
44 |     }
45 | 
46 |     // Operands are pseudo-random values in [0, 50].
47 |     for (int i = 0; i < Row * Col; i++) {
48 |         A[i] = rand() % 51;
49 |         B[i] = rand() % 51;
50 |     }
51 | 
52 |     InterChiplet::sendMessage(0, 1, idX, idY, A, Row * Col * sizeof(int64_t));
53 |     InterChiplet::sendMessage(1, 0, idX, idY, A, Row * Col * sizeof(int64_t));
54 |     InterChiplet::sendMessage(1, 1, idX, idY, A, Row * Col * sizeof(int64_t));
55 | 
56 |     InterChiplet::sendMessage(0, 1, idX, idY, B, Row * Col * sizeof(int64_t));
57 |     InterChiplet::sendMessage(1, 0, idX, idY, B, Row * Col * sizeof(int64_t));
58 |     InterChiplet::sendMessage(1, 1, idX, idY, B, Row * Col * sizeof(int64_t));
59 | 
60 |     InterChiplet::receiveMessage(idX, idY, 0, 1, C1, Col * sizeof(int64_t));
61 |     InterChiplet::receiveMessage(idX, idY, 1, 0, C2, Col * sizeof(int64_t));
62 |     InterChiplet::receiveMessage(idX, idY, 1, 1, C3, Col * sizeof(int64_t));
63 | 
64 |     // Sum the three partial results element-wise into C1.
65 |     for (int i = 0; i < Col; i++) {
66 |         C1[i] += C2[i];
67 |         C1[i] += C3[i];
68 |     }
69 | 
70 |     free(A);
71 |     free(B);
72 |     free(C1);
73 |     free(C2);
74 |     free(C3);
75 |     return 0;
76 | }
77 | 


--------------------------------------------------------------------------------
/benchmark/test/gpgpu-sim/pipe/matmul.cu:
--------------------------------------------------------------------------------
 1 | #include <math.h>
 2 | #include <stdint.h>
 3 | #include <stdio.h>
 4 | #include <sys/time.h>
 5 | 
 6 | #include <fstream>
 7 | #include <iostream>
 8 | #include <string>
 9 | 
10 | #include "apis_cu.h"
11 | #include "cuda_runtime.h"
12 | #include "device_launch_parameters.h"
13 | 
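14 | /**
15 |  * Example program: four GPU chiplets compute the product of a random matrix
16 |  * A (400 * 100) and a random matrix B (100 * 400). By the rules of matrix
17 |  * multiplication, the task can be split into four 100*100 products whose results are summed.
18 |  */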
19 | 
20 | #define Row 100
21 | #define Col 100
22 | 
23 | /**
24 |  * Matrix-multiplication kernel. Every thread runs it once and produces one
25 |  * element of the result, selected by its thread index:
26 |  * P[ty * 10 + tx] = sum over k < width of M[ty * width + k] * N[k * width + tx].
27 |  */
28 | __global__ void matrix_mul_gpu(int64_t* M, int64_t* N, int64_t* P, int width) {
29 |     const int col = threadIdx.x;
30 |     const int row = threadIdx.y;
31 |     // Dot product of row `row` of M with column `col` of N.
32 |     int64_t acc = 0;
33 |     int k = 0;
34 |     while (k < width) {
35 |         acc += M[row * width + k] * N[k * width + col];
36 |         ++k;
37 |     }
38 |     P[col + row * 10] = acc;
39 | }
40 | 
41 | /**
42 |  * GPU-side worker: receives two Row x Col operand matrices into device
43 |  * memory, runs matrix_mul_gpu on them with a single 10 x 10 thread block and
44 |  * sends the result buffer out. The peer is presumably the chiplet at (0, 0),
45 |  * the coordinates passed to receiveMessage/sendMessage -- confirm the
46 |  * argument order against apis_cu.h.
47 |  *
48 |  * Usage: <program> <idX> <idY>, the coordinates of this chiplet.
49 |  *
50 |  * @return 0 on success, 1 when the coordinates are missing.
51 |  */
52 | int main(int argc, char** argv) {
53 |     // argv[1] and argv[2] are dereferenced below, so both must be present.
54 |     if (argc < 3) {
55 |         std::cerr << "Usage: <program> <idX> <idY>" << std::endl;
56 |         return 1;
57 |     }
58 | 
59 |     // Read the coordinates of the chiplet this process represents.
60 |     int idX = atoi(argv[1]);
61 |     int idY = atoi(argv[2]);
62 | 
63 |     int64_t *d_dataA, *d_dataB, *d_dataC;
64 |     cudaMalloc((void**)&d_dataA, sizeof(int64_t) * Row * Col);
65 |     cudaMalloc((void**)&d_dataB, sizeof(int64_t) * Row * Col);
66 |     cudaMalloc((void**)&d_dataC, sizeof(int64_t) * Col);
67 | 
68 |     receiveMessage(idX, idY, 0, 0, d_dataA, sizeof(int64_t) * Row * Col);
69 |     receiveMessage(idX, idY, 0, 0, d_dataB, sizeof(int64_t) * Row * Col);
70 | 
71 |     // calculate
72 |     dim3 threadPerBlock(10, 10);
73 |     dim3 blockNumber(1);
74 |     matrix_mul_gpu<<<blockNumber, threadPerBlock>>>(d_dataA, d_dataB, d_dataC, Col);
75 | 
76 |     sendMessage(0, 0, idX, idY, d_dataC, Col * sizeof(int64_t));
77 |     cudaFree(d_dataA);
78 |     cudaFree(d_dataB);
79 |     cudaFree(d_dataC);
80 |     return 0;
81 | }
82 | 


--------------------------------------------------------------------------------
/benchmark/test/gpgpu-sim/pipe/matmul.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$BENCHMARK_ROOT/bin/matmul_cu"
 5 |     args: ["0", "1"]
 6 |     log: "gpgpusim.0.1.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 1
 9 |     pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*"
10 |   # Process 1
11 |   - cmd: "$BENCHMARK_ROOT/bin/matmul_cu"
12 |     args: ["1", "0"]
13 |     log: "gpgpusim.1.0.log"
14 |     is_to_stdout: false
15 |     clock_rate: 1
16 |     pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*"
17 |   # Process 2
18 |   - cmd: "$BENCHMARK_ROOT/bin/matmul_cu"
19 |     args: ["1", "1"]
20 |     log: "gpgpusim.1.1.log"
21 |     is_to_stdout: false
22 |     clock_rate: 1
23 |     pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*"
24 |   # Process 3
25 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
26 |     args: ["--", "$BENCHMARK_ROOT/bin/matmul_c", "0", "0"]
27 |     log: "sniper.0.0.log"
28 |     is_to_stdout: false
29 |     clock_rate: 1
30 | 
31 | # Phase 2 configuration.
32 | phase2:
33 |   # Process 0
34 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
35 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
36 |     log: "popnet_0.log"
37 |     is_to_stdout: false
38 |     clock_rate: 1
39 | 
40 | # File configuration. (Not used yet)
41 | bench_file: "./bench.txt"
42 | delayinfo_file: "./delayInfo.txt"
43 | 


--------------------------------------------------------------------------------
/benchmark/test/snipersim/barrier/makefile:
--------------------------------------------------------------------------------
 1 | # Project environment
 2 | # SIMULATOR_ROOT, defined by setup_env.sh
 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/snipersim/barrier
 4 | 
 5 | # Compiler environment of C/C++
 6 | CC=g++
 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes
 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a
 9 | 
10 | # C/C++ Source file
11 | C_SRCS=test.cpp
12 | C_OBJS=obj/test.o
13 | C_TARGET=bin/test_c
14 | 
15 | # None of these targets produces a file of the same name.
16 | .PHONY: all C_target bin_dir obj_dir clean
17 | 
18 | all: bin_dir obj_dir C_target
19 | 
20 | # C language target
21 | C_target: $(C_OBJS)
22 | 	$(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET)
23 | 
24 | # Rule for C object
25 | obj/%.o: %.cpp
26 | 	$(CC) $(CFLAGS) -c $< -o $@
27 | 
28 | # Directory for binary files.
29 | bin_dir:
30 | 	mkdir -p bin
31 | 
32 | # Directory for object files for C.
33 | obj_dir:
34 | 	mkdir -p obj
35 | 
36 | # Clean generated files.
37 | clean:
38 | 	rm -rf bench.txt delayInfo.txt buffer* message_record.txt
39 | 	rm -rf proc_r*_t* *.log
40 | 	rm -rf obj bin
41 | 


--------------------------------------------------------------------------------
/benchmark/test/snipersim/barrier/test.cpp:
--------------------------------------------------------------------------------
 1 | #include <fstream>
 2 | #include <iostream>
 3 | 
 4 | #include "apis_c.h"
 5 | #include "unistd.h"
 6 | 
 7 | #define Row 100
 8 | #define Col 100
 9 | 
10 | int idX, idY;
11 | 
12 | /**
13 |  * Barrier test: in each of two rounds the process spins for a
14 |  * coordinate-dependent number of iterations and then enters
15 |  * InterChiplet::barrier, so the participants arrive at different times.
16 |  *
17 |  * Usage: <program> <idX> <idY>, the coordinates of this chiplet.
18 |  *
19 |  * @return 0 on success, 1 when the coordinates are missing.
20 |  */
21 | int main(int argc, char **argv) {
22 |     // argv[1] and argv[2] are dereferenced below, so both must be present.
23 |     if (argc < 3) {
24 |         std::cerr << "Usage: <program> <idX> <idY>" << std::endl;
25 |         return 1;
26 |     }
27 |     idX = atoi(argv[1]);
28 |     idY = atoi(argv[2]);
29 | 
30 |     // Busy-loop lengths, indexed as [idX][idY][round].
31 |     static const int kDelayCount[2][2][2] = {
32 |         {{5000, 4000}, {1000, 3000}},
33 |         {{2000, 1000}, {2000, 4000}},
34 |     };
35 | 
36 |     // Coordinates outside the 2 x 2 grid keep a zero delay.
37 |     int delay_count[2] = {0, 0};
38 |     if (idX >= 0 && idX < 2 && idY >= 0 && idY < 2) {
39 |         delay_count[0] = kDelayCount[idX][idY][0];
40 |         delay_count[1] = kDelayCount[idX][idY][1];
41 |     }
42 | 
43 |     int64_t sum = 0;
44 | 
45 |     for (int r = 0; r < 2; r++) {
46 |         // Create time gap between threads.
47 |         for (int j = 0; j < delay_count[r]; j++) {
48 |             sum += rand() % 10;
49 |         }
50 | 
51 |         InterChiplet::barrier(255, idX, idY, 4);
52 |     }
53 |     // Printing the sum keeps the delay loop observable.
54 |     std::cout << "Sum = " << sum << std::endl;
55 |     return 0;
56 | }
57 | 


--------------------------------------------------------------------------------
/benchmark/test/snipersim/barrier/test.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
 5 |     args: ["--", "$BENCHMARK_ROOT/bin/test_c", "0", "0"]
 6 |     log: "sniper.0.0.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 1
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
11 |     args: ["--", "$BENCHMARK_ROOT/bin/test_c", "0", "1"]
12 |     log: "sniper.0.1.log"
13 |     is_to_stdout: false
14 |     clock_rate: 1
15 |   # Process 2
16 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
17 |     args: ["--", "$BENCHMARK_ROOT/bin/test_c", "1", "0"]
18 |     log: "sniper.1.0.log"
19 |     is_to_stdout: false
20 |     clock_rate: 1
21 |   # Process 3
22 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
23 |     args: ["--", "$BENCHMARK_ROOT/bin/test_c", "1", "1"]
24 |     log: "sniper.1.1.log"
25 |     is_to_stdout: false
26 |     clock_rate: 1
27 | 
28 | # Phase 2 configuration.
29 | phase2:
30 |   # Process 0
31 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
32 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
33 |     log: "popnet_0.log"
34 |     is_to_stdout: false
35 |     clock_rate: 1
36 | 
37 | # File configuration. (Not used yet)
38 | bench_file: "./bench.txt"
39 | delayinfo_file: "./delayInfo.txt"
40 | 


--------------------------------------------------------------------------------
/benchmark/test/snipersim/launch/makefile:
--------------------------------------------------------------------------------
 1 | # Project environment
 2 | # SIMULATOR_ROOT, defined by setup_env.sh
 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/snipersim/launch
 4 | 
 5 | # Compiler environment of C/C++
 6 | CC=g++
 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes
 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a
 9 | 
10 | # C/C++ Source file
11 | C_SRCS=test.cpp
12 | C_OBJS=obj/test.o
13 | C_TARGET=bin/test_c
14 | 
15 | # None of these targets produces a file of the same name.
16 | .PHONY: all C_target bin_dir obj_dir clean
17 | 
18 | all: bin_dir obj_dir C_target
19 | 
20 | # C language target
21 | C_target: $(C_OBJS)
22 | 	$(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET)
23 | 
24 | # Rule for C object
25 | obj/%.o: %.cpp
26 | 	$(CC) $(CFLAGS) -c $< -o $@
27 | 
28 | # Directory for binary files.
29 | bin_dir:
30 | 	mkdir -p bin
31 | 
32 | # Directory for object files for C.
33 | obj_dir:
34 | 	mkdir -p obj
35 | 
36 | # Clean generated files.
37 | clean:
38 | 	rm -rf bench.txt delayInfo.txt buffer* message_record.txt
39 | 	rm -rf proc_r*_t* *.log
40 | 	rm -rf obj bin
41 | 


--------------------------------------------------------------------------------
/benchmark/test/snipersim/launch/test.cpp:
--------------------------------------------------------------------------------
 1 | #include <fstream>
 2 | #include <iostream>
 3 | 
 4 | #include "apis_c.h"
 5 | #include "unistd.h"
 6 | 
 7 | #define Row 100
 8 | #define Col 100
 9 | 
10 | int idX, idY;
11 | 
12 | int main(int argc, char **argv) {
13 |     idX = atoi(argv[1]);
14 |     idY = atoi(argv[2]);
15 | 
16 |     // Core (0,0), wait launch
17 |     if (idX == 0 && idY == 0) {
18 |         for (int r = 0; r < 6; r ++) {
19 |             int64_t srcX = -1, srcY = -1;
20 |             InterChiplet::waitLaunch(0, 0, &srcX, &srcY);
21 | 
22 |             int64_t *A = (int64_t *)malloc(sizeof(int64_t) * Row * Col);
23 | 
24 |             int64_t sum = 0;
25 |             for (int i = 0; i < Row * Col; i++) {
26 |                 sum = sum + A[i];
27 |             }
28 | 
29 |             InterChiplet::sendMessage(srcX, srcY, 0, 0, &sum, sizeof(int64_t));
30 |         }
31 |     }
32 |     // Core (0,1),(1,0),(1,1), launch
33 |      else {
34 |         int delay_count[2] = {0, 0};
35 | 
36 |         if (idX == 0 && idY == 0) {
37 |             delay_count[0] = 5000;
38 |             delay_count[1] = 4000;
39 |         } else if (idX == 0 && idY == 1) {
40 |             delay_count[0] = 1000;
41 |             delay_count[1] = 3000;
42 |         } else if (idX == 1 && idY == 0) {
43 |             delay_count[0] = 2000;
44 |             delay_count[1] = 1000;
45 |         } else if (idX == 1 && idY == 1) {
46 |             delay_count[0] = 2000;
47 |             delay_count[1] = 4000;
48 |         }
49 | 
50 |         int64_t sum = 0;
51 |         for (int r = 0; r < 2; r ++) {
52 |             // Create time gap between threads.
53 |             for (int j = 0; j < delay_count[r]; j++) {
54 |                 sum += rand() % 10;
55 |             }
56 | 
57 |             InterChiplet::launch(0, 0, idX, idY);
58 | 
59 |             // Read result from Core (0,0)
60 |             int64_t result;
61 |             InterChiplet::receiveMessage(idX, idY, 0, 0, &result, sizeof(int64_t));
62 | 
63 |             sum = sum + result;
64 |         }
65 |         std::cout << "Sum = " << sum << std::endl;
66 |     }
67 | }
68 | 


--------------------------------------------------------------------------------
/benchmark/test/snipersim/launch/test.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
 5 |     args: ["--", "$BENCHMARK_ROOT/bin/test_c", "0", "0"]
 6 |     log: "sniper.0.0.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 1
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
11 |     args: ["--", "$BENCHMARK_ROOT/bin/test_c", "0", "1"]
12 |     log: "sniper.0.1.log"
13 |     is_to_stdout: false
14 |     clock_rate: 1
15 |   # Process 2
16 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
17 |     args: ["--", "$BENCHMARK_ROOT/bin/test_c", "1", "0"]
18 |     log: "sniper.1.0.log"
19 |     is_to_stdout: false
20 |     clock_rate: 1
21 |   # Process 3
22 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
23 |     args: ["--", "$BENCHMARK_ROOT/bin/test_c", "1", "1"]
24 |     log: "sniper.1.1.log"
25 |     is_to_stdout: false
26 |     clock_rate: 1
27 | 
28 | # Phase 2 configuration.
29 | phase2:
30 |   # Process 0
31 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
32 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
33 |     log: "popnet_0.log"
34 |     is_to_stdout: false
35 |     clock_rate: 1
36 | 
37 | # File configuration. (Not used yet)
38 | bench_file: "./bench.txt"
39 | delayinfo_file: "./delayInfo.txt"
40 | 


--------------------------------------------------------------------------------
/benchmark/test/snipersim/lock/makefile:
--------------------------------------------------------------------------------
 1 | # Project environment
 2 | # SIMULATOR_ROOT, defined by setup_env.sh
 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/snipersim/lock
 4 | 
 5 | # Compiler environment of C/C++
 6 | CC=g++
 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes
 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a
 9 | 
10 | # C/C++ Source file
11 | C_SRCS=test.cpp
12 | C_OBJS=obj/test.o
13 | C_TARGET=bin/test_c
14 | 
15 | all: bin_dir obj_dir C_target
16 | 
17 | # C language target
18 | C_target: $(C_OBJS)
19 | 	$(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET)
20 | 
21 | # Rule for C object
22 | obj/%.o: %.cpp
23 | 	$(CC) $(CFLAGS) -c $< -o $@
24 | 
25 | # Directory for binary files.
26 | bin_dir:
27 | 	mkdir -p bin
28 | 
29 | # Directory for object files for C.
30 | obj_dir:
31 | 	mkdir -p obj
32 | 
33 | # Clean generated files.
34 | clean:
35 | 	rm -rf bench.txt delayInfo.txt buffer* message_record.txt
36 | 	rm -rf proc_r*_t* *.log
37 | 	rm -rf obj bin
38 | 


--------------------------------------------------------------------------------
/benchmark/test/snipersim/lock/test.cpp:
--------------------------------------------------------------------------------
 1 | #include <cstdint>
 2 | #include <fstream>
 3 | #include <iostream>
 4 | 
 5 | #include "apis_c.h"
 6 | #include "unistd.h"
 7 | 
 8 | #define Row 100
 9 | #define Col 100
10 | 
11 | int idX, idY;
12 | 
13 | int main(int argc, char **argv) {
14 |     idX = atoi(argv[1]);
15 |     idY = atoi(argv[2]);
16 | 
17 |     int delay_count[3] = {0, 0, 0};
18 |     if (idX == 0 && idY == 0) {
19 |         delay_count[0] = 5000;
20 |         delay_count[1] = 4000;
21 |         delay_count[2] = 3000;
22 |     } else if (idX == 0 && idY == 1) {
23 |         delay_count[0] = 1000;
24 |         delay_count[1] = 3000;
25 |         delay_count[2] = 5000;
26 |     } else if (idX == 1 && idY == 0) {
27 |         delay_count[0] = 2000;
28 |         delay_count[1] = 1000;
29 |         delay_count[2] = 0000;
30 |     } else if (idX == 1 && idY == 1) {
31 |         delay_count[0] = 2000;
32 |         delay_count[1] = 4000;
33 |         delay_count[2] = 6000;
34 |     }
35 | 
36 |     int64_t sum = 0;
37 |     for (int i = 0; i < 3; i++) {
38 |         // Create time gap between threads.
39 |         for (int j = 0; j < delay_count[i]; j++) {
40 |             sum += rand() % 10;
41 |         }
42 | 
43 |         InterChiplet::lock(255, idX, idY);
44 | 
45 |         for (int j = 0; j < delay_count[i]; j++) {
46 |             sum += rand() % 10;
47 |         }
48 | 
49 |         InterChiplet::unlock(255, idX, idY);
50 |     }
51 | 
52 |     std::cout << "Sum = " << sum << std::endl;
53 | 
54 |     return 0;
55 | }
56 | 


--------------------------------------------------------------------------------
/benchmark/test/snipersim/lock/test.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
 5 |     args: ["--", "$BENCHMARK_ROOT/bin/test_c", "0", "0"]
 6 |     log: "sniper.0.0.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 1
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
11 |     args: ["--", "$BENCHMARK_ROOT/bin/test_c", "0", "1"]
12 |     log: "sniper.0.1.log"
13 |     is_to_stdout: false
14 |     clock_rate: 1
15 |   # Process 2
16 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
17 |     args: ["--", "$BENCHMARK_ROOT/bin/test_c", "1", "0"]
18 |     log: "sniper.1.0.log"
19 |     is_to_stdout: false
20 |     clock_rate: 1
21 |   # Process 3
22 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
23 |     args: ["--", "$BENCHMARK_ROOT/bin/test_c", "1", "1"]
24 |     log: "sniper.1.1.log"
25 |     is_to_stdout: false
26 |     clock_rate: 1
27 | 
28 | # Phase 2 configuration.
29 | phase2:
30 |   # Process 0
31 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
32 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
33 |     log: "popnet_0.log"
34 |     is_to_stdout: false
35 |     clock_rate: 1
36 | 
37 | # File configuration. (Not used yet)
38 | bench_file: "./bench.txt"
39 | delayinfo_file: "./delayInfo.txt"
40 | 


--------------------------------------------------------------------------------
/benchmark/test/snipersim/pipe/makefile:
--------------------------------------------------------------------------------
 1 | # Project environment
 2 | # SIMULATOR_ROOT, defined by setup_env.sh
 3 | BENCHMARK_ROOT=$(SIMULATOR_ROOT)/benchmark/test/snipersim/pipe
 4 | 
 5 | # Compiler environment of C/C++
 6 | CC=g++
 7 | CFLAGS=-Wall -Werror -g -I$(SIMULATOR_ROOT)/interchiplet/includes
 8 | INTERCHIPLET_C_LIB=$(SIMULATOR_ROOT)/interchiplet/lib/libinterchiplet_c.a
 9 | 
10 | # C/C++ Source file
11 | C_SRCS=test.cpp
12 | C_OBJS=obj/test.o
13 | C_TARGET=bin/test_c
14 | 
15 | all: bin_dir obj_dir C_target
16 | 
17 | # C language target
18 | C_target: $(C_OBJS)
19 | 	$(CC) $(C_OBJS) $(INTERCHIPLET_C_LIB) -o $(C_TARGET)
20 | 
21 | # Rule for C object
22 | obj/%.o: %.cpp
23 | 	$(CC) $(CFLAGS) -c $< -o $@
24 | 
25 | # Directory for binary files.
26 | bin_dir:
27 | 	mkdir -p bin
28 | 
29 | # Directory for object files for C.
30 | obj_dir:
31 | 	mkdir -p obj
32 | 
33 | # Clean generated files.
34 | clean:
35 | 	rm -rf bench.txt delayInfo.txt buffer* message_record.txt
36 | 	rm -rf proc_r*_t* *.log
37 | 	rm -rf obj bin
38 | 


--------------------------------------------------------------------------------
/benchmark/test/snipersim/pipe/test.cpp:
--------------------------------------------------------------------------------
 1 | #include <fstream>
 2 | #include <iostream>
 3 | #include <cstring>
 4 | 
 5 | #include "apis_c.h"
 6 | 
 7 | #define Row 100
 8 | #define Col 100
 9 | 
10 | int idX, idY;
11 | 
12 | int main(int argc, char **argv) {
13 |     idX = atoi(argv[1]);
14 |     idY = atoi(argv[2]);
15 | 
16 |     // Test Purpose:
17 |     //  Communication (0,0) -> (0,1) -> (1,0) -> (1,1) -> (0,0)
18 | 
19 |     // Core (0,0)
20 |     if (idX == 0 && idY == 0) {
21 |         int64_t *A = (int64_t *)malloc(sizeof(int64_t) * Row * Col);
22 |         int64_t *B = (int64_t *)malloc(sizeof(int64_t) * Row * Col);
23 | 
24 |         // Initialization value.
25 |         for (int i = 0; i < Row * Col; i++) {
26 |             A[i] = rand() % 51;
27 |         }
28 | 
29 |         // Send message to (0,1)
30 |         InterChiplet::sendMessage(0, 1, idX, idY, A, Row * Col * sizeof(int64_t));
31 |         // Receive message from (1,1)
32 |         InterChiplet::receiveMessage(idX, idY, 1, 1, B, Row * Col * sizeof(int64_t));
33 | 
34 |         // Check result
35 |         for (int i = 0; i < Row * Col; i++) {
36 |             if (A[i] != B[i]) {
37 |                 std::cout << "Data check error!" << std::endl;
38 |                 return 1;
39 |             }
40 |         }
41 |         std::cout << "Data check PASS!" << std::endl;
42 | 
43 |         return 0;
44 |     }
45 |     // Core (0,1)
46 |     else if (idX == 0 && idY == 1) {
47 |         int64_t *A = (int64_t *)malloc(sizeof(int64_t) * Row * Col);
48 |         memset(A, 0, Row * Col * sizeof(int64_t));
49 |         // Receive message from (0,0)
50 |         InterChiplet::receiveMessage(idX, idY, 0, 0, A, Row * Col * sizeof(int64_t));
51 |         // Send message to (1,0)
52 |         InterChiplet::sendMessage(1, 0, idX, idY, A, Row * Col * sizeof(int64_t));
53 | 
54 |         return 0;
55 |     }
56 |     // Core (1,0)
57 |     else if (idX == 1 && idY == 0) {
58 |         int64_t *A = (int64_t *)malloc(sizeof(int64_t) * Row * Col);
59 |         memset(A, 0, Row * Col * sizeof(int64_t));
60 |         // Receive message from (0,1)
61 |         InterChiplet::receiveMessage(idX, idY, 0, 1, A, Row * Col * sizeof(int64_t));
62 |         // Send message to (1,1)
63 |         InterChiplet::sendMessage(1, 1, idX, idY, A, Row * Col * sizeof(int64_t));
64 | 
65 |         return 0;
66 |     }
67 |     // Core (1,1)
68 |     else if (idX == 1 && idY == 1) {
69 |         int64_t *A = (int64_t *)malloc(sizeof(int64_t) * Row * Col);
70 |         memset(A, 0, Row * Col * sizeof(int64_t));
71 |         // Receive message from (0,0)
72 |         InterChiplet::receiveMessage(idX, idY, 1, 0, A, Row * Col * sizeof(int64_t));
73 |         // Send message to (1,0)
74 |         InterChiplet::sendMessage(0, 0, idX, idY, A, Row * Col * sizeof(int64_t));
75 | 
76 |         return 0;
77 |     }
78 | }
79 | 


--------------------------------------------------------------------------------
/benchmark/test/snipersim/pipe/test.yml:
--------------------------------------------------------------------------------
 1 | # Phase 1 configuration.
 2 | phase1:
 3 |   # Process 0
 4 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
 5 |     args: ["--", "$BENCHMARK_ROOT/bin/test_c", "0", "0"]
 6 |     log: "sniper.0.0.log"
 7 |     is_to_stdout: false
 8 |     clock_rate: 1
 9 |   # Process 1
10 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
11 |     args: ["--", "$BENCHMARK_ROOT/bin/test_c", "0", "1"]
12 |     log: "sniper.0.1.log"
13 |     is_to_stdout: false
14 |     clock_rate: 1
15 |   # Process 2
16 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
17 |     args: ["--", "$BENCHMARK_ROOT/bin/test_c", "1", "0"]
18 |     log: "sniper.1.0.log"
19 |     is_to_stdout: false
20 |     clock_rate: 1
21 |   # Process 3
22 |   - cmd: "$SIMULATOR_ROOT/snipersim/run-sniper"
23 |     args: ["--", "$BENCHMARK_ROOT/bin/test_c", "1", "1"]
24 |     log: "sniper.1.1.log"
25 |     is_to_stdout: false
26 |     clock_rate: 1
27 | 
28 | # Phase 2 configuration.
29 | phase2:
30 |   # Process 0
31 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
32 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
33 |     log: "popnet_0.log"
34 |     is_to_stdout: false
35 |     clock_rate: 1
36 | 
37 | # File configuration. (Not used yet)
38 | bench_file: "./bench.txt"
39 | delayinfo_file: "./delayInfo.txt"
40 | 


--------------------------------------------------------------------------------
/docs/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | site
3 | html
4 | latex
5 | 


--------------------------------------------------------------------------------
/docs/docs/02-benchmarks/022-yaml.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # YAML Configuration File
 3 | 
 4 | You can learn the following topics from this page:
 5 | 
 6 | - Create the configuration file (YAML format) of a novel benchmark.
 7 | 
 8 | ## YAML Configuration File Format
 9 | 
10 | The execution process is controlled by a YAML configuration file. One benchmark must have at least one YAML configuration file. More configuration files can be created to describe different configurations of one benchmark.
11 | 
12 | The example structure of the YAML file is as follows:
13 | 
14 | ```yaml
15 | # Phase 1 configuration.
16 | phase1:
17 |   # Process 0
18 |   - cmd: "$BENCHMARK_ROOT/bin/matmul_cu"
19 |     args: ["0", "1"]
20 |     log: "gpgpusim.0.1.log"
21 |     is_to_stdout: false
22 |     pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*"
23 |     clock_rate: 1
24 |   # Process 1
25 |   - cmd: "$BENCHMARK_ROOT/bin/matmul_cu"
26 |     args: ["1", "0"]
27 |     log: "gpgpusim.1.0.log"
28 |     is_to_stdout: false
29 |     pre_copy: "$SIMULATOR_ROOT/gpgpu-sim/configs/tested-cfgs/SM2_GTX480/*"
30 |     clock_rate: 1
31 |   ......
32 | 
33 | # Phase 2 configuration.
34 | phase2:
35 |   # Process 0
36 |   - cmd: "$SIMULATOR_ROOT/popnet_chiplet/build/popnet"
37 |     args: ["-A", "2", "-c", "2", "-V", "3", "-B", "12", "-O", "12", "-F", "4", "-L", "1000", "-T", "10000000", "-r", "1", "-I", "../bench.txt", "-R", "0", "-D", "../delayInfo.txt", "-P"]
38 |     log: "popnet_0.log"
39 |     is_to_stdout: false
40 |     clock_rate: 1
41 | ```
42 | 
43 | In the above configuration files, the first-level tags are
44 | 
45 | * `phase1` provides the configuration for processes in Phase 1.
46 | * `phase2` provides the configuration for processes in Phase 2.
47 | 
48 | Both `phase1` and `phase2` accept a list of process configuration structures. Each structure corresponds to one parallel simulator process.
49 | 
50 | Configuration structures provide the following tags:
51 | 
52 | - `cmd` presents the command of the simulator. A string is accepted. The environment variables `$BENCHMARK_ROOT` and `$SIMULATOR_ROOT` are supported to describe the path of the simulator.
53 | - `args` presents the arguments of the simulator. A list of strings is accepted. The environment variables `$BENCHMARK_ROOT` and `$SIMULATOR_ROOT` are also supported to specify the path of related files. `cmd` and `args` combine the SHELL command to execute one simulator.
54 | - `log` presents the name of the log file. A string is accepted. Neither an absolute path nor a relative path is supported. The log file is stored in the sub-directory of each simulation process.
55 | - `is_to_stdout` presents whether the standard output and standard error output of this simulator process are redirected to the standard output of *interchiplet*.
56 | - `pre_copy` provides a list of files that should be copied to the sub-directory of this simulation process before calling the simulator. A string is accepted. If there are multiple files to copy, files are separated by space.
57 | - `clock_rate` provides a floating-point number as the ratio between chiplet clocks (clocks of simulators) and the system clock (clock of the *interchiplet*).
58 | 
59 | > TODO: Change pre_copy to pre_cmd.
60 | 
61 | The following environment variables are supported when writing one benchmark configuration file.
62 | 
63 | - `$BENCHMARK_ROOT` presents the root path of the benchmark, specified by the location of the YAML configuration file.
64 | - `$SIMULATOR_ROOT` presents the root path of the LegoSim, set by *setup_env.sh*.
65 | 


--------------------------------------------------------------------------------
/docs/docs/03-sync-proto/030-net-proto.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Interconnection Simulation
  3 | 
  4 | LegoSim applies Popnet as the interconnection simulator to provide the latency of each transaction. Popnet simulates the interconnection behavior according to the benchmark file provided by Phase 1. Popnet generates one latency information file, providing the latency of each package.
  5 | 
  6 | The latency of packages is related to the source-destination address and the network congestion. The source-destination address does not change across iterations. The same degree of network congestion leads to similar latency information. Hence, the latency information generated by Phase 2 can be used in Phase 1 of the next iteration.
  7 | 
  8 | Each pair of WRITE and READ commands generates one transaction in the interconnection. In particular, WRITE commands with LOCK, UNLOCK, and BARRIER descriptors generate transactions in the interconnection without pairing READ commands.
  9 | 
 10 | ## File format
 11 | 
 12 | LegoSim uses the trace file and the latency information file to communicate with Popnet.
 13 | 
 14 | ### Trace file format
 15 | 
 16 | The trace file is one text file. Each line presents one transaction in the interconnection, as shown below.
 17 | 
 18 | ```
 19 | <src_cycle> <dst_cycle> <src_x> <src_y> <dst_x> <dst_y> <flit_num> <desc>
 20 | ```
 21 | 
 22 | Fields in the trace file are filled by the value from the WRITE and READ commands.
 23 | 
 24 | | Field in trace file | Field in WRITE command | Field in READ command | Description |
 25 | | ---- | ---- | ---- | ---- |
 26 | | `src_cycle` | `cycle` | | Start cycle of the transaction from the source's view. |
 27 | | `dst_cycle` | | `cycle` | Start cycle of the transaction from the destination's view. |
 28 | | `src_x`, `src_y` | `src_x`, `src_y` | `src_x`, `src_y` | Source address. |
 29 | | `dst_x`, `dst_y` | `dst_x`, `dst_y` | `dst_x`, `dst_y` | Destination address. |
 30 | | `flit_num` | | | Flit number. $\text{ceil}(nbytes / payload\#) + 1$. |
 31 | | `desc`  | `desc` | `desc` | Behavior of transaction. |
 32 | 
 33 | 
 34 | The trace file is generated by *interchiplet*. *interchiplet* always keeps packages in the order of the package injection cycle. For example,
 35 | 
 36 | ```
 37 | 2846470 0 0 0 0 1 1251 0
 38 | 2847814 0 0 0 1 0 1251 0
 39 | 2849309 0 0 0 1 1 1251 0
 40 | 2850905 2847725 0 0 0 1 1251 0
 41 | 2852501 2849069 0 0 1 0 1251 0
 42 | 2854098 2850569 0 0 1 1 1251 0
 43 | 2875272 2855527 0 1 0 0 14 0
 44 | 2876868 2875644 1 0 0 0 14 0
 45 | 2878470 2877240 1 1 0 0 14 0
 46 | ```
 47 | 
 48 | > TODO: flexible format for address. Flexible format for different kinds of PComps and SComps.
 49 | 
 50 | ### Latency information file format
 51 | 
 52 | The latency information file is also one text file. Each line provides the latency information for one transaction in the interconnection, as shown below.
 53 | 
 54 | ```
 55 | <cycle> <src_x> <src_y> <dst_x> <dst_y> <desc> <lat_num> [<lat_0> <lat_1> ...]
 56 | ```
 57 | 
 58 | `cycle` presents the start cycle of one transaction from the source's view as the `src_cycle` field in the trace file. `src_x`, `src_y`, `dst_x`, and `dst_y` present the source and destination address as the trace file. `desc` describes the behavior of the transaction as the trace file.
 59 | 
 60 | For each transaction, latency information provides the latency of each package related to one transaction. For example, normal communication transactions only provide the latency of one package. BARRIER/LOCK/UNLOCK/LAUNCH transactions provide the latency of one request package and one acknowledge package.
 61 | 
 62 | `lat_num` presents the number of provided latency values. Each package has two latency values. `lat_{2i}` and `lat_{2i+1}` are the latency of the `i`-th package from the source's and destination's views.
 63 | 
 64 | | Transaction's behavior | `lat_num` | `lat_0` and `lat_1` | `lat_2` and `lat_3` |
 65 | | ---- | :----: | ---- | ---- |
 66 | | Normal | 2 | Normal transfer package | |
 67 | | LAUNCH | 4 | Request package | Acknowledge package |
 68 | | BARRIER | 4 | Request package | Acknowledge package |
 69 | | LOCK | 4 | Request package | Acknowledge package |
 70 | | UNLOCK | 4 | Request package | Acknowledge package |
 71 | 
 72 | Popnet does not need to sort packages. *interchiplet* will reorder packages according to the package injection cycle when loading files. One example of latency information files is shown below:
 73 | 
 74 | ```
 75 | 2847106 0 0 0 1 0 2 1250 1255
 76 | 2848612 0 0 1 0 0 2 1250 1255
 77 | 2850107 0 0 1 1 0 2 1250 1260
 78 | 2851703 0 0 0 1 0 2 1250 1255
 79 | 2853299 0 0 1 0 0 2 1250 1255
 80 | 2854896 0 0 1 1 0 2 1250 1260
 81 | 2876066 0 1 0 0 0 2 13 18
 82 | 2877662 1 0 0 0 0 2 13 18
 83 | 2879259 1 1 0 0 0 2 13 23
 84 | ```
 85 | 
 86 | > TODO: flexible format for address. Flexible format for different kinds of PComps and SComps.
 87 | 
 88 | ## Sorting Latency Information
 89 | 
 90 | *interchiplet* needs to rebuild the order of transactions according to the latency information file.
 91 | 
 92 | Transactions are organized by a two-level structure. The first level is a map of addresses and ordered queues of latency information. The second level is one queue of latency information ordered by cycles.
 93 | 
 94 | |  | Ordered content | Key of map | Key for ordering |
 95 | | ---- | ---- | ---- | ---- |
 96 | | Order of Transaction | All latency information | Source addresses | Transaction start cycle |
 97 | | Order of Launch | Latency information of Launch transactions | Destination addresses | Cycle when the request package arrives at the destination |
 98 | | Order of Lock and unlock | Latency information of Lock/unlock transactions | Destination addresses | Cycle when the request package arrives at the destination |
 99 | 
100 | ## TODO
101 | 
102 | The network latency is provided by Phase 2 of the previous iteration. *interchiplet* will load all delay information before starting the simulation processes in Phase 1. When it receives the paired READ and WRITE commands, it will search for the first delay information message with the same source and destination. The matched delay information message will be dropped after use.
103 | 
104 | There is no implicit guarantee that the number and the order of transactions stay the same across iterations. Because the simulation flow is iterative, the differences between iterations should shrink as the simulation continues.
105 | 
106 | If the network delay from SComps is missing, the network delay only considers the propagation delay, which equals the data amount divided by network bandwidth. In the first iteration, the network delay is determined in the same way.
107 | 


--------------------------------------------------------------------------------
/docs/docs/03-sync-proto/032-barrier.md:
--------------------------------------------------------------------------------
  1 | # Barrier
  2 | 
  3 | You can find the following topics on this page:
  4 | 
  5 | - The sequences of synchronization protocol to handle barrier transactions between processes.
  6 | - The algorithm to handle synchronization protocol within *interchiplet*.
  7 | - The algorithm to calculate the end cycle of one barrier transaction.
  8 | 
  9 | ## Command syntax
 10 | 
 11 | ```
 12 | # barrier
 13 | BARRIER <src_x> <src_y> <uid> <count>
 14 | WRITE <cycle> <src_x> <src_y> <dst_x> <dst_y> <nbytes=1> <desc=0x20000+count>
 15 | ```
 16 | 
 17 | `src_x` and `src_y` present the source address of the process that enters the barrier. `uid` specifies the unique ID of the barrier. `count` specifies the number of processes that enter the barrier when the barrier overflows. Non-zero `count` always overrides the number of the barrier.
 18 | 
 19 | The `cycle` field in the WRITE command presents the time when the process enters the barrier.
 20 | 
 21 | The figure below shows the relationship between arguments of APIs and commands.
 22 | 
 23 | ```mermaid
 24 | flowchart TB
 25 | 
 26 | subgraph barrier
 27 | A1[__src_x]
 28 | A2[__src_y]
 29 | A3[__uid]
 30 | A4[__count]
 31 | end
 32 | 
 33 | subgraph BARRIER command
 34 | B1[src_x]
 35 | B2[src_y]
 36 | B3[uid]
 37 | B4[count]
 38 | end
 39 | 
 40 | subgraph WRITE command
 41 | C0[cycle]
 42 | C1[src_x]
 43 | C2[src_y]
 44 | C3[dst_x]
 45 | C4[dst_y=0]
 46 | C5[nbytes=1]
 47 | C6[desc=0x20000+count]
 48 | end
 49 | 
 50 | A1 -.-> B1 -.-> C1
 51 | A2 -.-> B2 -.-> C2
 52 | A3 -.-> B3 -.-> C3
 53 | A4 -.-> B4 -.-> C6
 54 | 
 55 | ```
 56 | 
 57 | ## Command Sequence
 58 | 
 59 | One example of the command sequence is shown below:
 60 | 
 61 | ```mermaid
 62 | sequenceDiagram
 63 | autonumber
 64 | 
 65 | participant interchiplet
 66 | participant SP0 as Simulator<br/>Process 0
 67 | participant SP1 as Simulator<br/>Process 1
 68 | participant SP2 as Simulator<br/>Process 2
 69 | participant SP3 as Simulator<br/>Process 3
 70 | 
 71 | activate SP0
 72 | activate SP1
 73 | activate SP2
 74 | activate SP3
 75 | 
 76 | Note over SP0,SP3: Example starts
 77 | 
 78 | SP1->>interchiplet: BARRIER 0 1 255 4
 79 | deactivate SP1
 80 | activate interchiplet
 81 | Note over interchiplet: Register BARRIER command.
 82 | deactivate interchiplet
 83 | 
 84 | SP0->>interchiplet: BARRIER 0 0 255 4
 85 | deactivate SP0
 86 | activate interchiplet
 87 | Note over interchiplet: Register BARRIER command.
 88 | deactivate interchiplet
 89 | 
 90 | SP3->>interchiplet: BARRIER 1 1 255 4
 91 | deactivate SP3
 92 | activate interchiplet
 93 | Note over interchiplet: Register BARRIER command.
 94 | deactivate interchiplet
 95 | 
 96 | SP2->>interchiplet: BARRIER 1 0 255 4
 97 | deactivate SP2
 98 | activate interchiplet
 99 | Note over interchiplet: 1. Register BARRIER command.<br/>2. Barrier overflows.<br/>3. Send RESULT command to<br/>each Simulator Process.
100 | interchiplet->>SP1: RESULT 0
101 | interchiplet->>SP0: RESULT 0
102 | interchiplet->>SP3: RESULT 0
103 | interchiplet->>SP2: RESULT 0
104 | deactivate interchiplet
105 | 
106 | SP1->>interchiplet: WRITE 2305339 0 1 255 0 1 131076
107 | activate interchiplet
108 | Note over interchiplet: Register WRITE command<br>with the barrier flag.
109 | deactivate interchiplet
110 | 
111 | SP0->>interchiplet: WRITE 2410745 0 0 255 0 1 131076
112 | activate interchiplet
113 | Note over interchiplet: Register WRITE command<br>with the barrier flag.
114 | deactivate interchiplet
115 | 
116 | SP3->>interchiplet: WRITE 2330513 1 1 255 0 1 131076
117 | activate interchiplet
118 | Note over interchiplet: Register WRITE command<br>with the barrier flag.
119 | deactivate interchiplet
120 | 
121 | SP2->>interchiplet: WRITE 2331564 1 0 255 0 1 131076
122 | activate interchiplet
123 | Note over interchiplet: 1. Register WRITE command<br/>with the barrier flag.<br/>2. Barrier overflows and<br>calculate barrier overflow time.<br/>3. Send SYNC command to<br/>each Simulator Process.
124 | interchiplet->>SP1: SYNC 2411664
125 | activate SP1
126 | interchiplet->>SP0: SYNC 2411659
127 | activate SP0
128 | interchiplet->>SP3: SYNC 2411669
129 | activate SP3
130 | interchiplet->>SP2: SYNC 2411664
131 | activate SP2
132 | deactivate interchiplet
133 | 
134 | Note over SP0,SP3: Example ends
135 | 
136 | deactivate SP0
137 | deactivate SP1
138 | deactivate SP2
139 | deactivate SP3
140 | ```
141 | 
142 | ## Handle BARRIER Command
143 | 
144 | *interchiplet* emulates the function of the barrier. The following diagram shows the flow to handle one BARRIER command.
145 | 
146 | ```mermaid
147 | flowchart TB
148 | 
149 | A(Start)
150 | B[Register BARRIER command]
151 | C{Check whether<br/>barrier overflows}
152 | E[Send RESULT commands<br/>to each pending<br/>BARRIER command]
153 | Z(End)
154 | 
155 | A-->B-->C--"Yes"-->E-->Z
156 | C--"No"-->Z
157 | ```
158 | 
159 | When the barrier overflows, *interchiplet* responds with one RESULT command, carrying no result value, to each process that has entered the barrier.
160 | 
161 | > The order of BARRIER does not change by the timing information.
162 | 
163 | ## Handle WRITE Command with the Barrier Flag
164 | 
165 | In a realistic system, when a process enters a barrier, the process sends one request to a controller, like a mailbox. Then, the process blocks till it receives the acknowledgment from the controller. The location of the controller is configured in Popnet.
166 | 
167 | The `cycle` field in the WRITE command with the barrier flag presents the time when the source component sends the barrier requirement to the controller in the system, referenced as `src_cycle`. WRITE commands with the barrier flag do not need to pair with READ commands.
168 | 
169 | The SYNC command after one WRITE command with the barrier flag means the source has received acknowledgment. The task or flow in the source can continue after receiving the SYNC command. The execution cycle of the source should be adjusted to the value specified in the cycle field of SYNC commands.
170 | 
171 | Latency information provides four latency values (`lat_0`, `lat_1`, `lat_2`, and `lat_3`) for one barrier transaction:
172 | 
173 | | | From the source's view | From the destination's view |
174 | | ---- | :----: | :----: |
175 | | **Request package** | `lat_0` | `lat_1` |
176 | | **Acknowledgement package** | `lat_2` | `lat_3` |
177 | 
178 | The request package is injected at `src_cycle`. Hence, the request package arrives at the controller at `src_cycle + lat_1`. Then, when the barrier overflows, the controller sends one acknowledgment package to each source component.
179 | 
180 | The timing sequence is shown below:
181 | 
182 | ```mermaid
183 | sequenceDiagram 
184 | autonumber
185 | 
186 | participant SP0 as Simulator<br/>Process 0
187 | participant SP1 as Simulator<br/>Process 1
188 | participant SP2 as Simulator<br/>Process 2
189 | 
190 | note right of SP2: src_cycle[2]
191 | SP2->>SP0: 
192 | note left of SP0: src_cycle[2] + lat_1[2]
193 | 
194 | note right of SP1: src_cycle[1]
195 | SP1->>SP0: 
196 | note left of SP0: src_cycle[1] + lat_1[1]
197 | 
198 | SP0->>SP2: 
199 | note right of SP2: src_cycle[1] + lat_1[1] + lat_3[2]
200 | 
201 | SP0->>SP1: 
202 | note right of SP1: src_cycle[1] + lat_1[1] + lat_3[1]
203 | ```
204 | 
205 | In summary,
206 | 
207 | - The barrier overflow time is `max(src_cycle[i] + lat_1[i])`.
208 | - The `cycle` of the SYNC command that responds to the WRITE command with the barrier flag from source `i` is `barrier overflow time + lat_3[i]`.
209 | 


--------------------------------------------------------------------------------
/docs/docs/03-sync-proto/035-cycle.md:
--------------------------------------------------------------------------------
 1 | # Cycle
 2 | 
 3 | ```
 4 | CYCLE <cycle>
 5 | ```
 6 | 
 7 | CYCLE command reports the execution cycle of one simulator process to *interchiplet*. This command does not need any response at this time.
 8 | 
 9 | After receiving a CYCLE command, one sub-thread updates the recorded execution cycle with the value provided by the CYCLE command if the new value is greater than the recorded one. Finally, the maximum execution cycle recorded across all CYCLE commands is reported as the total execution cycle.
10 | 
11 | > TODO: Use the cycle command to build up period synchronization.
12 | 


--------------------------------------------------------------------------------
/docs/docs/03-sync-proto/index.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Synchronization Protocol
 3 | 
 4 | You can find the following topics on this page:
 5 | 
 6 | - The synchronization protocol between simulation processes.
 7 | 
 8 | ## List of Protocol Commands
 9 | 
10 | Synchronization protocol can be categorized into functional protocol and timing protocol.
11 | 
12 | ### Functional Protocol Commands
13 | 
14 | The functional protocol performs the functionality of benchmarks correctly, and commands do not care about the execution cycle. It is used in the functional model of simulators. Functional protocol commands and their syntax are listed below:
15 | 
16 | | Command      | Arguments                         | Direction | Location    | Pair with    | Usage |
17 | | :----------: | --------------------------------- | :-------: | :---------: | :----------: | ----- |
18 | | `BARRIER`    | `<src_x> <src_y> <uid> <count>`   | Up        | Source      | --           | The thread at the source enters the barrier. |
19 | | `LAUNCH`     | `<src_x> <src_y> <dst_x> <dst_y>` | Up        | Source      | `WAITLAUNCH` | The thread at the source requests to execute a task at the destination. |
20 | | `LOCK`       | `<src_x> <src_y> <uid>`           | Up        | Source      | `UNLOCK`     | The thread at the source requests to lock the mutex. |
21 | | `RECEIVE`    | `<src_x> <src_y> <dst_x> <dst_y>` | Up        | Destination | `SEND`       | Receive data from the source to the destination. |
22 | | `RESULT`     | `<res_num> [<res_0> <res_1> ...]` | Down      | --          | --           | Response to functional protocol commands with a variable number of results. |
23 | | `SEND`       | `<src_x> <src_y> <dst_x> <dst_y>` | Up        | Source      | `RECEIVE`    | Send data from the source to the destination.  |
24 | | `UNLOCK`     | `<src_x> <src_y> <uid>`           | Up        | Source      | `LOCK`       | The thread at the source unlocks the mutex. |
25 | | `WAITLAUNCH` | `<src_x> <src_y> <dst_x> <dst_y>` | Up        | Destination | `LAUNCH`     | The thread at the destination waits to launch a task. |
26 | 
27 | > "Up" means from simulation process to interchiplet. "Down" means from interchiplet to simulation process.
28 | 
29 | Every functional protocol command needs one RESULT command as the response. RESULT commands provide a list of result fields whose number is specified by `res_num`.
30 | 
31 | As the response to BARRIER, LOCK, UNLOCK, and LAUNCH commands, RESULT commands provide no result. As the response to SEND and RECEIVE commands, RESULT commands should provide the path of the Named Pipe. As the response to WAITLAUNCH commands, RESULT commands should provide the source of LAUNCH commands.
32 | 
33 | > TODO: More flexible format for address
34 | 
35 | ### Timing Protocol Commands
36 | 
37 | The timing protocol synchronizes the execution cycles of simulators. It is used in the timing model of simulators. Timing protocol commands and their syntax are listed below:
38 | 
39 | | Command | Arguments                                                 | Direction | Location    | Pair with | Usage |
40 | | :-----: | --------------------------------------------------------- | :-------: | :---------: | :-------: | ----- |
41 | | `CYCLE` | `<cycle>`                                                 | Up        | Any         | --        | Report execution time of simulation processes. |
42 | | `READ`  | `<cycle> <src_x> <src_y> <dst_x> <dst_y> <nbytes> <desc>` | Up        | Destination | `WRITE`   | Read transaction request from the source to the destination. |
43 | | `SYNC`  | `<cycle>`                                                 | Down      | --          | --        | Response to timing protocol commands. |
44 | | `WRITE` | `<cycle> <src_x> <src_y> <dst_x> <dst_y> <nbytes> <desc>` | Up        | Source      | `READ`    | Write transaction request from the source to the destination. |
45 | 
46 | As described in the [Overview](../index.md), the sequence to perform the communication between processes has been abstracted as the transaction. Hence, the essential read/write operations to lock flags are abstracted into READ/WRITE commands. READ/WRITE commands can be used for both communication and synchronization, such as barrier, lock, and launch. Therefore, **one READ or WRITE command represents a sequence of communication rather than one single package.** The `desc` field defines the transaction's behavior.
47 | 
48 | Bits [19:16] of `desc` represent the category of communication.
49 | 
50 | | Bit [19:16] | Behavior flag | Transaction's behavior |
51 | | :---------: | :-----------: | ---------------------- |
52 | | `0x0`       |               | Controlled by the bit [15:0] of `desc`. |
53 | | `0x1`       | launch        | One request package and one acknowledgment package. |
54 | | `0x2`       | barrier       | One request package and one acknowledgment package.<br/> Bits [15:0] represent the number of processes at which the barrier overflows. |
55 | | `0x4`       | lock          | One request package and one acknowledgment package. |
56 | | `0x8`       | unlock        | One request package and one acknowledgment package. |
57 | 
58 | READ/WRITE commands need SYNC commands as the response. SYNC commands provide a cycle to specify the end cycle of transactions.
59 | 
60 | > TODO: More flexible format for address
61 | 
62 | ## APIs vs. Synchronization Protocol
63 | 
64 | Each API in benchmarks needs one command from the functional protocol and one from the timing protocol. Some trace-based simulators, like SniperSim, provide separate timing and functional models. Hence, the functional protocol should not be merged with the timing protocol.
65 | 
66 | | Benchmark API    | Command from functional protocol | Command from timing protocol  |
67 | | :--------------: | :------------------------------: | :---------------------------: |
68 | | `sendMessage`    | `SEND`                           | `WRITE`                       |
69 | | `receiveMessage` | `RECEIVE`                        | `READ`                        |
70 | | `barrier`        | `BARRIER`                        | `WRITE` with the barrier flag |
71 | | `lock`           | `LOCK`                           | `WRITE` with the lock flag    |
72 | | `unlock`         | `UNLOCK`                         | `WRITE` with the unlock flag  |
73 | | `launch`         | `LAUNCH`                         | `WRITE` with the launch flag  |
74 | | `waitLaunch`     | `WAITLAUNCH`                     | `READ` with the launch flag   |
75 | 
76 | The basic flow for APIs is shown below:
77 | 
78 | ```mermaid
79 | flowchart TB
80 | 
81 | A[Start]
82 | B[Issue one functional protocol command]
83 | C[Wait RESULT command]
84 | D[Issue one timing protocol command]
85 | E[Wait SYNC command]
86 | F[End]
87 | 
88 | A-->B-->C-->D-->E-->F
89 | ```
90 | 
91 | It is not necessary to implement the above flow in one single function. For those simulators that provide separate functional and timing models, functional protocol commands are handled in the functional model, and timing protocol commands are handled in the timing model.
92 | 
93 | 
94 | 
95 | 


--------------------------------------------------------------------------------
/docs/docs/04-import-sim/041-snipersim.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Importing SniperSim
  3 | 
  4 | [SniperSim](http://snipersim.org//w/The_Sniper_Multi-Core_Simulator) is a trace-based CPU simulator that can achieve high speed and reasonable accuracy.
  5 | 
  6 | ## APIs
  7 | 
  8 | APIs are implemented by System Calls. The following system call numbers are assigned to these APIs.
  9 | 
 10 | ```c++
 11 | SYSCALL_LAUNCH = 501,        // Launch request.
 12 | SYSCALL_WAITLAUNCH = 502,    // Wait launch request.
 13 | SYSCALL_BARRIER = 503,       // Enter barrier.
 14 | SYSCALL_LOCK = 504,          // Lock mutex.
 15 | SYSCALL_UNLOCK = 505,        // Unlock mutex.
 16 | SYSCALL_REMOTE_READ = 506,   // Read cross chiplet
 17 | SYSCALL_REMOTE_WRITE = 507,  // Write cross chiplet
 18 | ```
 19 | 
 20 | Each benchmark API corresponds to one system call. All arguments of the benchmark APIs are also the arguments for system calls.
 21 | 
 22 | ## Handle Syscalls
 23 | 
 24 | SniperSim provides separate functional and timing models. Hence, syscalls are handled separately in functional and timing models.
 25 | 
 26 | In the functional model, system calls are handled in file *$SIMULATOR_ROOT/snipersim/sift/recorder/syscall_modeling.cc*. In the timing model, system calls are handled in file *\$SIMULATOR_ROOT/snipersim/common/core/syscall_model.cc*.
 27 | 
 28 | ### Handle SYSCALL_REMOTE_WRITE/SYSCALL_REMOTE_READ
 29 | 
 30 | The flow chart of the function model is as follows:
 31 | 
 32 | ```mermaid
 33 | flowchart TD
 34 | 
 35 | subgraph Write Syscall
 36 | A1[Issue SEND command]
 37 | B1[Wait for RESULT command]
 38 | C1[Open PIPE]
 39 | D1[Write data to PIPE]
 40 | end
 41 | 
 42 | A1-->B1-->C1-->D1
 43 | B1-->B1
 44 | 
 45 | subgraph Read Syscall
 46 | A2[Issue RECEIVE command]
 47 | B2[Wait for RESULT command]
 48 | C2[Open PIPE]
 49 | D2[Read data from PIPE]
 50 | end
 51 | 
 52 | A2-->B2-->C2-->D2
 53 | B2-->B2
 54 | ```
 55 | 
 56 | The flow chart of the timing model is as follows:
 57 | 
 58 | ```mermaid
 59 | flowchart TD
 60 | 
 61 | subgraph Write Syscall
 62 | A1[Get current execution cycle]
 63 | B1[Issue WRITE command]
 64 | C1[Wait for SYNC command]
 65 | D1[Sleep core until cycle specified by SYNC command]
 66 | end
 67 | 
 68 | A1-->B1-->C1-->D1
 69 | C1-->C1
 70 | 
 71 | subgraph Read Syscall
 72 | A2[Get current execution cycle]
 73 | B2[Issue READ command]
 74 | C2[Wait for SYNC command]
 75 | D2[Sleep core until cycle specified by SYNC command]
 76 | end
 77 | 
 78 | A2-->B2-->C2-->D2
 79 | C2-->C2
 80 | ```
 81 | 
 82 | SniperSim is not a cycle-driven simulator. Hence, the execution cycle cannot be changed by modifying the value of some variables. Instead, one Sleep instruction is injected into the timing model, and the duration of the Sleep instruction equals the gap between the cycle at which the READ/WRITE command is issued and the cycle specified by the corresponding SYNC command.
 83 | 
 84 | ```c++
 85 | // Update simulator time.
 86 | ComponentPeriod time_wake_period = *(Sim()->getDvfsManager()->getGlobalDomain()) * end_time;
 87 | SubsecondTime time_wake = time_wake_period.getPeriod();
 88 | SubsecondTime sleep_end_time;
 89 | Sim()->getSyscallServer()->handleSleepCall(m_thread->getId(), time_wake, start_time, sleep_end_time);
 90 | 
 91 | // Sleep core until specified time.
 92 | if (m_thread->reschedule(sleep_end_time, core))
 93 |     core = m_thread->getCore();
 94 | 
 95 | core->getPerformanceModel()->queuePseudoInstruction(new SyncInstruction(sleep_end_time, SyncInstruction::SLEEP));
 96 | ```
 97 | 
 98 | ### Handle Other System Calls
 99 | 
100 | Unlike SYSCALL_REMOTE_READ and SYSCALL_REMOTE_WRITE, the other system calls only need to issue their functional and timing commands; it is not necessary to handle any other functionality.
101 | 
102 | The flow chart is as follows:
103 | 
104 | ```mermaid
105 | flowchart TD
106 | 
107 | subgraph Functional model
108 | A1[Issue functional command]
109 | B1[Wait for RESULT command]
110 | end
111 | 
112 | A1-->B1
113 | B1-->B1
114 | 
115 | subgraph Timing model
116 | A2[Issue timing command]
117 | B2[Wait for SYNC command]
118 | C2[Sleep core until cycle specified by SYNC command]
119 | end
120 | 
121 | A2-->B2-->C2
122 | B2-->B2
123 | ```
124 | 
125 | The mapping between system calls and commands is shown below:
126 | 
127 | | System call          | Functional command | Timing command |
128 | | -------------------- | :----------------: | :------------: |
129 | | SYSCALL_LAUNCH       | `LAUNCH`           | `WRITE`        |
130 | | SYSCALL_WAITLAUNCH   | `WAITLAUNCH`       | `READ`         |
131 | | SYSCALL_BARRIER      | `BARRIER`          | `WRITE`        |
132 | | SYSCALL_LOCK         | `LOCK`             | `WRITE`        |
133 | | SYSCALL_UNLOCK       | `UNLOCK`           | `WRITE`        |
134 | | SYSCALL_REMOTE_READ  | `RECEIVE`          | `READ`         |
135 | | SYSCALL_REMOTE_WRITE | `SEND`             | `WRITE`        |
136 | 
137 | ## Issue CYCLE command
138 | 
139 | Because the CPU always controls the flow of benchmarks, the CPU's execution cycle plays a vital role in the execution cycle of the entire simulation. CYCLE command is issued in file *$SIMULATOR_ROOT/snipersim/common/core/core.cc*.
140 | 


--------------------------------------------------------------------------------
/docs/docs/04-import-sim/042-gpgpusim.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # Importing GPGPUSim
  3 | 
  4 | [GPGPUSim](http://www.gpgpu-sim.org) is a cycle-accurate model that simulates the micro-architecture of Nvidia GPGPU.
  5 | 
  6 | ## APIs
  7 | 
  8 | In real GPGPU systems, benchmark APIs are dispatched to dedicated hardware units. For example, DMA units handle communication, and the mailbox handles barriers and locks. Therefore, benchmark APIs should be implemented by CUDA runtime APIs rather than kernels.
  9 | 
 10 | CUDA runtime APIs are declared and implemented in the CUDA kit. GPGPUSim provides a different runtime library **libcudart.so** rather than the standard runtime library from the CUDA kit. When compiling CUDA executable files, nvcc will link the standard runtime library.
 11 | 
 12 | Benchmark APIs are added to the runtime library provided by GPGPUSim. APIs are implemented in file *$SIMULATOR_ROOT/gpgpu-sim/libcuda/cuda_runtime_api.cc*. Hence, when compiling CUDA executable files, the path to search libraries must be redirected to GPGPUSim:
 13 | 
 14 | ```
 15 | nvcc -L$(SIMULATOR_ROOT)/gpgpu-sim/lib/$(GPGPUSIM_CONFIG) --cudart shared $(CUDA_OBJS) -o $(CUDA_TARGET)
 16 | ```
 17 | 
 18 | ### sendMessage and receiveMessage
 19 | 
 20 | The flow chart of `sendMessage` and `receiveMessage` is as follows:
 21 | 
 22 | ```mermaid
 23 | flowchart TD
 24 | 
 25 | subgraph Write Syscall
 26 | O1(Start)
 27 | A1[Copy data from device to host]
 28 | B1[Issue SEND command]
 29 | C1[Wait for RESULT command]
 30 | D1[Open Pipe]
 31 | E1[Write data to Pipe]
 32 | F1[Get current simulation cycle]
 33 | G1[Send WRITE command]
 34 | H1[Wait for SYNC command]
 35 | I1[Adjust simulation cycle]
 36 | Z1(End)
 37 | end
 38 | 
 39 | O1-->A1-->B1-->C1-->D1-->E1-->F1-->G1-->H1---->I1-->Z1
 40 | C1-->C1
 41 | H1-->H1
 42 | 
 43 | subgraph Read Syscall
 44 | O2(Start)
 45 | A2[Issue RECEIVE command]
 46 | B2[Wait for RESULT command]
 47 | C2[Open Pipe]
 48 | D2[Read data from Pipe]
 49 | E2[Get current simulation cycle]
 50 | F2[Send READ command]
 51 | G2[Wait for SYNC command]
 52 | H2[Write data from host to device]
 53 | I2[Adjust simulation cycle]
 54 | Z2(End)
 55 | end
 56 | 
 57 | O2---->A2-->B2-->C2-->D2-->E2-->F2-->G2-->H2-->I2-->Z2
 58 | B2-->B2
 59 | G2-->G2
 60 | ```
 61 | 
 62 | CUDA provides memory copy APIs to transfer data between the host and the device.
 63 | 
 64 | ```C++
 65 | // From the device to the host, used by sendMessage
 66 | cudaMemcpy(interdata, __addr, __nbyte, cudaMemcpyDeviceToHost);
 67 | 
 68 | // From the host to the device, used by receiveMessage
 69 | cudaMemcpy(__addr, interdata, __nbyte, cudaMemcpyHostToDevice);
 70 | ```
 71 | 
 72 | GPGPUSim is a cycle-driven simulator whose cycle loop can be found in file *\$SIMULATOR_ROOT/gpgpu-sim/src/gpgpu-sim/gpu-sim.h* and *\$SIMULATOR_ROOT/gpgpu-sim/src/gpgpu-sim/gpu-sim.cc*.
 73 | 
 74 | GPGPUSim uses two variables to record the execution cycles: `gpgpu_sim::gpu_sim_cycle` and `gpgpu_sim::gpu_tot_sim_cycle`. The sum of these two variables represents the real consumed cycle, which should be replaced by the cycle value in the SYNC command.
 75 | 
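 76 | When GPGPUSim handles CUDA APIs, the cycle loop has not started. Hence, the cycle counters can be directly modified. The snippet below updates `gpgpu_sim::gpu_tot_sim_cycle` so that the sum of the two variables equals `timeEnd`: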
 77 | 
 78 | ```c++
 79 | // Get the current cycle.
 80 | gpgpu_sim *gpu = GPGPU_Context()->the_gpgpusim->the_context->get_device()->get_gpgpu();
 81 | long long unsigned int timeNow = gpu->gpu_sim_cycle + gpu->gpu_tot_sim_cycle;
 82 | 
 83 | long long int timeEnd = ...
 84 | 
 85 | // Update the current cycle.
 86 | gpu->gpu_tot_sim_cycle = timeEnd - gpu->gpu_sim_cycle;
 87 | ```
 88 | 
 89 | ### Other APIs
 90 | 
 91 | Unlike `sendMessage` and `receiveMessage`, the other APIs only need to issue their functional and timing commands; it is not necessary to handle any other functionality.
 92 | 
 93 | The flow chart is as follows:
 94 | 
 95 | ```mermaid
 96 | flowchart TD
 97 | 
 98 | A1[Issue functional command]
 99 | B1[Wait for RESULT command]
100 | 
101 | A2[Issue timing command]
102 | B2[Wait for SYNC command]
103 | C2[Change the simulator cycle]
104 | 
105 | A1-->B1-->A2-->B2-->C2
106 | B1-->B1
107 | B2-->B2
108 | ```
109 | 
110 | The mapping between APIs and commands is shown below:
111 | 
112 | | Benchmark API    | Functional command | Timing command |
113 | | ---------------- | :----------------: | :------------: |
114 | | `launch`         | `LAUNCH`           | `WRITE`        |
115 | | `waitLaunch`     | `WAITLAUNCH`       | `READ`         |
116 | | `barrier`        | `BARRIER`          | `WRITE`        |
117 | | `lock`           | `LOCK`             | `WRITE`        |
118 | | `unlock`         | `UNLOCK`           | `WRITE`        |
119 | | `receiveMessage` | `RECEIVE`          | `READ`         |
120 | | `sendMessage`    | `SEND`             | `WRITE`        |
121 | 
122 | ## Issue CYCLE command
123 | 
124 | The task on the GPU is triggered by CPUs in the system. CPUs prepare the data required by tasks and accept the generated result. The execution cycle of CPUs reflects the execution cycle of GPUs through the synchronization performed by data transmission. Therefore, GPGPUSim does not issue CYCLE commands.
125 | 


--------------------------------------------------------------------------------
/docs/docs/images/Chiplet Simulator.drawio:
--------------------------------------------------------------------------------
1 | <mxfile host="Electron" modified="2024-05-07T02:07:17.025Z" agent="5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/20.7.4 Chrome/106.0.5249.199 Electron/21.3.3 Safari/537.36" version="20.7.4" etag="4GPFQnyt4CJbGkOIIiT9" type="device" pages="3"><diagram id="5hvEZPYf0C5aOQCWrC9J" name="Page-1">7Vtbc5s4FP41zLQP6YAkiP2Y2G3azmY3U0+m6aMWFKNdjFxZvu2vrzACjIRTbxJArvtio4ME4jsXnfMJHDiabW44nse3LCKJA9xo48CxA8DQ8+VvJtjmgmDo5oIpp1Eu8irBhP5HlLDotqQRWdQ6CsYSQed1YcjSlISiJsOcs3W92yNL6ned4ykxBJMQJ6b0K41EnEsHvlvJPxI6jYs7e646M8NFZyVYxDhi6z0RfO/AEWdM5EezzYgkGXYFLvm4DwfOlhPjJBXHDKCf5+EQP36Eq/vFzThKvpDp/YUXqMmJbfHEnC3TiGSDXAdeMy5iNmUpTv5gbC6FnhT+Q4TYKl3hpWBSFItZos6SDRUPe8ffsku981VrvFFX3jW2RSMVfPuw38hHAb9oV+N2rWLgQmAurjJFS0GY4MWChoX4A02KOeVPSSJD2RV6SrRgSx6SpyBTVoj5lIin+oFSydI5CJsROW85kJMEC7qqTwQrM52W/SpNygOlzP+j2Py6K5ws1Z1Gd/dSMKbEAUEiJ379N68pPvi+zEzx+pGl4mKxU+6V7OCB+WYHYHFeHk2z/zdrKi1cGjqZMb59+xM7WsdUkMkc76Bdy1BRt5nspsqgJG6FvlaEC7J5WmMmwMWAwvFU5PEC1V5Xflz2ifd8GLkt6QQ066TSRg7siHHyMjRfAT2ooYeAiR7qEjx4SuAFloGHTgg85FoGnn9K4CHLwAtOCDxwRMwLugTv8oTA846IeZ2CNzDAm8TyaaIyZclummbNEIexrDGsQ7D3hGV4ahAaWUsThJ1aYVHH7mN42ztOem586Zo4dWppnlmv3HG6woJYa2s6hkHv7updGqBkFe9ENVOWyr/rlup78LwC38L6XpVpPy/wUbN9dFTgD3pU9uX5KdvvVdnDTpXtPkvZ3q+jbNirsk2e6NNfzqtTd2wpHHvZOzDQ2Dt05OpapnevrxeTgro5c04VHKuV9nIek9o6N6XonE//RLdnUmbnrpT+PQXBbnOIcsfv296Z5hSi+1Rgbwf2eSu8GnrHaCqcgwyiry9GeeqhRmkKLafxgh0mk234k4g14/9K4adUEP6Y+UDvpbPGdPkN9EOjc4C2nAOY/IMC7oKlF6OYzvsH7ZBt1aj9TiOK/zuiVBYEWoko+m5Y5xHFLEZsjCg68dsYUZqI3/YiirnzNf5yddu0eyPLYpYkhPeOol50BQMTxUEDiK2VXKBhE8cC9lzL7Xpnz0HDRo0FMAWWwQTt3IsZ2AaTmQtZABNwbYPJXBltgMm2nT2EDEy6SxLLvNCSJBG+lFhuThL1EBK43SaJ0GRGbUwSjR3bYzmZ1pJEaLKXFsQQveboPYZAk0+0/e0AHcP+3w6AZkFig63Ztl5B86U7G2CyreSAVlZmyLqSw8rKDNlWchQXtgsm37Z1EJlfa51vLo0OKO+lhKsWQrrOpdHBnQircmkjv+k9l0bdvv1ouXOAVpxDp9k7dw6Tc7HROQwevcE5mnj0Zywcsll9QpzDXH2HDd//AA==</diagram><diagram id="50E7gOnlWqVizQLOeL3i" 
name="Page-2">7VnbctowEP0aHtvxBRvyGCCknWkzmTLTlqeMgoWtVLYYWdz69V1hyXcoCQanU14ArXYt6eye1a7p2MNwc8/RIvjKPEw7luFtOvaoY1k3pgOfUrBNBO6NkQh8TrxEZGaCCfmNlVCrLYmH44KiYIwKsigKZyyK8EwUZIhzti6qzRktrrpAPq4IJjNEq9IfxBNBIu07Rib/hIkf6JVNQ82ESCsrQRwgj61zIvuuYw85YyL5FW6GmErsNC6J3XjPbLoxjiNxjAGdPgb4/uXp6cGeoM338Qh/iz/01d7EVh8Ye3B+NYxYBF8DzpaRh+VjDBgxLgLmswjRL4wtQGiC8AULsVXeQ0vBQBSIkKpZvCHipzT/6KjRNDcz2qgn7wZbPYgE3+aM5HCan8vMdiNtN2eRUBsxAeNBLBAXtzISsgPtZGNCqbJJQJAn34utEsVsyWf4AKA6RhH3sTigZ6cRAMzBLMRwBrDjmCJBVsV9IBXDfqqXuRl+KE+/wuvquStEl2qlHSKVWMgcL321DojAkwXanX8NbC86uQw84jM1dDKMV5gLvDmMchUVZWB1FZVULjE1tdY5ZipRkCOlNmscR6uC413kvXcUu0YRRcdtGcWbaw5qNgfZR+Ygp80cZFe48xigGIPIlGck4VLugkUH6GS8nk5NZKESf1Ji5PhjupckkK6VrgxqikHOkQzqtXqL9y7qdjPn9CwE6t3+Fve9NVQadHvvSLdbJ7pdmT4yAjvMLmanmFjsfilhJPtSVlnwAK5om1NbSIX4wDqlMqrbN0qxmDwxi8z0jCcE64U7jTMHa9O5pvFgSku4cpXXcDClNbgOWrfUhf5F33EvEHxOpdb4PJeFRiw3BvUDTyoNe1wtNgIWPi/jVgqNtIjQ0Na0O2lRkS80ynmjuRvHuhYa7dw45p5QuUyl0avnjwhkuY4BLK63Am6XL+gkGhyvCAPqyKh1KRxu8Axqri9/ZZyDlaUKxXG8eyKKdl8cQ9TQml76HfGxW1P4X5iP9rvlY9uv4U7ly3G3atoLNnyrltdx+4dv1dPU9SnOWwBWXzrqht/6txp+12i94e9WsJxWYIMDiyI2seDsFx4yCvk6JeEc+FcSIUp8qIZGMwAIg3wg4SMzRG/VREg8Ty5T64yiu86Rh0v9i12Xh2vcYZ3NHdX68uH/cYdVvhZr6HFZd7hXd+QarLquoRl3wDD7+zK5J7L/gO27Pw==</diagram><diagram id="lMHYC53xT88wkW592aym" 
name="Page-3">7V1bd6M2EP41eUwO4u7HTTa73dOkzWl6TncfCRCbFiMX403cX19hC2NGYGRuEg55iRHiNvq+uUgzcKXdLd+/xs5q8Yg9P7xSFe/9Svt8papImdnkX9qy3bfYKm2Yx4FHO+UNz8F/fnYkbd0Enr8udEwwDpNgVWx0cRT5blJoc+IYvxW7veKweNWVM/eZhmfXCdnWvwIvWdCnMJS8/Rc/mC+yKyOF7lk6WWfasF44Hn47atLur7S7GONk/2v5fueHqfAyueyP+1Kx93BjsR8lPAd8/T0y439n+ta7/vPeDu9/ffi0vNbpzf10wg19Ynq3yTYTQYw3keenZ1GutNu3RZD4zyvHTfe+kUEnbYtkGZItRH6+4iiho4jUdDsIwzsc4pg0RDgi7bf0in6c+O+Vz4IOEiLQ8vHST+It6UIPMOltb4tgectHyMq6LI5G59DRoaiYH86cC478oLI7Q46IEeNzsNyETkIeXDVDcv3bl/TXPP31FGPXX6+7FXQHUrWLUj1I+UisdolUzd6EajMi8j3CTrpJ4VSUGo6TBZ7jyAkfMF5RWf3tJ8mWCsvZJLgoSf89SL6nh98YdOvH0Z7P7/TMu41tthGRB/x+vPEjP0O6mR+228qOg6O2Tpw4+ZTqqfyBdm1fglRWu2PWSYz/OWiffKxTWZweaaIrnXjuJ6dkbJVDIvYJeIOfxQuUjTA99AkH5NIHKEEszRQAkjXexK5Pj8pxQkThbI+6rdIO6zOuYyoAdvsz5iA8PGNzXKqXQHZVl4zt2iVIVVckk6p+CVI1TMmkalyCVC1VMqnqM0asd7HvJP5JNyC3+Pd56627iX/uZIvEuwi5V/Cj4BR05yLwOwR7u3tiEExex6Gl39AKKSbLv83LdbIgaPGk4xlwURAnzfTelBcb8000G5pmFifNVJE0s8ZEM+jQiucZOykw8WxontmcPNNE8sweE89giCOeZ2w8PvFsaJ7NOHmmi+QZG19IzDMY9IrnGTtDM/FsaJ5lKxi1RDNEEg2xIYbETIMTIcKZhljH+zdnSSSkKk/BiuXbeuGs0p8edjfL3UPXSfFlL/KHlwEdhZkuWKzqsOtJ6EgX5ZrpPG2EetBGbdaT6vUTb2Br2C0VVPmCkGYB0CHlRjn6AyesWIZiTmuC4JJZz9o/b+v1LBNyZoj1LJ0NVA+z2Nkk9rco8WN3EaxCMqp0J7nYYT/DLKI3kiIv9rgDyQAl+QFOGMwjsun66TVJQ6qFAtcJP9Edy8Dzdl5EmZIr0rcHtVafe1Cm1LTeJs2rvTLFiVKTscRRsFuYGFD3neOJCfao9LZTbOVM1uF0lMqnMTojtX5ywMvd8vbjP6BXXYgh+AFRa5n6yYioxUOFKeoMD8aEh2Z46MdREY4Hc8JDMzzMLhMP1oSHhil0ymUCgp1Wf3SCSNZJHDjbIHqyQWcXdViCRV6GWTd01uvALYrpdCzTlDtnzs52yBSzfASPhsgoGaKsTRCf2JxTGAFCDPXs2RsXkX0GsmJsTr72l9JXklM0PqnCJAjxYi3JIRmfWKFxES/WkpSB8YkVLnGKF2vJCvH4xArXs4SLNTvxkVifvj3dM4L7sOvGufd3HLTkk+Y9LdaYvI6bIXQ12WKV3R++F8RpvaeImLXB+l6TeLUp0uSAjFUxTUJ1lHKjUje2pVuPgCNpGXzeeH6irCN+fV37vXjsM/kzQQfDcgHJKQhOg1kEdKtqIinirsltI00toA51E6NW+UOdrzIXKaOAyvHT3WfG6e7IAP2LhenFNewhaWiyNJQ4KwnoNdPmc+J6m9Ay1cmJk9KJ03iduIrpt4Fyb1n4TDZweBvIC5YaG6jcKKZp9GEC1argtWMbCK9TZwRh/zorqM5Af00WK1hSaC+vFYQTmuLNIFtRP5lBGcygwWsG2+b3tDOD8hdRfAQzyAuWWjNoGln5WdvE5aoFsY7tHrxOnd2D/YHdE2jHSpZB5bVjcAVJvB1j1zsnOy
aDHRusgKIdfEb1bgC40CiefeySxsQ+GdjHW8dstM0CbQefUVUyw/Vo4ezLasSmgiee4dPA8OkG3/D1VvJkqQ30pJypytVqrqDXPWe96CUhU+PUi3Y/RRAIagZI+Z5znC1tQlJHSDI+OJIup9xONJKsD46kyynUE42k2QdHEhujs9AaQwlKASCcCOsSR9wvm6vA0TAFLk3hVlvgYsLE4J4LXCw2un3Acwa6cr71BsHkqpKXCelDBrs2G+yOR5ow9hQvTTaHbDzS1OHaj3BpqiOWJnwnkHhpsmvL45GmrcgmTTbRo7Lq4OJrCuodNjlyHO2KWvyuqw4Ok5sZWi0+F4k9EQIngp+oqnXa+l9pt9mV9okJopjAm7lin1a3nTFBm3XEBPjmPimZwCZNTEwQxQTeRIm9k9k/EwyjIyYY0LjIyAR2bmligigm8CYt2BVfOeiaCRZ0apoywYTGRUYmsKk8j5s0Dsrev0nirXj37mbPSZwdM+KNm2zikpTfMAxW6x1eaIzmhnjj1QdoEOwlSQYlk+ztZmbAXIJREq8dZiSPAza7r4ANKdLmVrdSRqigiurzpJskcovQRvVltoqqmAWQXXdUZGTA1yzDXBVuBQVPBKvPO/rOqQVnR2qytmH/umolE9bsIkmyvJHCBp8TqYcnNa+zXVswoRt6sW6wK07rsOCOc9WKVQ4wEIUWqyNOG3D+uIbTsL80lRhIOf1hBVkTlwsuURM2d0gwpHCHs0Lz/hE6nZ4ix0c0ispVto+ydwgGpAjNQz/c6FhfEis6E52OX0GAf8apRFSFBHFj+dQPzPAXL1d0Oh9zUlNdqCnuj5FRPSHOaLE5MJ/JldM7+Ba94nGRzZaOa+y8C0u+PM+QQrVBkuE5c7L8OK9/lV3FovIwaX2V5Tdnp/VB4HC+Ke/suMYsv07Vfdkw3VA/r79V079dd3A3V2d+gYtsxhgnx92J0lg8Ys9Pe/wP</diagram></mxfile>


--------------------------------------------------------------------------------
/docs/javascripts/mathjax.js:
--------------------------------------------------------------------------------
 1 | window.MathJax = {
 2 |     tex: {
 3 |       inlineMath: [["\\(", "\\)"]],
 4 |       displayMath: [["\\[", "\\]"]],
 5 |       processEscapes: true,
 6 |       processEnvironments: true
 7 |     },
 8 |     options: {
 9 |       ignoreHtmlClass: ".*|",
10 |       processHtmlClass: "arithmatex"
11 |     }
12 |   };
13 |   
14 |   document$.subscribe(() => { 
15 |     MathJax.startup.output.clearCache()
16 |     MathJax.typesetClear()
17 |     MathJax.texReset()
18 |     MathJax.typesetPromise()
19 |   })
20 | 


--------------------------------------------------------------------------------
/docs/mkdocs.yml:
--------------------------------------------------------------------------------
  1 | site_name: Documents for LegoSim
  2 | 
  3 | nav:
  4 |   - index.md
  5 |   - 01-quick-start.md
  6 |   - Benchmarks:
  7 |     - APIs:
  8 |       - 02-benchmarks/021-apis/index.md
  9 |       - APIs for CPU: apiProject1/group__apis__for__cpu.md
 10 |       - APIs for GPU: apiProject1/group__apis__for__cuda.md
 11 |     - 02-benchmarks/022-yaml.md
 12 |   - Synchronization Protocol:
 13 |     - 03-sync-proto/index.md
 14 |     - 03-sync-proto/030-net-proto.md
 15 |     - 03-sync-proto/031-communication.md
 16 |     - 03-sync-proto/032-barrier.md
 17 |     - 03-sync-proto/033-lock.md
 18 |     - 03-sync-proto/034-launch.md
 19 |     - 03-sync-proto/035-cycle.md
 20 |   - Importing Simulators:
 21 |     - 04-import-sim/index.md
 22 |     - 04-import-sim/041-snipersim.md
 23 |     - 04-import-sim/043-gem5.md
 24 |     - 04-import-sim/042-gpgpusim.md
 25 |     - Synchronization protocol interface: apiProject1/group__sync__proto.md
 26 |     - Pipe communication interface: apiProject1/group__pipe__comm.md
 27 |   - interchiplet Documents:
 28 |     - Command Line Parser: apiProject1/group__cmdline.md
 29 |     - Yaml Configuration Interface: apiProject1/group__benchmark__yaml.md
 30 |     - Network benchmark interface: apiProject1/group__net__bench.md
 31 |     - Network latency information interface: apiProject1/group__net__delay.md
 32 |     - Command handler structures: apiProject1/group__cmd__handler__struct.md
 33 |     - Command handler functions: apiProject1/group__cmd__handler__func.md
 34 |   - Source Codes:
 35 |     - pages list: apiProject1/pages.md
 36 |     - modules:
 37 |       - modules list: apiProject1/modules.md
 38 |     - namespace:
 39 |       - namespaces list: apiProject1/namespaces.md
 40 |       - namespace_members: apiProject1/namespace_members.md
 41 |       - namespace_member_functions: apiProject1/namespace_member_functions.md
 42 |       - namespace_member_variables: apiProject1/namespace_member_variables.md
 43 |       - namespace_member_typedefs: apiProject1/namespace_member_typedefs.md
 44 |       - namespace_member_enums: apiProject1/namespace_member_enums.md
 45 |       - InterChiplet: apiProject1/namespaceInterChiplet.md
 46 |     - classes:
 47 |       - classes list: apiProject1/annotated.md
 48 |       - classes index: apiProject1/classes.md
 49 |       - classes hierarchy: apiProject1/hierarchy.md
 50 |       - class_members: apiProject1/class_members.md
 51 |       - class_member_functions: apiProject1/class_member_functions.md
 52 |       - class_member_variables: apiProject1/class_member_variables.md
 53 |       - class_member_typedefs: apiProject1/class_member_typedefs.md
 54 |       - class_member_enums: apiProject1/class_member_enums.md
 55 |       - links: apiProject1/links.md
 56 |       - BenchmarkConfig: apiProject1/classBenchmarkConfig.md
 57 |       - CmdLineOptions: apiProject1/classCmdLineOptions.md
 58 |       - NetworkBenchItem: apiProject1/classNetworkBenchItem.md
 59 |       - NetworkBenchList: apiProject1/classNetworkBenchList.md
 60 |       - NetworkDelayItem: apiProject1/classNetworkDelayItem.md
 61 |       - NetworkDelayMap: apiProject1/classNetworkDelayMap.md
 62 |       - NetworkDelayStruct: apiProject1/classNetworkDelayStruct.md
 63 |       - PipeComm: apiProject1/classInterChiplet_1_1PipeComm.md
 64 |       - PipeCommUnit: apiProject1/classInterChiplet_1_1PipeCommUnit.md
 65 |       - ProcessConfig: apiProject1/classProcessConfig.md
 66 |       - ProcessStruct: apiProject1/classProcessStruct.md
 67 |       - SyncBarrierStruct: apiProject1/classSyncBarrierStruct.md
 68 |       - SyncClockStruct: apiProject1/classSyncClockStruct.md
 69 |       - SyncCommand: apiProject1/classInterChiplet_1_1SyncCommand.md
 70 |       - SyncCommStruct: apiProject1/classSyncCommStruct.md
 71 |       - SyncLaunchStruct: apiProject1/classSyncLaunchStruct.md
 72 |       - SyncLockStruct: apiProject1/classSyncLockStruct.md
 73 |       - SyncPipeStruct: apiProject1/classSyncPipeStruct.md
 74 |       - SyncStruct: apiProject1/classSyncStruct.md
 75 |     - files:
 76 |       - file list: apiProject1/files.md
 77 |       - functions: apiProject1/functions.md
 78 |       - macros: apiProject1/macros.md
 79 |       - variables: apiProject1/variables.md
 80 |       - interchiplet: apiProject1/dir_a2025b34133129e5724d121abe9a4a4a.md
 81 |       - interchiplet/includes: apiProject1/dir_943fa6db2bfb09b7dcf1f02346dde40e.md
 82 |       - interchiplet/srcs: apiProject1/dir_b94c70d771af9f161858c2c4e7b3d1c5.md
 83 |       - apis_c.h: apiProject1/apis__c_8h.md
 84 |       - apis_c.cpp: apiProject1/apis__c_8cpp.md
 85 |       - apis_cu.h: apiProject1/apis__cu_8h.md
 86 |       - benchmark_yaml.h: apiProject1/benchmark__yaml_8h.md
 87 |       - cmd_handler.h: apiProject1/cmd__handler_8h.md
 88 |       - cmd_handler.cpp: apiProject1/cmd__handler_8cpp.md
 89 |       - cmdline_options.h: apiProject1/cmdline__options_8h.md
 90 |       - interchiplet.cpp: apiProject1/interchiplet_8cpp.md
 91 |       - global_define.h: apiProject1/global__define_8h.md
 92 |       - net_bench.h: apiProject1/net__bench_8h.md
 93 |       - net_delay.h: apiProject1/net__delay_8h.md
 94 |       - pipe_comm.h: apiProject1/pipe__comm_8h.md
 95 |       - sync_protocol.h: apiProject1/sync__protocol_8h.md
 96 |     - sources:
 97 |       - apis_c.h: apiProject1/apis__c_8h_source.md
 98 |       - apis_c.cpp: apiProject1/apis__c_8cpp_source.md
 99 |       - apis_cu.h: apiProject1/apis__cu_8h_source.md
100 |       - benchmark_yaml.h: apiProject1/benchmark__yaml_8h_source.md
101 |       - cmd_handler.h: apiProject1/cmd__handler_8h_source.md
102 |       - cmdline_options.h: apiProject1/cmdline__options_8h_source.md
103 |       - interchiplet.cpp: apiProject1/interchiplet_8cpp_source.md
104 |       - global_define.h: apiProject1/global__define_8h_source.md
105 |       - net_bench.h: apiProject1/net__bench_8h_source.md
106 |       - net_delay.h: apiProject1/net__delay_8h_source.md
107 |       - pipe_comm.h: apiProject1/pipe__comm_8h_source.md
108 |       - sync_protocol.h: apiProject1/sync__protocol_8h_source.md
109 |       - cmd_handler.cpp: apiProject1/cmd__handler_8cpp_source.md
110 | 
111 | theme:
112 |   name: "material"
113 |   features:
114 |     - navigation.indexes
115 | 
116 | plugins:
117 |   - search
118 |   - autorefs
119 |   - mkdoxy:
120 |       projects:
121 |         apiProject1:
122 |           src-dirs:
123 |             ../interchiplet/includes
124 |             ../interchiplet/srcs
125 |           full-doc: True
126 |           doxy-cfg:
127 |             FILE_PATTERNS: "*.cpp *.h*"
128 |             EXAMPLE_PATH: ""
129 |             RECURSIVE: True
130 | 
131 | markdown_extensions:
132 |   - pymdownx.highlight
133 |   - pymdownx.superfences:
134 |       custom_fences:
135 |         - name: mermaid
136 |           class: mermaid
137 |           format: !!python/name:pymdownx.superfences.fence_code_format
138 |   - pymdownx.arithmatex:
139 |       generic: true
140 | 
# Scripts loaded on every page to render math with MathJax 3.
extra_javascript:
  - javascripts/mathjax.js
  # The polyfill.io script was dropped: the domain changed ownership and served
  # malicious code in 2024, and MathJax 3 does not need an ES6 polyfill on
  # current browsers.
  - https://unpkg.com/mathjax@3/es5/tex-mml-chtml.js
145 | 


--------------------------------------------------------------------------------
/docs/requirements.txt:
--------------------------------------------------------------------------------
1 | mkdocs
2 | mkdocstrings[python]
3 | mkdocs-material
4 | mkdoxy


--------------------------------------------------------------------------------
/global_manager_test.cpp:
--------------------------------------------------------------------------------
 1 | #include "global_manager.h"
 2 | #include <gtest/gtest.h>
 3 | 
/**
 * @brief Test fixture that gives every test case its own GlobalManager instance.
 */
class GlobalManagerTest : public ::testing::Test {
protected:
    // Create a fresh GlobalManager before each test case.
    void SetUp() override {
        gm = std::make_unique<GlobalManager>();
    }

    // Destroy the GlobalManager after each test case.
    void TearDown() override {
        gm.reset();
    }

    // GlobalManager under test; owned by the fixture.
    std::unique_ptr<GlobalManager> gm;
};
16 | 
// A freshly constructed GlobalManager is expected to report IsFinish() == true.
TEST_F(GlobalManagerTest, TestIsFinish) {
    EXPECT_TRUE(gm->IsFinish());
}
20 | 
21 | TEST_F(GlobalManagerTest, TestAddRequest) {
22 |     InterChiplet::AddrType sender = {0, 0};
23 |     InterChiplet::AddrType receiver = {1, 1};
24 |     std::string data = "Hello";
25 |     double senderClock = 100.0;
26 |     double frequency = 1.0;
27 |     InterChiplet::SyncCommType behavior = InterChiplet::SC_SEND;
28 | 
29 |     gm->AddRequest(sender, receiver, data, senderClock, frequency, behavior);
30 |     EXPECT_FALSE(gm->IsFinish());
31 | }
32 | 
33 | TEST_F(GlobalManagerTest, TestCheckPair) {
34 |     InterChiplet::AddrType sender = {0, 0};
35 |     InterChiplet::AddrType receiver = {1, 1};
36 |     std::string data = "Hello";
37 |     double senderClock = 100.0;
38 |     double frequency = 1.0;
39 | 
40 |     gm->AddRequest(sender, receiver, data, senderClock, frequency, InterChiplet::SC_SEND);
41 |     gm->AddRequest(receiver, sender, data, senderClock, frequency, InterChiplet::SC_RECEIVE);
42 | 
43 |     EXPECT_TRUE(gm->CheckPair());
44 | }
45 | 
// Test runner entry point: hands the command line to GoogleTest and runs every
// registered test, returning the value of RUN_ALL_TESTS().
int main(int argc, char **argv) {
    ::testing::InitGoogleTest(&argc, argv);
    return RUN_ALL_TESTS();
}


--------------------------------------------------------------------------------
/interchiplet/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | bin/
3 | lib/
4 | build/
5 | 


--------------------------------------------------------------------------------
/interchiplet/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | cmake_minimum_required(VERSION 3.10)
 3 | enable_language(CUDA)
 4 | 
 5 | # Project name.
 6 | project(interchiplet LANGUAGES CUDA CXX)
 7 | 
 8 | # Third-party library
 9 | add_subdirectory(thirdparty/yaml-cpp)
10 | add_subdirectory(thirdparty/CLI11)
11 | add_subdirectory(thirdparty/spdlog)
12 | 
13 | # Parameter set.
14 | set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin)
15 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/lib)
16 | include_directories(includes)
17 | 
18 | # Import library
19 | # find cppzmq wrapper, installed by make of cppzmq
20 | find_package(Boost REQUIRED COMPONENTS
21 |     system
22 |     filesystem
23 |     program_options
24 | )
25 | 
26 | # Static library: interchiplet_c
27 | add_library(interchiplet_c STATIC
28 |     srcs/apis_c.cpp)
29 | 
30 | # Static library: interchiplet_cu
31 | # add_library(interchiplet_cu STATIC
32 | #     srcs/apis_cu.cu)
33 | 
34 | # Executable: interchiplet
35 | add_executable(interchiplet srcs/interchiplet.cpp
36 |                             srcs/cmd_handler.cpp)
37 | target_include_directories(interchiplet PUBLIC thirdparty/CLI11/include)
38 | target_include_directories(interchiplet PUBLIC thirdparty/spdlog/include)
39 | target_compile_options(interchiplet PUBLIC "-pthread")
40 | target_link_options(interchiplet PUBLIC "-pthread")
41 | target_link_libraries(interchiplet ${Boost_LIBRARIES})
42 | target_link_libraries(interchiplet yaml-cpp::yaml-cpp) # The library or executable that require yaml-cpp library
43 | 


--------------------------------------------------------------------------------
/interchiplet/depreciate/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | cmake_minimum_required(VERSION 3.10)
 3 | enable_language(CUDA)
 4 | 
 5 | # Project name.
 6 | project(interchiplet LANGUAGES CUDA CXX)
 7 | 
 8 | # Parameter set.
 9 | set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin)
10 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_SOURCE_DIR}/lib)
11 | include_directories(includes)
12 | 
13 | # Import library
14 | # find cppzmq wrapper, installed by make of cppzmq
15 | find_package(cppzmq)
16 | find_package(Boost REQUIRED COMPONENTS
17 |     system
18 |     filesystem
19 | )
20 | 
21 | # Static library: interchiplet_c
22 | add_library(interchiplet_c STATIC
23 |     srcs/apis_c.cpp)
24 | 
25 | # Static library: interchiplet_cu
26 | add_library(interchiplet_cu STATIC
27 |     srcs/apis_cu.cu)
28 | 
29 | # Executable: record_transfer
30 | add_executable(record_transfer srcs/record_transfer.cpp)
31 | target_link_libraries(record_transfer ${Boost_LIBRARIES})
32 | 
33 | # Executable: comm_bridge
34 | add_executable(comm_bridge srcs/comm_bridge.cpp)
35 | target_compile_options(comm_bridge PUBLIC "-pthread")
36 | target_link_options(comm_bridge PUBLIC "-pthread")
37 | #target_link_libraries(comm_bridge ${Boost_LIBRARIES})
38 | 
39 | # Executable: zmq_pro
40 | # Require cppzmq and Boost. If not found, skip.
41 | if (CPPZMQ_FOUND AND Boost_FOUND)
42 |     add_executable(zmq_pro
43 |         srcs/zmq_pro.cpp
44 |         srcs/interchiplet_client.cpp)
45 | 
46 |     include_directories(${Boost_INCLUDE_DIRS})
47 |     target_link_libraries(zmq_pro cppzmq)
48 |     target_link_libraries(zmq_pro ${Boost_LIBRARIES})
49 | else()
50 |     if (CPPZMQ_FOUND)
51 |         message(STATUS "Cannot find Boost library, skip target zmq_pro.")
52 |     elseif(Boost_FOUND)
53 |         message(STATUS "Cannot find cppzmq library, skip target zmq_pro.")
54 |     endif()
55 | endif()
56 | 


--------------------------------------------------------------------------------
/interchiplet/depreciate/interchiplet_client.cpp:
--------------------------------------------------------------------------------
 1 | #include "interchiplet_client.h"
 2 | 
 3 | #include<cstring>
 4 | 
 5 | //与popnet通信部分
 6 | namespace nsInterchiplet
 7 | {
 8 | using namespace std;
 9 | 
10 | const int MSG_LEN = 1024;
11 | 
12 | static zmq::context_t *zmqContext;
13 | static zmq::socket_t *zmqSocket;
14 | static deque<zmq::message_t> msgBuf;
15 | int connectZmq(const string &addr)
16 | {
17 |     zmqContext = new zmq::context_t(1);
18 |     zmqSocket = new zmq::socket_t(*zmqContext, zmq::socket_type::pair);
19 |     zmqSocket->connect(addr);
20 |     zmqSocket->send(zmq::str_buffer("ready"));
21 |     char buf[MSG_LEN + 1] = {0};
22 |     zmqSocket->recv(buf, MSG_LEN);
23 |     return strcmp(buf, "start") == 0 ? 0 : -1;
24 | }
25 | /* int connectZmq()
26 | {
27 |     return connectZmq(popnetAddr);
28 | }
29 | int disconnectZmq()
30 | {
31 |     zmqSocket->disconnect(popnetAddr);
32 |     return 0;
33 | } */
34 | int closeZmq()
35 | {
36 |     zmqSocket->close();
37 |     delete zmqSocket;
38 |     delete zmqContext;
39 |     return 0;
40 | }
41 | int readAllMsg()
42 | {
43 |     auto ret = zmq::recv_multipart(*zmqSocket, std::back_inserter(msgBuf));
44 |     return (int)*ret;
45 | }
46 | int readMsg(string &str)
47 | {
48 |     if (msgBuf.empty())
49 |         readAllMsg();
50 |     //str = msgBuf.front().to_string() + '\0';
51 |     char buf[MSG_LEN+1]={0};
52 |     memcpy(buf,msgBuf.front().data(),msgBuf.front().size());
53 |     str=buf;
54 |     msgBuf.pop_front();
55 |     return 0;
56 | }
57 | int writeMsg(const string &str)
58 | {
59 |     char tmp[MSG_LEN] = {0};
60 |     strcpy(tmp, str.c_str());
61 |     auto r = zmqSocket->send(zmq::str_buffer(tmp)/* , zmq::send_flags::dontwait */);
62 |     return *r;
63 | }
64 | } // namespace nsInterchiplet
65 | 


--------------------------------------------------------------------------------
/interchiplet/depreciate/interchiplet_client.h:
--------------------------------------------------------------------------------
 1 | #include <zmq.hpp>
 2 | #include <zmq_addon.hpp>
 3 | #include<deque>
 4 | #include<string>
 5 | 
// Client-side helpers for the ZeroMQ link (implemented in interchiplet_client.cpp).
namespace nsInterchiplet
{
    // Size in bytes of the message buffers used by the functions below.
    extern const int MSG_LEN;

    // Connect to `addr`, send "ready" and wait for a reply.
    // Returns 0 if the reply is "start", -1 otherwise.
    int connectZmq(const std::string &addr);
    // Close the socket and free the socket and context. Returns 0.
    int closeZmq();
    // Store the text of the next received message in `str`. Returns 0 on success.
    int readMsg(std::string &str);
    // Send `str` as one message. Returns the send result (bytes sent).
    int writeMsg(const std::string &str);
}


--------------------------------------------------------------------------------
/interchiplet/depreciate/record_transfer.cpp:
--------------------------------------------------------------------------------
 1 | #include<iostream>
 2 | #include<fstream>
 3 | #include<boost/property_tree/ini_parser.hpp>
 4 | #include<filesystem>
 5 | #include <boost/property_tree/ptree.hpp>
 6 | #include <boost/filesystem.hpp>
 7 | #include<cstdint>
 8 | #include<string>
 9 | using namespace std;
10 | 
11 | const char CONFIG_PATH[]="changes/zmq_pro.ini";
12 | const char SUBNET_ITEM[]="subnet_id";
13 | 
14 | int64_t subnet;
15 | void readConfig()
16 | {
17 |     if(!boost::filesystem::exists(CONFIG_PATH)){
18 |         cout<<"no config file\n";
19 |         return;
20 |     }
21 |     boost::property_tree::ptree root,tag;
22 |     boost::property_tree::ini_parser::read_ini(CONFIG_PATH,root);
23 |     tag=root.get_child("config");
24 |     if(tag.count(SUBNET_ITEM)!=1)return;
25 |     subnet=tag.get<int64_t>(SUBNET_ITEM);
26 | }
27 | 
28 | int main()
29 | {
30 |     readConfig();
31 |     ifstream ifs("message_record.txt");
32 |     ofstream ofs("record_"+to_string(subnet)+".txt");
33 |     string cmd;
34 |     int64_t localPort,remotePort,remoteAddr,localCore;
35 |     uint64_t t;
36 |     while (ifs>>cmd>>localPort>>remoteAddr>>remotePort>>localCore>>t)
37 |     {
38 |         //命令 本地地址  本地端口 远程地址 远程端口 本地核心 时间（纳秒）
39 |         ofs<<cmd<<' '
40 |             <<subnet<<' '
41 |             <<localPort<<' '
42 |             <<remoteAddr<<' '
43 |             <<remotePort<<' '
44 |             <<localCore<<' '
45 |             <<t<<'\n';
46 |     }
47 |     ifs.close();
48 |     ofs.flush();
49 |     ofs.close();
50 |     return 0;
51 | }
52 | 


--------------------------------------------------------------------------------
/interchiplet/depreciate/sniper_change.cpp:
--------------------------------------------------------------------------------
 1 | #include "sniper_change.h"
 2 | 
 3 | #include"zmq.hpp"
 4 | #include"zmq_addon.hpp"
 5 | 
 6 | #include <utility>
 7 | #include <list>
 8 | #include <atomic>
 9 | std::atomic_int64_t socketNumber(0);
10 | using namespace std;
11 | namespace nsChange
12 | {
13 |    //
14 | }
15 | 
16 | typedef std::unordered_map<int64_t, std::list<std::string>> bufMap_t;
17 | bufMap_t buf;
18 | zmq::context_t zmqContext;
19 | zmq::socket_t zmqSocket;
// Replace the global socket with a new pair socket on the global context and
// bind it to `addr`. Always returns 0.
extern "C" int connectZmq(const char *addr)
{
   zmqSocket = zmq::socket_t(zmqContext, zmq::socket_type::pair);
   zmqSocket.bind(addr);
   return 0;
}
// Close the global socket. Always returns 0.
extern "C" int disconnectZmq()
{
   zmqSocket.close();
   return 0;
}
31 | extern "C" int readAllMsg()
32 | {
33 |    std::vector<zmq::message_t> msgBuf;
34 |    auto ret = zmq::recv_multipart(zmqSocket, std::back_inserter(msgBuf));
35 |    for (auto &msg : msgBuf)
36 |    {
37 |       //将消息存入不同socket的buf上
38 |    }
39 |    return (int)*ret;
40 | }
41 | extern "C" const char *getBuf(int64_t socket)
42 | {
43 |    bufMap_t::iterator it = buf.find(socket);
44 |    if (it == buf.end())
45 |       return NULL;
46 |    else if (it->second.empty())
47 |       return NULL;
48 |    else
49 |       return it->second.front().c_str();
50 | }
51 | extern "C" int popBuf(int64_t socket)
52 | {
53 |    bufMap_t::iterator it = buf.find(socket);
54 |    if (it == buf.end())
55 |       return -1;
56 |    else if (it->second.empty())
57 |       return -1;
58 |    else
59 |    {
60 |       it->second.pop_front();
61 |       return 0;
62 |    }
63 | }
64 | 


--------------------------------------------------------------------------------
/interchiplet/depreciate/zmq_pro.cpp:
--------------------------------------------------------------------------------
  1 | #include"interchiplet_client.h"
  2 | 
  3 | #include <iostream>
  4 | #include <cstring>
  5 | #include <sys/socket.h>
  6 | #include <netinet/in.h>
  7 | #include <sys/types.h>
  8 | #include <deque>
  9 | #include <string>
 10 | #include <algorithm>
 11 | #include <cassert>
 12 | #include <iterator>
 13 | #include <zmq.hpp>
 14 | #include <zmq_addon.hpp>
 15 | #include <fstream>
 16 | #include <thread>
 17 | #include <errno.h>
 18 | #include <unistd.h>
 19 | #include<sstream>
 20 | #include<boost/property_tree/ini_parser.hpp>
 21 | #include<filesystem>
 22 | #include <boost/property_tree/ptree.hpp>
 23 | #include <boost/filesystem.hpp>
 24 | #include<queue>
 25 | using namespace std;
 26 | 
 27 | ofstream logfile("changes/zmq_pro.log");
 28 | 
 29 | //read config
 30 | int64_t subnet;
 31 | string popnetAddr;
 32 | int readConfig()
 33 | {
 34 |     const char CONFIG_PATH[]="changes/zmq_pro.ini";
 35 |     const char SUBNET_ITEM[]="subnet_id";
 36 |     const char INTER_ADDR_ITEM[]="inter_address";
 37 |     if(!boost::filesystem::exists(CONFIG_PATH)){
 38 |         return -1;
 39 |     }
 40 |     boost::property_tree::ptree root,tag;
 41 |     boost::property_tree::ini_parser::read_ini(CONFIG_PATH,root);
 42 |     tag=root.get_child("config");
 43 |     if(tag.count(SUBNET_ITEM)!=1||tag.count(INTER_ADDR_ITEM)!=1)return -2;
 44 |     subnet=tag.get<int64_t>(SUBNET_ITEM);
 45 |     popnetAddr=tag.get<string>(INTER_ADDR_ITEM);
 46 |     return 0;
 47 | }
 48 | 
 49 | //与sniper通信部分
 50 | namespace nsSniperConn
 51 | {
 52 | const size_t BUF_SIZE = 4096;
 53 | const int SNIPER_PORT_BASE = 7000;
 54 | int listenFd, connfd;
 55 | queue<string> lines;
 56 | int openPort(int port)
 57 | {
 58 |     const int LISTEN_QUEUE_LEN = 10;
 59 |     listenFd = socket(AF_INET, SOCK_STREAM, 0);
 60 |     if (listenFd < 0)
 61 |         return listenFd;
 62 |     sockaddr_in serverAddr;
 63 |     memset(&serverAddr, 0, sizeof(serverAddr));
 64 |     serverAddr.sin_family = AF_INET;
 65 |     serverAddr.sin_addr.s_addr = htonl(INADDR_ANY);
 66 |     serverAddr.sin_port = htons(port);
 67 |     int r = bind(listenFd, (sockaddr *)&serverAddr, sizeof(serverAddr));
 68 |     if (r < 0)
 69 |         return -1;
 70 |     r = listen(listenFd, LISTEN_QUEUE_LEN);
 71 |     if(r<0)return -2;
 72 |     return 0;
 73 | }
 74 | int getConnection()
 75 | {
 76 |     connfd = accept(listenFd, (sockaddr *)NULL, NULL);
 77 |     return connfd;
 78 | }
 79 | int receiveLine()
 80 | {
 81 |     static int remain = 0;
 82 |     static char buf[BUF_SIZE+1] = {0};
 83 |     if (lines.empty())
 84 |     {
 85 |         char *end = buf + remain;
 86 |         char *pos = end;
 87 |         int r;
 88 |         while (pos == end)
 89 |         {
 90 |             r = recv(connfd, end, BUF_SIZE - remain, 0);
 91 |             if (r <= 0)
 92 |                 return r;
 93 |             remain += r;
 94 |             end = buf + remain;
 95 |             pos = find(buf, end, '\n');
 96 |             //assert(remain < BUF_SIZE);
 97 |             if(remain>=BUF_SIZE)throw "Message too long";
 98 |         }
 99 |         *pos = '\0';
100 |         lines.push(buf);
101 |         char *pos0 = pos + 1;
102 |         for (;;)
103 |         {
104 |             pos = find(pos0, end, '\n');
105 |             if (pos == end)
106 |                 break;
107 |             *pos = '\0';
108 |             lines.push(pos0);
109 |             pos0 = pos + 1;
110 |         }
111 |         remain = distance(pos0, end);
112 |         copy(pos0, end, buf);
113 |         return lines.size();
114 |     }
115 |     return 0;
116 | }
117 | int disconnect()
118 | {
119 |     return close(connfd) | close(listenFd);
120 | }
121 | } // namespace nsSniperConn
122 | 
123 | int main()
124 | {
125 |     int r;
126 |     r=readConfig();
127 |     logfile<<"readConfig: "<<r<<endl;
128 |     logfile<<"\tsubnet: "<<subnet<<endl
129 |         <<"\tinter addr: "<<popnetAddr<<endl;
130 |     if(r<0)return r;
131 |     r=nsInterchiplet::connectZmq(popnetAddr);
132 |     logfile<<"popnet connect: "<<r<<endl;
133 |     r = nsSniperConn::openPort(nsSniperConn::SNIPER_PORT_BASE + subnet);
134 |     //logfile<<"openport succeeds"<<endl;
135 |     logfile << "sniper openport: " << r << endl;
136 |     cout << subnet << endl;
137 |     r = nsSniperConn::getConnection();
138 |     //cout<<"ok"<<endl;
139 |     //logfile<<"connection succeeds"<<endl;
140 |     logfile << "sniper connection: " << r << endl;
141 |     for (;;)
142 |     {
143 |         if (nsSniperConn::receiveLine() <= 0)
144 |         {
145 |             logfile << "Error: " << errno << endl;
146 |             break;
147 |         }
148 |         logfile << "read from sniper: " << nsSniperConn::lines.front() << endl;
149 |         stringstream ss(nsSniperConn::lines.front()+'\n');
150 |         string cmd,d;
151 |         ss>>cmd;
152 |         if(cmd=="send"){
153 |             getline(ss,d);
154 |             nsInterchiplet::writeMsg("send "+d);
155 |             //控制发送速率，避免丢消息
156 |             this_thread::sleep_for(1ms);
157 |             cout<<"send_ret"<<endl;
158 |         }
159 |         else if(cmd=="recv"){
160 |             nsInterchiplet::writeMsg("recv\n");
161 |             nsInterchiplet::readMsg(d);
162 |             if(d.find("exit")<d.size()){
163 |                 logfile<<"interchiplet exits"<<endl;
164 |                 break;
165 |             }
166 |             cout<<d<<endl;
167 |             logfile<<"read from ZMQ: "<<d<<endl;
168 |         }
169 |         nsSniperConn::lines.pop();
170 |         //this_thread::sleep_for(1ms);
171 |     }
172 |     r = nsSniperConn::disconnect();
173 |     logfile << "sniper disconnect: " << r << endl;
174 |     logfile<<"popnet disconnect: "<<nsInterchiplet::closeZmq()<<endl;
175 |     logfile << "exit" << endl;
176 |     logfile.close();
177 |     return 0;
178 | }
179 | 


--------------------------------------------------------------------------------
/interchiplet/includes/apis_c.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <unistd.h>
 4 | 
 5 | #include <cstdint>
 6 | 
namespace InterChiplet {
/// Return type shared by all APIs below: the type returned by syscall().
typedef decltype(syscall(0)) syscall_return_t;

/**
 * @defgroup apis_for_cpu APIs for CPU
 * @brief APIs for CPU.
 *
 * NOTE(review): the meaning of the returned values is not visible in this
 * header; presumably they are the results of the underlying system calls.
 * Confirm against apis_c.cpp.
 * @{
 */
/**
 * @brief Launch application to remote chiplet.
 * @param __dst_x Destination address in X-axis.
 * @param __dst_y Destination address in Y-axis.
 * @param __src_x Source address in X-axis.
 * @param __src_y Source address in Y-axis.
 */
syscall_return_t launch(int64_t __dst_x, int64_t __dst_y, int64_t __src_x, int64_t __src_y);

/**
 * @brief Wait launch from remote chiplet.
 * @param __dst_x Destination address in X-axis.
 * @param __dst_y Destination address in Y-axis.
 * @param[out] __src_x Source address in X-axis. Return value.
 * @param[out] __src_y Source address in Y-axis. Return value.
 */
syscall_return_t waitLaunch(int64_t __dst_x, int64_t __dst_y, int64_t* __src_x, int64_t* __src_y);

/**
 * @brief Barrier.
 * @param __uid Barrier ID.
 * @param __src_x Source address in X-axis.
 * @param __src_y Source address in Y-axis.
 * @param __count Number of threads to barrier. Defaults to 0.
 */
syscall_return_t barrier(int64_t __uid, int64_t __src_x, int64_t __src_y, int64_t __count = 0);

/**
 * @brief Lock mutex.
 * @param __uid Mutex ID.
 * @param __src_x Source address in X-axis.
 * @param __src_y Source address in Y-axis.
 */
syscall_return_t lock(int64_t __uid, int64_t __src_x, int64_t __src_y);

/**
 * @brief Unlock mutex.
 * @param __uid Mutex ID.
 * @param __src_x Source address in X-axis.
 * @param __src_y Source address in Y-axis.
 */
syscall_return_t unlock(int64_t __uid, int64_t __src_x, int64_t __src_y);

/**
 * @brief Send data to remote chiplet.
 * @param __dst_x Destination address in X-axis.
 * @param __dst_y Destination address in Y-axis.
 * @param __src_x Source address in X-axis.
 * @param __src_y Source address in Y-axis.
 * @param __addr Data address.
 * @param __nbyte Number of bytes.
 */
syscall_return_t sendMessage(int64_t __dst_x, int64_t __dst_y, int64_t __src_x, int64_t __src_y,
                             void* __addr, int64_t __nbyte);

/**
 * @brief Read data from remote chiplet.
 * @param __dst_x Destination address in X-axis.
 * @param __dst_y Destination address in Y-axis.
 * @param __src_x Source address in X-axis.
 * @param __src_y Source address in Y-axis.
 * @param __addr Data address.
 * @param __nbyte Number of bytes.
 */
syscall_return_t receiveMessage(int64_t __dst_x, int64_t __dst_y, int64_t __src_x, int64_t __src_y,
                                void* __addr, int64_t __nbyte);

/**
 * @}
 */
}  // namespace InterChiplet
86 | 


--------------------------------------------------------------------------------
/interchiplet/includes/apis_cu.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "cuda_runtime_api.h"
 4 | 
 5 | /**
 6 |  * @defgroup apis_for_cuda
 7 |  * @brief APIs for CUDA.
 8 |  * @{
 9 |  */
10 | /**
11 |  * @brief Launch application to remote chiplet.
12 |  * @param __dst_x Destination address in X-axis.
13 |  * @param __dst_y Destination address in Y-axis.
14 |  * @param __src_x Source address in X-axis.
15 |  * @param __src_y Source address in Y-axis.
16 |  */
17 | extern __host__ cudaError_t CUDARTAPI launch(int __dst_x, int __dst_y, int __src_x, int __src_y);
18 | 
19 | /**
20 |  * @brief Wait launch from remote chiplet.
21 |  * @param __dst_x Destination address in X-axis.
22 |  * @param __dst_y Destination address in Y-axis.
23 |  * @param __src_x Source address in X-axis.
24 |  * @param __src_y Source address in Y-axis.
25 |  */
26 | extern __host__ cudaError_t CUDARTAPI waitLaunch(int __dst_x, int __dst_y, int* __src_x,
27 |                                                  int* __src_y);
28 | 
29 | /**
30 |  * @brief Barrier.
31 |  * @param __uid Barrier ID.
32 |  * @param __src_x Source address in X-axis.
33 |  * @param __src_y Source address in Y-axis.
34 |  * @param __count Number of threads to barrier. 
35 |  */
36 | extern __host__ cudaError_t CUDARTAPI barrier(int __uid, int __src_x, int __src_y, int __count = 0);
37 | 
38 | /**
39 |  * @brief Lock mutex.
40 |  * @param __uid Mutex ID.
41 |  * @param __src_x Source address in X-axis.
42 |  * @param __src_y Source address in Y-axis.
43 |  */
44 | extern __host__ cudaError_t CUDARTAPI lock(int __uid, int __src_x, int __src_y);
45 | 
46 | /**
47 |  * @brief Unlock mutex.
48 |  * @param __uid Mutex ID.
49 |  * @param __src_x Source address in X-axis.
50 |  * @param __src_y Source address in Y-axis.
51 |  */
52 | extern __host__ cudaError_t CUDARTAPI unlock(int __uid, int __src_x, int __src_y);
53 | 
54 | /**
55 |  * @brief Send data to remote chiplet.
56 |  * @param __dst_x Destination address in X-axis.
57 |  * @param __dst_y Destination address in Y-axis.
58 |  * @param __src_x Source address in X-axis.
59 |  * @param __src_y Source address in Y-axis.
60 |  * @param __addr Data address.
61 |  * @param __nbyte Number of bytes.
62 |  */
63 | extern __host__ cudaError_t CUDARTAPI sendMessage(int __dst_x, int __dst_y, int __src_x,
64 |                                                   int __srx_y, void* __addr, int __nbyte);
65 | 
66 | /**
67 |  * @brief Read data from remote chiplet.
68 |  * @param __dst_x Destination address in X-axis.
69 |  * @param __dst_y Destination address in Y-axis.
70 |  * @param __src_x Source address in X-axis.
71 |  * @param __src_y Source address in Y-axis.
72 |  * @param __addr Data address.
73 |  * @param __nbyte Number of bytes.
74 |  */
75 | extern __host__ cudaError_t CUDARTAPI receiveMessage(int __dst_x, int __dst_y, int __src_x,
76 |                                                      int __srx_y, void* __addr, int __nbyte);
77 | /**
78 |  * @}
79 |  */


--------------------------------------------------------------------------------
/interchiplet/includes/benchmark_yaml.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <boost/filesystem.hpp>
  4 | #include <iostream>
  5 | 
  6 | #include "yaml-cpp/yaml.h"
  7 | namespace fs = boost::filesystem;
  8 | 
  9 | /**
 10 |  * @defgroup benchmark_yaml Benchmark YAML configuration
 11 |  * @brief YAML configuration file interface.
 12 |  * @{
 13 |  */
 14 | /**
 15 |  * @brief Data structure to configure one simulation process.
 16 |  */
 17 | class ProcessConfig {
 18 |    public:
 19 |     /**
 20 |      * @brief Construct ProcessConfig.
 21 |      * @param __cmd Command of simulation process.
 22 |      * @param __args Arguments of simulation process.
 23 |      * @param __log Path of logging name.
 24 |      * @param __to_stdout True means redirect output of this process to standard output.
 25 |      * @param __clock_rate the rate of inter-simulator cycle convert.
 26 |      * @param __pre_copy Files copy to sub-directory of simulator before executing.
 27 |      */
 28 |     ProcessConfig(const std::string& __cmd, const std::vector<std::string>& __args,
 29 |                   const std::string& __log, bool __to_stdout, double __clock_rate,
 30 |                   const std::string& __pre_copy)
 31 |         : m_command(__cmd),
 32 |           m_args(__args),
 33 |           m_log_file(__log),
 34 |           m_to_stdout(__to_stdout),
 35 |           m_clock_rate(__clock_rate),
 36 |           m_pre_copy(__pre_copy) {}
 37 | 
 38 |    public:
 39 |     /**
 40 |      * @brief Command of simulation process.
 41 |      */
 42 |     std::string m_command;
 43 |     /**
 44 |      * @brief Arguments of simulation process.
 45 |      */
 46 |     std::vector<std::string> m_args;
 47 |     /**
 48 |      * @brief Path of logging name.
 49 |      */
 50 |     std::string m_log_file;
 51 |     /**
 52 |      * @brief True means redirect output of this process to standard output.
 53 |      */
 54 |     bool m_to_stdout;
 55 |     /**
 56 |      * @brief the rate of inter-simulator cycle convert.
 57 |      */
 58 |     double m_clock_rate;
 59 |     /**
 60 |      * @brief Files copy to sub-directory of simulator before executing.
 61 |      */
 62 |     std::string m_pre_copy;
 63 | };
 64 | 
 65 | /**
 66 |  * @brief Benchmark configuration structure.
 67 |  */
 68 | class BenchmarkConfig {
 69 |    public:
 70 |     /**
 71 |      * @brief Parse YAML configuration file to get benchmark configuration.
 72 |      * @param file_name Path of YAML configuration file.
 73 |      */
 74 |     BenchmarkConfig(const std::string& file_name) {
 75 |         // Get environment variables.
 76 |         m_benchmark_root = fs::canonical(fs::path(file_name)).parent_path().string();
 77 |         if (getenv("SIMULATOR_ROOT") == NULL) {
 78 |             std::cerr << "The environment variable SIMULATOR_ROOT is not defined.\n";
 79 |             exit(EXIT_FAILURE);
 80 |         } else {
 81 |             m_simulator_root = getenv("SIMULATOR_ROOT");
 82 |         }
 83 | 
 84 |         // Parse YAML file.
 85 |         YAML::Node config;
 86 |         try {
 87 |             config = YAML::LoadFile(file_name);
 88 |         } catch (const std::exception& e) {
 89 |             std::cerr << "Error: " << e.what() << "\n";
 90 |             exit(EXIT_FAILURE);
 91 |         }
 92 | 
 93 |         // Parse YAML Tree.
 94 |         yaml_parse(config);
 95 | 
 96 |         // Extend environment variables.
 97 |         extend_env_var();
 98 |     }
 99 | 
100 |     /**
101 |      * @brief Parse YAML configuration tree.
102 |      * @param config Top node of YAML Tree.
103 |      */
104 |     void yaml_parse(const YAML::Node& config) {
105 |         m_phase1_proc_cfg_list = yaml_parse_phase(config["phase1"]);
106 |         m_phase2_proc_cfg_list = yaml_parse_phase(config["phase2"]);
107 |     }
108 | 
109 |    private:
110 |     /**
111 |      * @brief Parse YAML configuration tree from "phase1" or "phase2".
112 |      * @param config "phase1" or "phase2" node of YAML.
113 |      */
114 |     std::vector<ProcessConfig> yaml_parse_phase(const YAML::Node& config) {
115 |         std::vector<ProcessConfig> proc_list;
116 |         for (YAML::const_iterator it = config.begin(); it != config.end(); it++) {
117 |             proc_list.push_back(yaml_parse_process(*it));
118 |         }
119 |         return proc_list;
120 |     }
121 | 
122 |     /**
123 |      * @brief Parse YAML configuration tree below "phase1" or "phase2".
124 |      * @param config node below "phase1" or "phase2" of YAML.
125 |      */
126 |     ProcessConfig yaml_parse_process(const YAML::Node& config) {
127 |         std::string pre_copy;
128 |         if (config["pre_copy"]) {
129 |             pre_copy = config["pre_copy"].as<std::string>();
130 |         }
131 |         return ProcessConfig(config["cmd"].as<std::string>(),
132 |                              config["args"].as<std::vector<std::string> >(),
133 |                              config["log"].as<std::string>(), config["is_to_stdout"].as<bool>(),
134 |                              config["clock_rate"].as<double>(), pre_copy);
135 |     }
136 | 
137 |     /**
138 |      * @brief Extend Environment Variables.
139 |      *
140 |      * Replace $SIMULATOR_ROOT and $BENCHMARK_ROOT with absoluate address.
141 |      */
142 |     void extend_env_var() {
143 |         for (ProcessConfig& config : m_phase1_proc_cfg_list) {
144 |             extend_env_var_proc(config);
145 |         }
146 |         for (ProcessConfig& config : m_phase2_proc_cfg_list) {
147 |             extend_env_var_proc(config);
148 |         }
149 |     }
150 | 
151 |     /**
152 |      * @brief Extend Environment Variables in one process configuration.
153 |      *
154 |      * Replace $SIMULATOR_ROOT and $BENCHMARK_ROOT with absoluate address.
155 |      */
156 |     void extend_env_var_proc(ProcessConfig& proc_config) {
157 |         extend_env_var_string(proc_config.m_command);
158 |         extend_env_var_string(proc_config.m_log_file);
159 |         for (std::string& arg : proc_config.m_args) {
160 |             extend_env_var_string(arg);
161 |         }
162 |     }
163 | 
164 |     /**
165 |      * @brief Extend Environment Variables in one string.
166 |      *
167 |      * Replace $SIMULATOR_ROOT and $BENCHMARK_ROOT with absoluate address.
168 |      */
169 |     void extend_env_var_string(std::string& __str) {
170 |         std::size_t find_pos;
171 |         while ((find_pos = __str.find("$SIMULATOR_ROOT")) != std::string::npos) {
172 |             __str = __str.replace(find_pos, 15, m_simulator_root);
173 |         }
174 |         while ((find_pos = __str.find("$BENCHMARK_ROOT")) != std::string::npos) {
175 |             __str = __str.replace(find_pos, 15, m_benchmark_root);
176 |         }
177 |     }
178 | 
179 |    public:
180 |     /**
181 |      * @brief Environments.
182 |      */
183 |     std::string m_benchmark_root;
184 |     std::string m_simulator_root;
185 |     /**
186 |      * @brief List of configuration structures of phase 1.
187 |      */
188 |     std::vector<ProcessConfig> m_phase1_proc_cfg_list;
189 |     /**
190 |      * @brief List of configuration structures of phase 2.
191 |      */
192 |     std::vector<ProcessConfig> m_phase2_proc_cfg_list;
193 | };
194 | /**
195 |  * @}
196 |  */
197 | 


--------------------------------------------------------------------------------
/interchiplet/includes/cmdline_options.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <iostream>
 4 | 
 5 | #include "CLI/CLI.hpp"
 6 | 
 7 | /**
 8 |  * @defgroup cmdline
 9 |  * @brief Command line parser.
10 |  * @{
11 |  */
12 | /**
13 |  * @brief Options from command line.
14 |  */
15 | class CmdLineOptions {
16 |    public:
17 |     /**
18 |      * @brief Constructor.
19 |      */
20 |     CmdLineOptions()
21 |         : m_bench(), m_cwd(), m_timeout_threshold(5), m_err_rate_threshold(0.005), m_debug(false) {}
22 | 
23 |     /**
24 |      * @brief Read options from command line.
25 |      * @param argc Number of argument.
26 |      * @param argv String of argument.
27 |      */
28 |     int parse(int argc, const char* argv[]) {
29 |         CLI::App app{"Lego Chiplet Simulator"};
30 |         app.add_option("bench", m_bench, "Benchmark configuration file (.yml)")
31 |             ->required()
32 |             ->check(CLI::ExistingFile);
33 |         app.add_option("-t,--timeout", m_timeout_threshold, "Time out threshold, in time of round.")
34 |             ->check(CLI::PositiveNumber);
35 |         app.add_option("-e,--error", m_err_rate_threshold, "Error rate when quit simulation.");
36 |         app.add_option("--cwd", m_cwd, "Woring directory for simulation.")
37 |             ->check(CLI::ExistingPath);
38 |         app.add_flag("--debug", m_debug, "Print debug information.");
39 | 
40 |         try {
41 |             app.parse(argc, argv);
42 |         } catch (const CLI::ParseError& e) {
43 |             int ret = app.exit(e);
44 |             exit(ret);
45 |         }
46 | 
47 |         return 0;
48 |     }
49 | 
50 |    public:
51 |     /**
52 |      * @brief Path of benchmark configuration yaml.
53 |      */
54 |     std::string m_bench;
55 |     /**
56 |      * @brief New working directory.
57 |      */
58 |     std::string m_cwd;
59 | 
60 |     /**
61 |      * @brief Timeout threshold, in term of round.
62 |      */
63 |     long m_timeout_threshold;
64 |     /**
65 |      * @brief Error rate threshold, used to quit iteration.
66 |      */
67 |     double m_err_rate_threshold;
68 | 
69 |     /**
70 |      * @brief Print debug information.
71 |      */
72 |     bool m_debug;
73 | };
74 | /**
75 |  * @}
76 |  */
77 | 


--------------------------------------------------------------------------------
/interchiplet/includes/global_define.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <cstdint>
  4 | #include <string>
  5 | #include <vector>
  6 | 
  7 | namespace InterChiplet {
  8 | /**
  9 |  * @brief Syscall ID used in CPU/GPU. The commented-out entries are disabled; values 503-507 are reused by the active entries.
 10 |  */
 11 | enum SysCallID {
 12 |     // SYSCALL_TEST_CHANGE = 500,        // Test
 13 |     // SYSCALL_REG_FUNC = 503,           // Send register function to pin (deprecated)
 14 |     // SYSCALL_CONNECT = 504,            // Setup connection.
 15 |     // SYSCALL_DISCONNECT = 505,         // Stop connection.
 16 |     // SYSCALL_GET_LOCAL_ADDR = 506,     // Get address of current processor.
 17 |     // SYSCALL_CHECK_REMOTE_READ = 507,  // Check remote read
 18 | 
 19 |     SYSCALL_LAUNCH = 501,        // Launch request.
 20 |     SYSCALL_WAITLAUNCH = 502,    // Wait for launch request.
 21 |     SYSCALL_BARRIER = 503,       // Enter barrier.
 22 |     SYSCALL_LOCK = 504,          // Lock mutex.
 23 |     SYSCALL_UNLOCK = 505,        // Unlock mutex.
 24 |     SYSCALL_REMOTE_READ = 506,   // Read cross chiplet
 25 |     SYSCALL_REMOTE_WRITE = 507,  // Write cross chiplet
 26 | };
 27 | 
 28 | /**
 29 |  * @brief Time type used between simulators.
 30 |  */
 31 | typedef unsigned long long TimeType;
 32 | 
 33 | /**
 34 |  * @brief Time type used by interchiplet module.
 35 |  */
 36 | typedef double InnerTimeType;
 37 | 
 38 | /**
 39 |  * @brief Address type;
 40 |  */
 41 | typedef std::vector<long> AddrType;
 42 | 
 43 | #define DIM_X(addr) (addr[0])
 44 | #define DIM_Y(addr) (addr[1])
 45 | #define UNSPECIFIED_ADDR(addr) ((addr[0]) < 0 && (addr[1]) < 0)
 46 | 
 47 | /**
 48 |  * @brief Type of synchronization command between simulators. Enumerators take consecutive values starting at 0 (SC_CYCLE).
 49 |  */
 50 | enum SyncCommType {
 51 |     SC_CYCLE,
 52 |     SC_SEND,
 53 |     SC_RECEIVE,
 54 |     SC_BARRIER,
 55 |     SC_LOCK,
 56 |     SC_UNLOCK,
 57 |     SC_LAUNCH,
 58 |     SC_WAITLAUNCH,
 59 |     SC_READ,
 60 |     SC_WRITE,
 61 |     SC_SYNC,
 62 |     SC_RESULT,
 63 | };
 64 | 
 65 | /**
 66 |  * @brief Behavior descriptor of synchronization protocol. Each enumerator is a single bit, so values can be OR-ed together.
 67 |  */
 68 | enum SyncProtocolDesc {
 69 |     /**
 70 |      * @brief Acknowledge. bit 0.
 71 |      */
 72 |     SPD_ACK = 0x01,
 73 |     /**
 74 |      * @brief Synchronization before data transmission. bit 1.
 75 |      */
 76 |     SPD_PRE_SYNC = 0x02,
 77 |     /**
 78 |      * @brief Synchronization after data transmission. bit 2.
 79 |      */
 80 |     SPD_POST_SYNC = 0x04,
 81 |     /**
 82 |      * @brief Launch behavior. bit 16.
 83 |      */
 84 |     SPD_LAUNCH = 0x10000,
 85 |     /**
 86 |      * @brief Barrier behavior. bit 17.
 87 |      */
 88 |     SPD_BARRIER = 0x20000,
 89 |     /**
 90 |      * @brief Lock behavior. bit 18.
 91 |      */
 92 |     SPD_LOCK = 0x40000,
 93 |     /**
 94 |      * @brief Unlock behavior. bit 19.
 95 |      */
 96 |     SPD_UNLOCK = 0x80000,
 97 | };
 98 | 
 99 | /**
100 |  * @brief Structure of synchronization command. No constructor is declared, so scalar members are uninitialized until assigned.
101 |  */
102 | class SyncCommand {
103 |    public:
104 |     /**
105 |      * @brief Type of synchronization command.
106 |      */
107 |     SyncCommType m_type;
108 |     /**
109 |      * @brief Cycle to send/receive command.
110 |      */
111 |     InnerTimeType m_cycle;
112 |     /**
113 |      * @brief Cycle convert rate.
114 |      */
115 |     double m_clock_rate;
116 |     /**
117 |      * @brief Source address.
118 |      */
119 |     AddrType m_src;
120 |     /**
121 |      * @brief Destination address.
122 |      */
123 |     AddrType m_dst;
124 |     /**
125 |      * @brief Number of bytes to write.
126 |      */
127 |     int m_nbytes;
128 |     /**
129 |      * @brief Descriptor of synchronization behavior.
130 |      */
131 |     long m_desc;
132 | 
133 |     /**
134 |      * @brief List of result strings.
135 |      */
136 |     std::vector<std::string> m_res_list;
137 | 
138 |     /**
139 |      * @brief File descriptor to write response of this command.
140 |      *
141 |      * For example, if one entity issues a READ command, the SYNC command that responds to this
142 |      * READ command should be sent to this file descriptor.
143 |      */
144 |     int m_stdin_fd;
145 | };
146 | }  // namespace InterChiplet
147 | 


--------------------------------------------------------------------------------
/interchiplet/includes/net_bench.h:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | #include <fstream>
  4 | #include <map>
  5 | 
  6 | #include "global_define.h"
  7 | 
  8 | #define PAC_PAYLOAD_BIT 512
  9 | #define PAC_PAYLOAD_BYTE (PAC_PAYLOAD_BIT / 8)
 10 | 
 11 | /**
 12 |  * @defgroup net_bench
 13 |  * @brief Network benchmark interface.
 14 |  * @{
 15 |  */
 16 | /**
 17 |  * @brief Structure of one package in network.
 18 |  */
 19 | class NetworkBenchItem {
 20 |    public:
 21 |     /**
 22 |      * @brief Package injection cycle from the source side.
 23 |      */
 24 |     InterChiplet::InnerTimeType m_src_cycle;
 25 |     /**
 26 |      * @brief Package injection cycle from the destination side.
 27 |      */
 28 |     InterChiplet::InnerTimeType m_dst_cycle;
 29 |     /**
 30 |      * @brief Packate id. (Not used yet.)
 31 |      */
 32 |     uint64_t m_id;
 33 |     /**
 34 |      * @brief Source address.
 35 |      */
 36 |     InterChiplet::AddrType m_src;
 37 |     /**
 38 |      * @brief Destination address.
 39 |      */
 40 |     InterChiplet::AddrType m_dst;
 41 |     /**
 42 |      * @brief Size of package in bytes.
 43 |      */
 44 |     int m_pac_size;
 45 |     /**
 46 |      * @brief Synchronization protocol descriptor.
 47 |      */
 48 |     long m_desc;
 49 | 
 50 |    public:
 51 |     /**
 52 |      * @brief Construct Empty NetworkBenchItem.
 53 |      */
 54 |     NetworkBenchItem() {}
 55 | 
 56 |     /**
 57 |      * @brief Construct NetworkBenchItem from SyncCommand.
 58 |      * @param __src_cmd Structure of source command.
 59 |      * @param __dst_cmd Structure of destination command.
 60 |      */
 61 |     NetworkBenchItem(const InterChiplet::SyncCommand& __src_cmd,
 62 |                      const InterChiplet::SyncCommand& __dst_cmd)
 63 |         : m_src_cycle(__src_cmd.m_cycle),
 64 |           m_dst_cycle(__dst_cmd.m_cycle),
 65 |           m_dst(__src_cmd.m_dst),
 66 |           m_src(__src_cmd.m_src),
 67 |           m_pac_size(1),
 68 |           m_desc(__src_cmd.m_desc | __dst_cmd.m_desc) {
 69 |         // Calculate the number of flit.
 70 |         // One head flit is required any way.
 71 |         m_pac_size = __src_cmd.m_nbytes / PAC_PAYLOAD_BYTE +
 72 |                      ((__src_cmd.m_nbytes % PAC_PAYLOAD_BYTE) > 0 ? 1 : 0) + 1;
 73 |     }
 74 | 
 75 |     /**
 76 |      * @brief Construct NetworkBenchItem from SyncCommand.
 77 |      * @param __src_cmd Structure of source command.
 78 |      */
 79 |     NetworkBenchItem(const InterChiplet::SyncCommand& __src_cmd)
 80 |         : m_src_cycle(__src_cmd.m_cycle),
 81 |           m_dst_cycle(__src_cmd.m_cycle),
 82 |           m_dst(__src_cmd.m_dst),
 83 |           m_src(__src_cmd.m_src),
 84 |           m_pac_size(1),
 85 |           m_desc(__src_cmd.m_desc) {
 86 |         // Calculate the number of flit.
 87 |         // One head flit is required any way.
 88 |         m_pac_size = __src_cmd.m_nbytes / PAC_PAYLOAD_BYTE +
 89 |                      ((__src_cmd.m_nbytes % PAC_PAYLOAD_BYTE) > 0 ? 1 : 0) + 1;
 90 |     }
 91 | 
 92 |     /**
 93 |      * @brief Overloading operator <<.
 94 |      *
 95 |      * Write NetworkBenchItem to output stream.
 96 |      */
 97 |     friend std::ostream& operator<<(std::ostream& os, const NetworkBenchItem& __item) {
 98 |         os << __item.m_src_cycle << " " << __item.m_dst_cycle << " " << DIM_X(__item.m_src) << " "
 99 |            << DIM_Y(__item.m_src) << " " << DIM_X(__item.m_dst) << " " << DIM_Y(__item.m_dst) << " "
100 |            << __item.m_pac_size << " " << __item.m_desc;
101 |         return os;
102 |     }
103 | 
104 |     /**
105 |      * @brief Overloading operator >>.
106 |      *
107 |      * Read NetworkBenchItem from input stream.
108 |      */
109 |     friend std::istream& operator>>(std::istream& os, NetworkBenchItem& __item) {
110 |         os >> __item.m_src_cycle >> __item.m_dst_cycle;
111 |         long src_x, src_y, dst_x, dst_y;
112 |         os >> src_x >> src_y >> dst_x >> dst_y;
113 |         __item.m_src.push_back(src_x);
114 |         __item.m_src.push_back(src_y);
115 |         __item.m_dst.push_back(dst_x);
116 |         __item.m_dst.push_back(dst_y);
117 |         os >> __item.m_pac_size >> __item.m_desc;
118 |         return os;
119 |     }
120 | };
121 | 
122 | /**
123 |  * @brief List of network benchmark item.
124 |  */
125 | class NetworkBenchList : public std::multimap<InterChiplet::InnerTimeType, NetworkBenchItem> {
126 |    public:
127 |     /**
128 |      * @brief Construct NetworkBenchList.
129 |      */
130 |     NetworkBenchList() : std::multimap<InterChiplet::InnerTimeType, NetworkBenchItem>() {}
131 | 
132 |     /**
133 |      * @brief Insert item into list.
134 |      *
135 |      * Take the start cycle on source side as ordering key.
136 |      */
137 |     void insert(const NetworkBenchItem& __item) {
138 |         std::multimap<InterChiplet::InnerTimeType, NetworkBenchItem>::insert(
139 |             std::pair<InterChiplet::InnerTimeType, NetworkBenchItem>(__item.m_src_cycle, __item));
140 |     }
141 | 
142 |     /**
143 |      * @brief Dump benchmark list to specified file.
144 |      * @param __file_name Path to benchmark file.
145 |      * @param __clock_rate Clock ratio (Simulator clock/Interchiplet clock).
146 |      */
147 |     void dumpBench(const std::string& __file_name, double __clock_rate) {
148 |         std::ofstream bench_of(__file_name, std::ios::out);
149 |         for (auto& it : *this) {
150 |             bench_of << static_cast<InterChiplet::TimeType>(it.second.m_src_cycle * __clock_rate)
151 |                      << " "
152 |                      << static_cast<InterChiplet::TimeType>(it.second.m_dst_cycle * __clock_rate)
153 |                      << " " << DIM_X(it.second.m_src) << " " << DIM_Y(it.second.m_src) << " "
154 |                      << DIM_X(it.second.m_dst) << " " << DIM_Y(it.second.m_dst) << " "
155 |                      << it.second.m_pac_size << " " << it.second.m_desc << std::endl;
156 |         }
157 |         bench_of.flush();
158 |         bench_of.close();
159 |     }
160 | };
161 | /**
162 |  * @}
163 |  */
164 | 


--------------------------------------------------------------------------------
/interchiplet/srcs/apis_c.cpp:
--------------------------------------------------------------------------------
 1 | 
 2 | #include "apis_c.h"
 3 | 
 4 | #include "global_define.h"
 5 | 
 6 | namespace InterChiplet {
 7 | syscall_return_t barrier(int64_t __uid, int64_t __src_x, int64_t __src_y, int64_t __count) {
 8 |     int ret_code = syscall(SYSCALL_BARRIER, __uid, __src_x, __src_y, __count);
 9 |     return ret_code;
10 | }
11 | 
12 | syscall_return_t lock(int64_t __uid, int64_t __src_x, int64_t __src_y) {
13 |     int ret_code = syscall(SYSCALL_LOCK, __uid, __src_x, __src_y);
14 |     return ret_code;
15 | }
16 | 
17 | syscall_return_t unlock(int64_t __uid, int64_t __src_x, int64_t __src_y) {
18 |     int ret_code = syscall(SYSCALL_UNLOCK, __uid, __src_x, __src_y);
19 |     return ret_code;
20 | }
21 | 
22 | syscall_return_t launch(int64_t __dst_x, int64_t __dst_y, int64_t __src_x,
23 |                                 int64_t __src_y) {
24 |     int ret_code = syscall(SYSCALL_LAUNCH, __dst_x, __dst_y, __src_x, __src_y);
25 |     return ret_code;
26 | }
27 | 
28 | syscall_return_t waitLaunch(int64_t __dst_x, int64_t __dst_y, int64_t* __src_x,
29 |                               int64_t* __src_y) {
30 |     int ret_code = syscall(SYSCALL_WAITLAUNCH, __dst_x, __dst_y, __src_x, __src_y);
31 |     return ret_code;
32 | }
33 | 
34 | syscall_return_t sendMessage(int64_t __dst_x, int64_t __dst_y, int64_t __src_x, int64_t __src_y,
35 |                              void* __addr, int64_t __nbyte) {
36 |     int ret_code =
37 |         syscall(SYSCALL_REMOTE_WRITE, __dst_x, __dst_y, __src_x, __src_y, __addr, __nbyte);
38 |     return ret_code;
39 | }
40 | syscall_return_t receiveMessage(int64_t __dst_x, int64_t __dst_y, int64_t __src_x, int64_t __src_y,
41 |                                 void* __addr, int64_t __nbyte) {
42 |     int ret_code =
43 |         syscall(SYSCALL_REMOTE_READ, __dst_x, __dst_y, __src_x, __src_y, __addr, __nbyte);
44 |     return ret_code;
45 | }
46 | }  // namespace InterChiplet
47 | 


--------------------------------------------------------------------------------
/patch.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [[ -n "${SIMULATOR_ROOT}" ]]; then
 4 |     echo "SIMULATOR_ROOT is: ${SIMULATOR_ROOT}"
 5 | else
 6 |     echo "The environment variable SIMULATOR_ROOT is not defined."
 7 |     exit
 8 | fi
 9 | 
10 | cd ${SIMULATOR_ROOT}
11 | rm -rf .changed_files
12 | mkdir .changed_files
13 | 
14 | # Pathc for Sniper
15 | cd ${SIMULATOR_ROOT}/snipersim
16 | git diff > ../interchiplet/patch/snipersim.diff
17 | snipersim_changed_file_list="$(git diff --name-only)"
18 | 
19 | cd ${SIMULATOR_ROOT}
20 | file_list=($snipersim_changed_file_list)
21 | echo ${#file_list[@]} "Files has changed."
22 | for item in "${file_list[@]}"; do
23 |     echo $item
24 |     cp --parent snipersim/$item .changed_files/
25 | done
26 | 
27 | # Patch for GPGPUSim
28 | cd ${SIMULATOR_ROOT}/gpgpu-sim
29 | git diff > ../interchiplet/patch/gpgpu-sim.diff
30 | gpgpu_sim_changed_file_list="$(git diff --name-only)"
31 | 
32 | cd ${SIMULATOR_ROOT}
33 | file_list=($gpgpu_sim_changed_file_list)
34 | echo ${#file_list[@]} "Files has changed."
35 | for item in "${file_list[@]}"; do
36 |     echo $item
37 |     cp --parent gpgpu-sim/$item .changed_files/
38 | done
39 | 
40 | # Patch for GEM5
41 | cd ${SIMULATOR_ROOT}/gem5
42 | git diff > ../interchiplet/patch/gem5.diff
43 | gem5_changed_file_list="$(git diff --name-only)"
44 | 
45 | cd ${SIMULATOR_ROOT}
46 | file_list=($gem5_changed_file_list)
47 | echo ${#file_list[@]} "Files has changed."
48 | for item in "${file_list[@]}"; do
49 |     echo $item
50 |     cp --parent gem5/$item .changed_files/
51 | done
52 | 


--------------------------------------------------------------------------------
/setup_env.sh:
--------------------------------------------------------------------------------
1 | export PATH=$PATH:/usr/local/cuda/bin  # append the CUDA binaries directory to PATH
2 | #export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib:/usr/local/cuda/lib64
3 | export CUDA_INSTALL_PATH=/usr/local/cuda
4 | export SIMULATOR_ROOT="$(pwd)"  # set to the current directory, so run this from the repository root
5 | 
6 | source gpgpu-sim/setup_environment  # relative path: also resolved against the current directory
7 | 


--------------------------------------------------------------------------------
/test/test_benchmark_config.cpp:
--------------------------------------------------------------------------------
 1 | #include "C:\Users\win10\Desktop\Code\scheme3\includes\benchmark_yaml.h"
 2 | #include <gtest/gtest.h>
 3 | #include <fstream>
 4 | 
 5 | TEST(BenchmarkConfigTest, TestParseConfig) {  // Writes a YAML file, loads it with BenchmarkConfig and checks the parsed fields.
 6 |     std::ofstream file("test_config.yaml");
 7 |     file << "cmd: sim_command\n"
 8 |          << "args: [arg1, arg2]\n"
 9 |          << "log: log.txt\n"
10 |          << "is_to_stdout: true\n"
11 |          << "clock_rate: 1.0\n"
12 |          << "pre_copy: pre_copy\n"
13 |          << "chip_frequency: 1000";  // NOTE(review): the YAML has no "phase1"/"phase2" keys, which the BenchmarkConfig::yaml_parse shown with this code reads - confirm the targeted version.
14 |     file.close();
15 | 
16 |     BenchmarkConfig config("test_config.yaml");  // NOTE(review): that constructor exits when SIMULATOR_ROOT is unset - confirm the test environment defines it.
17 |     EXPECT_EQ(config.m_proc_cfg_list.size(), 1);  // NOTE(review): the class shown exposes m_phase1_proc_cfg_list / m_phase2_proc_cfg_list, not m_proc_cfg_list - confirm.
18 |     EXPECT_EQ(config.m_proc_cfg_list[0].m_command, "sim_command");
19 |     EXPECT_EQ(config.m_proc_cfg_list[0].m_args.size(), 2);
20 |     EXPECT_EQ(config.m_proc_cfg_list[0].m_log_file, "log.txt");
21 |     EXPECT_EQ(config.m_proc_cfg_list[0].m_to_stdout, true);
22 |     EXPECT_EQ(config.m_proc_cfg_list[0].m_clock_rate, 1.0);
23 |     EXPECT_EQ(config.m_proc_cfg_list[0].m_pre_copy, "pre_copy");
24 |     EXPECT_EQ(config.m_proc_cfg_list[0].m_chip_frequency, 1000);  // NOTE(review): yaml_parse_process as shown does not read "chip_frequency" - confirm.
25 | }


--------------------------------------------------------------------------------
/test/test_cmd_handler.cpp:
--------------------------------------------------------------------------------
 1 | #include "cmd_handler.h"
 2 | #include "global_manager.h"
 3 | #include <gtest/gtest.h>
 4 | 
 5 | class CmdHandlerTest : public ::testing::Test {  // Fixture that gives every test its own GlobalManager.
 6 | protected:
 7 |     GlobalManager gm;
 8 | 
 9 |     void SetUp() override {
10 |         gm = GlobalManager();  // Re-assign a default-constructed manager before each test.
11 |     }
12 | };
13 | 
14 | TEST_F(CmdHandlerTest, TestHandleSendCmd) {  // Passes one SC_SEND command to handle_send_cmd and expects one queued request.
15 |     InterChiplet::SyncCommand cmd;
16 |     cmd.m_type = InterChiplet::SC_SEND;
17 |     cmd.m_src = {"Sim1"};  // NOTE(review): AddrType is std::vector<long> in global_define.h, so a string element should not compile - confirm.
18 |     cmd.m_dst = {"Sim2"};  // NOTE(review): same type concern as m_src.
19 |     cmd.m_cycle = 10.0;
20 |     cmd.m_clock_rate = 1.0;
21 |     cmd.m_nbytes = 100;
22 |     handle_send_cmd(cmd, &gm);
23 |     EXPECT_EQ(gm.requestList.size(), 1);
24 | }
25 | 
26 | TEST_F(CmdHandlerTest, TestHandleReceiveCmd) {  // Passes one SC_RECEIVE command to handle_receive_cmd and expects one queued request.
27 |     InterChiplet::SyncCommand cmd;
28 |     cmd.m_type = InterChiplet::SC_RECEIVE;
29 |     cmd.m_src = {"Sim2"};  // NOTE(review): AddrType is std::vector<long> in global_define.h, so a string element should not compile - confirm.
30 |     cmd.m_dst = {"Sim1"};  // NOTE(review): same type concern as m_src.
31 |     cmd.m_cycle = 10.0;
32 |     cmd.m_clock_rate = 1.0;
33 |     cmd.m_nbytes = 100;
34 |     handle_receive_cmd(cmd, &gm);
35 |     EXPECT_EQ(gm.requestList.size(), 1);
36 | }


--------------------------------------------------------------------------------
/test/test_cmdline_options:
--------------------------------------------------------------------------------
#include "cmdline_options.h"

#include <gtest/gtest.h>

#include <cstdio>
#include <fstream>
 3 | 
 4 | TEST(CmdLineOptionsTest, TestParseOptions) {
 5 |     const char* argv[] = {"program", "--bench", "bench.yaml", "--cwd", "/tmp", "--timeout", "10", "--error", "0.01", "--debug"};
 6 |     int argc = sizeof(argv) / sizeof(argv[0]);
 7 | 
 8 |     CmdLineOptions options;
 9 |     options.parse(argc, argv);
10 | 
11 |     EXPECT_EQ(options.m_bench, "bench.yaml");
12 |     EXPECT_EQ(options.m_cwd, "/tmp");
13 |     EXPECT_EQ(options.m_timeout_threshold, 10);
14 |     EXPECT_EQ(options.m_err_rate_threshold, 0.01);
15 |     EXPECT_EQ(options.m_debug, true);
16 | }
17 | 


--------------------------------------------------------------------------------
/test/test_global_manager.cpp:
--------------------------------------------------------------------------------
 1 | #include "C:\Users\win10\Desktop\Code\scheme3\includes\global_manager.h"
 2 | #include <gtest/gtest.h>
 3 | 
 4 | class GlobalManagerTest : public ::testing::Test {  // Fixture that gives every test its own GlobalManager.
 5 | protected:
 6 |     GlobalManager gm;
 7 | 
 8 |     void SetUp() override {
 9 |         gm = GlobalManager();  // Re-assign a default-constructed manager before each test.
10 |     }
11 | };
12 | 
13 | TEST_F(GlobalManagerTest, TestAddRequest) {  // Adds one SEND and one RECEIVE request and expects both to be stored.
14 |     gm.AddRequest("Sim1", "Sim2", "Data", 10.0, 1.0, Behavior::SEND);
15 |     gm.AddRequest("Sim2", "Sim1", "Data", 10.0, 1.0, Behavior::RECEIVE);
16 |     EXPECT_EQ(gm.requestList.size(), 2);
17 | }
18 | 
19 | TEST_F(GlobalManagerTest, TestCheckPair) {  // Adds a SEND and a RECEIVE with mirrored endpoints and expects CheckPair() to return true.
20 |     gm.AddRequest("Sim1", "Sim2", "Data", 10.0, 1.0, Behavior::SEND);
21 |     gm.AddRequest("Sim2", "Sim1", "Data", 10.0, 1.0, Behavior::RECEIVE);
22 |     EXPECT_TRUE(gm.CheckPair());
23 | }
24 | 
25 | TEST_F(GlobalManagerTest, TestUpdateWaterline) {  // Sets two process clocks and expects the waterline to equal the smaller one.
26 |     gm.processClocks["Sim1"] = 5.0;
27 |     gm.processClocks["Sim2"] = 10.0;
28 |     gm.updateWaterline();
29 |     EXPECT_EQ(gm.waterLine, 5.0);
30 | }
31 | 
32 | TEST_F(GlobalManagerTest, TestCalculateNetworkDelay) {  // Starts both clocks at zero and expects calculateNetworkDelay to advance both.
33 |     gm.processClocks["Sim1"] = 0.0;
34 |     gm.processClocks["Sim2"] = 0.0;
35 |     gm.calculateNetworkDelay("Sim1", "Sim2", 100);
36 |     EXPECT_GT(gm.processClocks["Sim1"], 0.0);
37 |     EXPECT_GT(gm.processClocks["Sim2"], 0.0);
38 | }


--------------------------------------------------------------------------------
/test/test_network_lists.cpp:
--------------------------------------------------------------------------------
 1 | #include "net_bench.h"
 2 | #include "net_delay.h"
 3 | #include <gtest/gtest.h>
 4 | 
 5 | TEST(NetworkBenchListTest, TestInsertAndDump) {
 6 |     InterChiplet::NetworkBenchList bench_list;
 7 |     InterChiplet::NetworkBenchItem item(10.0, {1, 2}, {3, 4}, 100, 1);
 8 |     bench_list.insert(item);
 9 |     bench_list.dumpBench("bench.txt", 1.0);
10 | 
11 |     std::ifstream file("bench.txt");
12 |     std::string content((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
13 |     EXPECT_NE(content.find("10 10 1 3 100 1"), std::string::npos);
14 | }
15 | 
16 | TEST(NetworkDelayListTest, TestInsertAndDump) {  // Inserts one delay item, dumps the list to delay.txt and searches the file for the expected record.
17 |     InterChiplet::NetworkDelayList delay_list;
18 |     InterChiplet::NetworkDelayItem item(10.0, {1, 2}, {3, 4}, 1, {1.0, 2.0});
19 |     delay_list.insert(10.0, item);
20 |     delay_list.dumpDelay("delay.txt");
21 | 
22 |     std::ifstream file("delay.txt");
23 |     std::string content((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
24 |     EXPECT_NE(content.find("10 1 3 1 2 1.0 2.0"), std::string::npos);  // NOTE(review): default stream formatting prints 1.0 as "1" - confirm how dumpDelay formats doubles.
25 | }


--------------------------------------------------------------------------------
/test/test_pipe_comm.cpp:
--------------------------------------------------------------------------------
 1 | #include "pipe_comm.h"
 2 | #include <gtest/gtest.h>
 3 | #include <unistd.h>
 4 | 
 5 | TEST(PipeCommTest, TestPipeCommunication) {  // Writes a string through PipeComm and expects to read the same bytes back.
 6 |     const char* pipe_name = "/tmp/test_pipe";
 7 |     InterChiplet::PipeComm pipe_comm;
 8 | 
 9 |     // Write data to pipe
10 |     std::string data = "Hello, World!";
11 |     pipe_comm.write_data(pipe_name, data.c_str(), data.size());
12 | 
13 |     // Read data from pipe
14 |     char buffer[1024];
15 |     int bytes_read = pipe_comm.read_data(pipe_name, buffer, sizeof(buffer) - 1);  // One byte is reserved for the terminator written below.
16 |     buffer[bytes_read] = '\0';  // NOTE(review): indexes out of bounds if read_data can return a negative value on failure - confirm.
17 |     EXPECT_EQ(std::string(buffer), data);
18 | }


--------------------------------------------------------------------------------
/test/test_process_struct.cpp:
--------------------------------------------------------------------------------
 1 | #include "interchiplets.cpp"
 2 | #include <gtest/gtest.h>
 3 | 
 4 | TEST(ProcessStructTest, TestProcessStruct) {  // Builds a ProcessStruct from a ProcessConfig and checks that each field is carried over.
 5 |     ProcessConfig config("sim_command", {"arg1", "arg2"}, "log.txt", true, 1.0, "pre_copy", 1000);  // NOTE(review): yaml_parse_process builds ProcessConfig from six arguments - confirm a seven-argument constructor exists.
 6 |     ProcessStruct proc_struct(config);
 7 |     EXPECT_EQ(proc_struct.m_command, "sim_command");
 8 |     EXPECT_EQ(proc_struct.m_args.size(), 2);
 9 |     EXPECT_EQ(proc_struct.m_log_file, "log.txt");
10 |     EXPECT_EQ(proc_struct.m_to_stdout, true);
11 |     EXPECT_EQ(proc_struct.m_clock_rate, 1.0);
12 |     EXPECT_EQ(proc_struct.m_pre_copy, "pre_copy");
13 |     EXPECT_EQ(proc_struct.m_chip_frequency, 1000);
14 | }


--------------------------------------------------------------------------------
/test_benchmark_config.cpp:
--------------------------------------------------------------------------------
 1 | #include "C:\Users\win10\Desktop\Code\scheme3\includes\benchmark_yaml.h"
 2 | #include <gtest/gtest.h>
 3 | #include <fstream>
 4 | 
 5 | TEST(BenchmarkConfigTest, TestParseConfig) {  // Writes a YAML file, loads it with BenchmarkConfig and checks the parsed fields.
 6 |     std::ofstream file("test_config.yaml");
 7 |     file << "cmd: sim_command\n"
 8 |          << "args: [arg1, arg2]\n"
 9 |          << "log: log.txt\n"
10 |          << "is_to_stdout: true\n"
11 |          << "clock_rate: 1.0\n"
12 |          << "pre_copy: pre_copy\n"
13 |          << "chip_frequency: 1000";  // NOTE(review): the YAML has no "phase1"/"phase2" keys, which the BenchmarkConfig::yaml_parse shown with this code reads - confirm the targeted version.
14 |     file.close();
15 | 
16 |     BenchmarkConfig config("test_config.yaml");  // NOTE(review): that constructor exits when SIMULATOR_ROOT is unset - confirm the test environment defines it.
17 |     EXPECT_EQ(config.m_proc_cfg_list.size(), 1);  // NOTE(review): the class shown exposes m_phase1_proc_cfg_list / m_phase2_proc_cfg_list, not m_proc_cfg_list - confirm.
18 |     EXPECT_EQ(config.m_proc_cfg_list[0].m_command, "sim_command");
19 |     EXPECT_EQ(config.m_proc_cfg_list[0].m_args.size(), 2);
20 |     EXPECT_EQ(config.m_proc_cfg_list[0].m_log_file, "log.txt");
21 |     EXPECT_EQ(config.m_proc_cfg_list[0].m_to_stdout, true);
22 |     EXPECT_EQ(config.m_proc_cfg_list[0].m_clock_rate, 1.0);
23 |     EXPECT_EQ(config.m_proc_cfg_list[0].m_pre_copy, "pre_copy");
24 |     EXPECT_EQ(config.m_proc_cfg_list[0].m_chip_frequency, 1000);  // NOTE(review): yaml_parse_process as shown does not read "chip_frequency" - confirm.
25 | }


--------------------------------------------------------------------------------
/test_cmd_handler.cpp:
--------------------------------------------------------------------------------
 1 | #include "cmd_handler.h"
 2 | #include "global_manager.h"
 3 | #include <gtest/gtest.h>
 4 | 
 5 | class CmdHandlerTest : public ::testing::Test {  // Fixture that gives every test its own GlobalManager.
 6 | protected:
 7 |     GlobalManager gm;
 8 | 
 9 |     void SetUp() override {
10 |         gm = GlobalManager();  // Re-assign a default-constructed manager before each test.
11 |     }
12 | };
13 | 
14 | TEST_F(CmdHandlerTest, TestHandleSendCmd) {  // Passes one SC_SEND command to handle_send_cmd and expects one queued request.
15 |     InterChiplet::SyncCommand cmd;
16 |     cmd.m_type = InterChiplet::SC_SEND;
17 |     cmd.m_src = {"Sim1"};  // NOTE(review): AddrType is std::vector<long> in global_define.h, so a string element should not compile - confirm.
18 |     cmd.m_dst = {"Sim2"};  // NOTE(review): same type concern as m_src.
19 |     cmd.m_cycle = 10.0;
20 |     cmd.m_clock_rate = 1.0;
21 |     cmd.m_nbytes = 100;
22 |     handle_send_cmd(cmd, &gm);
23 |     EXPECT_EQ(gm.requestList.size(), 1);
24 | }
25 | 
26 | TEST_F(CmdHandlerTest, TestHandleReceiveCmd) {  // Passes one SC_RECEIVE command to handle_receive_cmd and expects one queued request.
27 |     InterChiplet::SyncCommand cmd;
28 |     cmd.m_type = InterChiplet::SC_RECEIVE;
29 |     cmd.m_src = {"Sim2"};  // NOTE(review): AddrType is std::vector<long> in global_define.h, so a string element should not compile - confirm.
30 |     cmd.m_dst = {"Sim1"};  // NOTE(review): same type concern as m_src.
31 |     cmd.m_cycle = 10.0;
32 |     cmd.m_clock_rate = 1.0;
33 |     cmd.m_nbytes = 100;
34 |     handle_receive_cmd(cmd, &gm);
35 |     EXPECT_EQ(gm.requestList.size(), 1);
36 | }


--------------------------------------------------------------------------------