├── omega-code ├── stonne │ ├── include │ │ ├── ms_size │ │ ├── current_tile, │ │ ├── testbench.h │ │ ├── LookupTable.h │ │ ├── define.h │ │ ├── Unit.h │ │ ├── CompilerComponent.h │ │ ├── Fifo.h │ │ ├── Component.h │ │ ├── DistributionNetwork.h │ │ ├── CollectionBus.h │ │ ├── Connection.h │ │ ├── DSNetworkTop.h │ │ ├── AccumulationBuffer.h │ │ ├── DNNModel.h │ │ ├── CompilerMultiplierMesh.h │ │ ├── types.h │ │ ├── ReduceNetwork.h │ │ ├── CollectionBusLine.h │ │ ├── MultiplierNetwork.h │ │ ├── DSNetwork.h │ │ ├── TemporalRN.h │ │ ├── MemoryController.h │ │ ├── DNNLayer.h │ │ ├── CompilerFEN.h │ │ ├── CompilerMSN.h │ │ ├── OSMeshMN.h │ │ ├── CompilerART.h │ │ ├── Tile.h │ │ ├── utility.h │ │ ├── DSwitch.h │ │ ├── MSNetwork.h │ │ ├── MultiplierOS.h │ │ ├── FENetwork.h │ │ ├── Accumulator.h │ │ ├── ASNetwork.h │ │ ├── DataPackage.h │ │ ├── OSMeshSDMemory.h │ │ ├── MSwitch.h │ │ ├── STONNEModel.h │ │ ├── SparseSDMemory.h │ │ └── SDMemory.h │ ├── src │ │ ├── other_main │ │ │ ├── interleaving.cpp │ │ │ ├── main_testbench.cpp │ │ │ ├── main_read_tests.cpp │ │ │ ├── main_read_arch_file.cpp │ │ │ └── main_gemm_test.cpp │ │ ├── LookupTable.cpp │ │ ├── Fifo.cpp │ │ ├── DNNLayer.cpp │ │ ├── CompilerMultiplierMesh.cpp │ │ ├── Connection.cpp │ │ ├── DataPackage.cpp │ │ ├── CollectionBus.cpp │ │ ├── TemporalRN.cpp │ │ ├── DSNetworkTop.cpp │ │ ├── CollectionBusLine.cpp │ │ ├── AccumulationBuffer.cpp │ │ ├── Tile.cpp │ │ └── CompilerMSN.cpp │ ├── groups.txt │ ├── json_tutorial.txt │ ├── tiles │ │ ├── alexnet │ │ │ ├── tile_configuration_fc6.txt │ │ │ ├── tile_configuration_fc7.txt │ │ │ ├── tile_configuration_fc8.txt │ │ │ ├── tile_configuration_conv2.txt │ │ │ ├── tile_configuration_conv3.txt │ │ │ ├── tile_configuration_conv4.txt │ │ │ ├── tile_configuration_conv5.txt │ │ │ └── tile_configuration_conv1.txt │ │ └── tile_configuration.txt │ ├── objs │ │ ├── Fifo.o │ │ ├── Stats.o │ │ ├── Tile.o │ │ ├── main.o │ │ ├── omega.o │ │ ├── ASwitch.o │ │ ├── Config.o │ │ ├── DSwitch.o │ │ 
├── MSwitch.o │ │ ├── utility.o │ │ ├── ASNetwork.o │ │ ├── Connection.o │ │ ├── DNNLayer.o │ │ ├── DNNModel.o │ │ ├── DSNetwork.o │ │ ├── FEASwitch.o │ │ ├── FENetwork.o │ │ ├── MSNetwork.o │ │ ├── OSMeshMN.o │ │ ├── SDMemory.o │ │ ├── TemporalRN.o │ │ ├── testbench.o │ │ ├── Accumulator.o │ │ ├── CompilerART.o │ │ ├── CompilerFEN.o │ │ ├── CompilerMSN.o │ │ ├── DSNetworkTop.o │ │ ├── DataPackage.o │ │ ├── LookupTable.o │ │ ├── MultiplierOS.o │ │ ├── STONNEModel.o │ │ ├── CollectionBus.o │ │ ├── OSMeshSDMemory.o │ │ ├── SparseSDMemory.o │ │ ├── CollectionBusLine.o │ │ ├── AccumulationBuffer.o │ │ ├── SparseDenseSDMemory.o │ │ └── CompilerMultiplierMesh.o │ ├── architectures │ │ ├── arch_test.cfg │ │ └── sigma_64mses_64_bw.cfg │ ├── other │ │ ├── idea.txt │ │ ├── diseño.txt │ │ ├── progreso_bugs │ │ ├── asuntos_mejora.txt │ │ └── problema.txt │ ├── energy_tables │ │ ├── .out │ │ ├── energy_model.txt │ │ └── out │ ├── test.txt │ ├── execution_examples.txt │ ├── test │ ├── stonne_linker_src │ │ └── stonne_linker.h │ ├── outputs │ │ └── nuevos_tests │ │ │ └── README.txt │ └── correccion_bugs.txt ├── example_simulation.sh ├── Makefile └── sample_graphs │ └── vertex_mutag_batch64.txt ├── omega.png ├── OMEGA LOGO.jpg ├── computations.png ├── Docker.md ├── LICENSE └── README.md /omega-code/stonne/include/ms_size: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /omega-code/stonne/include/current_tile,: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /omega-code/stonne/src/other_main/interleaving.cpp: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /omega-code/stonne/groups.txt: 
-------------------------------------------------------------------------------- 1 | https://blog.yani.io/filter-group-tutorial/ 2 | -------------------------------------------------------------------------------- /omega.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega.png -------------------------------------------------------------------------------- /OMEGA LOGO.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/OMEGA LOGO.jpg -------------------------------------------------------------------------------- /omega-code/stonne/json_tutorial.txt: -------------------------------------------------------------------------------- 1 | https://www.w3schools.com/js/js_json_syntax.asp 2 | -------------------------------------------------------------------------------- /computations.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/computations.png -------------------------------------------------------------------------------- /omega-code/stonne/tiles/alexnet/tile_configuration_fc6.txt: -------------------------------------------------------------------------------- 1 | tile_type="FC" 2 | T_S=32 3 | T_K=1 4 | T_N=1 5 | -------------------------------------------------------------------------------- /omega-code/stonne/tiles/alexnet/tile_configuration_fc7.txt: -------------------------------------------------------------------------------- 1 | tile_type="FC" 2 | T_S=32 3 | T_K=1 4 | T_N=1 5 | -------------------------------------------------------------------------------- /omega-code/stonne/tiles/alexnet/tile_configuration_fc8.txt: -------------------------------------------------------------------------------- 1 | tile_type="FC" 2 | T_S=32 3 | T_K=1 4 | T_N=1 5 | 
-------------------------------------------------------------------------------- /omega-code/stonne/objs/Fifo.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/Fifo.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/Stats.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/Stats.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/Tile.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/Tile.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/main.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/main.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/omega.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/omega.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/ASwitch.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/ASwitch.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/Config.o: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/Config.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/DSwitch.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/DSwitch.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/MSwitch.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/MSwitch.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/utility.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/utility.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/ASNetwork.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/ASNetwork.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/Connection.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/Connection.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/DNNLayer.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/DNNLayer.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/DNNModel.o: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/DNNModel.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/DSNetwork.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/DSNetwork.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/FEASwitch.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/FEASwitch.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/FENetwork.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/FENetwork.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/MSNetwork.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/MSNetwork.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/OSMeshMN.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/OSMeshMN.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/SDMemory.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/SDMemory.o 
-------------------------------------------------------------------------------- /omega-code/stonne/objs/TemporalRN.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/TemporalRN.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/testbench.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/testbench.o -------------------------------------------------------------------------------- /omega-code/stonne/tiles/tile_configuration.txt: -------------------------------------------------------------------------------- 1 | T_R=3 2 | T_S=3 3 | T_C=1 4 | T_G=1 5 | T_K=2 6 | T_N=1 7 | T_X'=1 8 | T_Y'=1 9 | -------------------------------------------------------------------------------- /omega-code/stonne/objs/Accumulator.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/Accumulator.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/CompilerART.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/CompilerART.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/CompilerFEN.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/CompilerFEN.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/CompilerMSN.o: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/CompilerMSN.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/DSNetworkTop.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/DSNetworkTop.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/DataPackage.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/DataPackage.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/LookupTable.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/LookupTable.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/MultiplierOS.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/MultiplierOS.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/STONNEModel.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/STONNEModel.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/CollectionBus.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/CollectionBus.o -------------------------------------------------------------------------------- 
/omega-code/stonne/objs/OSMeshSDMemory.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/OSMeshSDMemory.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/SparseSDMemory.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/SparseSDMemory.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/CollectionBusLine.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/CollectionBusLine.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/AccumulationBuffer.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/AccumulationBuffer.o -------------------------------------------------------------------------------- /omega-code/stonne/objs/SparseDenseSDMemory.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/SparseDenseSDMemory.o -------------------------------------------------------------------------------- /Docker.md: -------------------------------------------------------------------------------- 1 | Refer to the following docker image for stable version 2 | 3 | ``` 4 | docker run -it franciscomunoz/stonne_omega_img /bin/bash 5 | ``` 6 | -------------------------------------------------------------------------------- /omega-code/stonne/objs/CompilerMultiplierMesh.o: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/stonne-simulator/omega/HEAD/omega-code/stonne/objs/CompilerMultiplierMesh.o -------------------------------------------------------------------------------- /omega-code/stonne/tiles/alexnet/tile_configuration_conv2.txt: -------------------------------------------------------------------------------- 1 | tile_type="CONV" 2 | T_R=5 3 | T_S=5 4 | T_C=1 5 | T_G=1 6 | T_K=2 7 | T_N=1 8 | T_X'=1 9 | T_Y'=1 10 | -------------------------------------------------------------------------------- /omega-code/stonne/tiles/alexnet/tile_configuration_conv3.txt: -------------------------------------------------------------------------------- 1 | tile_type="CONV" 2 | T_R=3 3 | T_S=3 4 | T_C=1 5 | T_G=1 6 | T_K=4 7 | T_N=1 8 | T_X'=1 9 | T_Y'=1 10 | -------------------------------------------------------------------------------- /omega-code/stonne/tiles/alexnet/tile_configuration_conv4.txt: -------------------------------------------------------------------------------- 1 | tile_type="CONV" 2 | T_R=3 3 | T_S=3 4 | T_C=1 5 | T_G=1 6 | T_K=4 7 | T_N=1 8 | T_X'=1 9 | T_Y'=1 10 | -------------------------------------------------------------------------------- /omega-code/stonne/tiles/alexnet/tile_configuration_conv5.txt: -------------------------------------------------------------------------------- 1 | tile_type="CONV" 2 | T_R=3 3 | T_S=3 4 | T_C=1 5 | T_G=1 6 | T_K=4 7 | T_N=1 8 | T_X'=1 9 | T_Y'=1 10 | -------------------------------------------------------------------------------- /omega-code/stonne/tiles/alexnet/tile_configuration_conv1.txt: -------------------------------------------------------------------------------- 1 | tile_type="CONV" 2 | T_R=11 3 | T_S=1 4 | T_C=1 5 | T_G=1 6 | T_K=2 7 | T_N=1 8 | T_X'=1 9 | T_Y'=1 10 | -------------------------------------------------------------------------------- /omega-code/stonne/architectures/arch_test.cfg: 
-------------------------------------------------------------------------------- 1 | [MSNetwork] 2 | ms_size=16 3 | [ReduceNetwork] 4 | type="ASNETWORK" 5 | [SDMemory] 6 | dn_bw=8 7 | rn_bw=8 8 | controller_type="MAERI_DENSE_WORKLOAD" 9 | -------------------------------------------------------------------------------- /omega-code/stonne/architectures/sigma_64mses_64_bw.cfg: -------------------------------------------------------------------------------- 1 | [MSNetwork] 2 | ms_size=16 3 | [ReduceNetwork] 4 | type="ASNETWORK" 5 | [SDMemory] 6 | dn_bw=8 7 | rn_bw=8 8 | controller_type="SIGMA_SPARSE_GEMM" 9 | -------------------------------------------------------------------------------- /omega-code/stonne/other/idea.txt: -------------------------------------------------------------------------------- 1 | Añadir a los multiplicadores que hacen el forwarding de la psum la funcionalidad para que el mismo genere un cero. Para ello debemos de pasarle el numero de ciclos que tiene que esperar hasta generar la psum 2 | -------------------------------------------------------------------------------- /omega-code/example_simulation.sh: -------------------------------------------------------------------------------- 1 | ./omega -V=1168 -F=28 -G=2 -E=2590 -T_Va=18 -T_N=1 -T_Fa=28 -T_Vc=18 -T_G=1 -T_Fc=28 -pe_agg=512 -pe_cmb=512 -dn_bw_agg=512 -rn_bw_agg=512 -dn_bw_cmb=512 -rn_bw_cmb=512 -vertex_path="sample_graphs/vertex_mutag_batch64.txt" -edge_path="sample_graphs/edge_mutag_batch64.txt" 2 | -------------------------------------------------------------------------------- /omega-code/stonne/other/diseño.txt: -------------------------------------------------------------------------------- 1 | En MSwitch existen dos funciones: setInputForwardingConnection y setOutputForwardingConnection. Estas dos funciones activan y desactivan los fw links para recibir y enviar informacion a través 2 | de su fw link. 
Estas funciones deben de ser llamadas por tile.cpp al mismo tiempo que se configura la VN en cada MS. 3 | -------------------------------------------------------------------------------- /omega-code/stonne/other/progreso_bugs: -------------------------------------------------------------------------------- 1 | Ahora creo que el algoritmo generacion de señales no puede ser ya que me falla al variar C pero no al variar 2 | el tile y por tanto el arbol. 3 | 4 | Bueno no, esto de arriba es una mierda. En realidad VN vale uno mas para procesar el folding y eso es lo que modifica los arboles de la ART y por tanto probablemente el algoritmo de generacion de señales esta bugeado. 5 | -------------------------------------------------------------------------------- /omega-code/stonne/other/asuntos_mejora.txt: -------------------------------------------------------------------------------- 1 | Si dejamos un MS libre para acumular partial sums cuando hay folding significa que los MS de una VN tendrán que esperar a ese MS auxiliar a recibir la partial sum de la iteracion de folding anterior. Esto quiere decir que los MSwitches tendran que esperar ociosos a que la anterior iteracion se haya calculado, sin poder por tanto solapar el computo del ART con el de los multiplicadores. 
2 | -------------------------------------------------------------------------------- /omega-code/stonne/energy_tables/.out: -------------------------------------------------------------------------------- 1 | DYNAMIC ENERGY TABLE USED 2 | ------------------------------------------- 3 | WIRE WRITE 2 4 | WIRE READ 3 5 | FIFO PUSH 4 6 | FIFO POP 6 7 | FIFO FRONT 5 8 | ADDER ADD_2_1 7 9 | ADDER ADD_3_1 8 10 | SWITCH ROUTE_UNICAST 0 11 | SWITCH ROUTE_BROADCAST 5 12 | MULTIPLIER MULTIPLICATION 3 13 | MULTIPLIER FORWARD_PSUM 4 14 | GLOBALBUFFER READ 5 15 | GLOBALBUFFER WRITE 5 16 | ------------------------------------------- 17 | -------------------------------------------------------------------------------- /omega-code/stonne/test.txt: -------------------------------------------------------------------------------- 1 | ./stonne -R=3 -S=3 -C=4 -K=8 -N=1 -G=1 -X=9 -Y=9 -T_R=1 -T_S=1 -T_C=4 -T_K=1 -T_N=1 -T_G=1 -T_X_=1 -T_Y_=1 -num_ms=128 -dn_bw=128 -rn_bw=128 2 | 3 | ./stonne -R=3 -S=3 -C=4 -K=8 -N=1 -G=1 -X=9 -Y=9 -T_R=1 -T_S=1 -T_C=4 -T_K=1 -T_N=1 -T_G=1 -T_X_=1 -T_Y_=1 -num_ms=128 -dn_bw=128 -rn_bw=128 -rn_type="FENETWORK" 4 | 5 | ./stonne -R=1 -S=4 -C=1 -K=2 -G=1 -N=4 -X=1 -Y=4 -T_R=1 -T_S=4 -T_C=1 -T_K=1 -T_G=1 -T_N=1 -T_X_=1 -T_Y_=1 -num_ms=16 -dn_bw=16 -rn_bw=16 6 | -------------------------------------------------------------------------------- /omega-code/stonne/execution_examples.txt: -------------------------------------------------------------------------------- 1 | ./stonne -R=3 -S=3 -C=4 -K=8 -N=1 -G=1 -X=9 -Y=9 -T_R=1 -T_S=1 -T_C=4 -T_K=1 -T_N=1 -T_G=1 -T_X_=1 -T_Y_=1 -num_ms=128 -dn_bw=128 -rn_bw=128 2 | 3 | ./stonne -R=3 -S=3 -C=128 -K=8 -N=1 -G=1 -X=9 -Y=9 -T_R=1 -T_S=1 -T_C=64 -T_K=2 -T_N=1 -T_G=1 -T_X_=1 -T_Y_=1 -num_ms=128 -dn_bw=128 -rn_bw=128 4 | 5 | 6 | Alexnet test not working: ./stonne -R=3 -S=3 -C=256 -K=384 -G=1 -N=1 -X=14 -Y=14 -T_R=3 -T_S=3 -T_C=1 -T_K=6 -T_G=1 -T_N=1 -T_X_=1 -T_Y_=1 -num_ms=64 -dn_bw=64 -rn_bw=64 7 | 8 | 
-------------------------------------------------------------------------------- /omega-code/stonne/include/testbench.h: -------------------------------------------------------------------------------- 1 | #ifndef _TESTBENCH_H 2 | #define _TESTBENCH_H 3 | void sequential_layer(unsigned int R, unsigned int S, unsigned int C, unsigned int K, unsigned int G, unsigned int N, unsigned int X, unsigned int Y, unsigned int strides, 4 | float* input, float* filters, float * outputs); 5 | 6 | void cpu_gemm(float* MK_dense_matrix, float* KN_dense_matrix, float* output, unsigned int M, unsigned int N, unsigned int K); 7 | 8 | 9 | void run_simple_tests(); 10 | 11 | void run_stonne_architecture_tests(layerTest layer, unsigned int num_ms); 12 | 13 | void hand_tests(); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /omega-code/stonne/src/other_main/main_testbench.cpp: -------------------------------------------------------------------------------- 1 | //Created by Francisco Munoz-Martinez on 17/06/2019 2 | 3 | #include 4 | #include "MAERIModel.h" 5 | #include "types.h" 6 | #include 7 | #include 8 | #include "testbench.h" 9 | 10 | using namespace std; 11 | int main(int argc, char** argv) { 12 | // hand_tests(); 13 | // run_simple_tests(); 14 | unsigned int num_ms=32; 15 | run_maeri_architecture_tests(LATE_SYNTHETIC, 32); 16 | } 17 | 18 | -------------------------------------------------------------------------------- /omega-code/stonne/include/LookupTable.h: -------------------------------------------------------------------------------- 1 | //Created by Francisco Munoz Martinez on 25/06/2019 2 | 3 | #ifndef __lookuptable__h 4 | #define __lookuptable__h 5 | 6 | #include "Connection.h" 7 | #include "Unit.h" 8 | #include "Config.h" 9 | #include 10 | 11 | class LookupTable : Unit { 12 | private: 13 | Connection* inputConnection; //From the ART 14 | Connection* outputConnection; //Torwards the memory 15 | cycles_t latency; 16 | 
unsigned int port_width; 17 | public: 18 | LookupTable(id_t id, std::string name, Config stonne_cfg, Connection* inputConnection, Connection* outputConnection); 19 | void cycle(); 20 | }; 21 | 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /omega-code/stonne/energy_tables/energy_model.txt: -------------------------------------------------------------------------------- 1 | RN_WIRE AREA=9.46 STATIC=0 WRITE=0.001839831 READ=0 2 | DN_WIRE AREA=11.48 STATIC=0 WRITE=0.00223226 READ=0 3 | MN_WIRE AREA=9.46 STATIC=0 WRITE=0.001839831 READ=0 4 | CB_WIRE AREA=9.46 STATIC=0 WRITE=0.001839831 READ=0 5 | FIFO AREA=0 STATIC=0 PUSH=0 POP=0 FRONT=0 6 | ADDER AREA=1059.156 STATIC=0.001505 ADD_2_1=0.111 ADD_3_1=0.111 CONFIGURATION=0 7 | SWITCH AREA=108.99 STATIC=0.000157 ROUTE_UNICAST=0.0360 ROUTE_BROADCAST=0.0360 8 | MULTIPLIER AREA=1001.0 STATIC=0.001614 MULTIPLICATION=0.00741 FORWARD_PSUM=0.00741 CONFIGURATION=0 9 | GLOBALBUFFER AREA=115029 STATIC=0.0101 READ=0.044 WRITE=0.044 10 | ACCUMULATOR AREA=1059.156 STATIC=0.001505 READ=0 WRITE=0 ADD=0.111 11 | -------------------------------------------------------------------------------- /omega-code/stonne/include/define.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Zhongyuan Zhao on 9/20/18. 
3 | // 4 | 5 | #ifndef DEFINE_H_ 6 | #define DEFINE_H_ 7 | 8 | enum OptGoal{ 9 | performance = 0, 10 | energy = 1, 11 | energy_efficiency = 2, 12 | all = 3, 13 | }; 14 | 15 | enum Opcode { 16 | Add = 0, 17 | Compare = 1, 18 | Add_fowd = 2, 19 | Multiply = 3, 20 | Mul_fowd = 4, 21 | Mpush = 5, 22 | Cpush = 6, 23 | Pull = 7, 24 | Distribute = 8, 25 | }; 26 | 27 | enum ConfigType { 28 | conv = 0, 29 | ps = 1, 30 | fc = 2, 31 | lstm = 3, 32 | hmdpadd = 4, 33 | singlehmdp = 5, 34 | }; 35 | 36 | #define PUSH_LENGTH 12 37 | #define PULL_LENGTH 12 38 | #define NORMALIZE_MAC 1000 39 | 40 | 41 | 42 | #endif //DEFINE_H_ 43 | -------------------------------------------------------------------------------- /omega-code/stonne/include/Unit.h: -------------------------------------------------------------------------------- 1 | #ifndef _UNIT_h_ 2 | #define _UNIT_h_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include "Config.h" 8 | #include "Stats.h" 9 | 10 | class Unit { 11 | private: 12 | id_t id; //Id of the component 13 | std::string name; //Name of the component 14 | 15 | public: 16 | Unit(id_t id, std::string name) { 17 | this->id=id; 18 | this->name=name; 19 | } 20 | 21 | virtual void printStats(std::ofstream& out, unsigned int indent) {} //Print the stats of the component 22 | virtual void printEnergy(std::ofstream& out, unsigned int indent) {} //Print the counters to get the consumption of the unit 23 | virtual void cycle() {} //Execute a cycle in the component 24 | virtual void setConfiguration(Config cfg) {} //set the configuration parameters of the component 25 | }; 26 | 27 | 28 | #endif 29 | -------------------------------------------------------------------------------- /omega-code/stonne/src/other_main/main_read_tests.cpp: -------------------------------------------------------------------------------- 1 | //Created by Francisco Munoz-Martinez on 17/06/2019 2 | 3 | #include 4 | #include "MAERIModel.h" 5 | #include "types.h" 6 | #include 7 | #include 8 | #include 
"testbench.h" 9 | #include "Tile.h" 10 | 11 | using namespace std; 12 | int main(int argc, char** argv) { 13 | 14 | string name_file="tile_configuration.txt"; 15 | Tile tile(name_file); 16 | //tile is supposed to have the values 17 | std::cout << "T_R: " << tile.get_T_R() << std::endl; 18 | std::cout << "T_S: " << tile.get_T_S() << std::endl; 19 | std::cout << "T_C: " << tile.get_T_C() << std::endl; 20 | std::cout << "T_K: " << tile.get_T_K() << std::endl; 21 | std::cout << "T_N: " << tile.get_T_N() << std::endl; 22 | std::cout << "T_X': " << tile.get_T_X_() << std::endl; 23 | std::cout << "T_Y_': " << tile.get_T_Y_() << std::endl; 24 | 25 | 26 | 27 | } 28 | 29 | -------------------------------------------------------------------------------- /omega-code/stonne/include/CompilerComponent.h: -------------------------------------------------------------------------------- 1 | #ifndef _COMPILER_COMPONENT_h_ 2 | #define _COMPILER_COMPONENT_h_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include "Tile.h" 8 | #include "DNNLayer.h" 9 | #include 10 | #include 11 | 12 | class CompilerComponent { 13 | 14 | 15 | public: 16 | Tile* current_tile; 17 | std::vector sparseVNs; 18 | DNNLayer* dnn_layer; 19 | unsigned int num_ms; 20 | bool signals_configured; 21 | unsigned int n_folding; 22 | 23 | CompilerComponent() { 24 | current_tile = NULL; 25 | signals_configured = false; 26 | this->dnn_layer=NULL; 27 | } 28 | virtual void configureSignals(Tile* current_tile, DNNLayer* dnn_layer, unsigned int num_ms, unsigned int n_folding) {} //Print the stats of the component 29 | virtual void configureSparseSignals(std::vector sparseVNs, DNNLayer* dnn_layer, unsigned int num_ms) {} 30 | Tile* getTile() {assert(signals_configured); return this->current_tile;} 31 | }; 32 | 33 | 34 | #endif 35 | -------------------------------------------------------------------------------- /omega-code/stonne/test: -------------------------------------------------------------------------------- 1 | ./stonne 
-SparseGEMM -M=4 -N=4 -K=256 -num_ms=8 -dn_bw=8 -rn_bw=8 -rn_bw=8 -MK_sparsity=80 -KN_sparsity=10 -dataflow=MK_STA_KN_STR 2 | 3 | -optimize=1 4 | 5 | ./stonne -CONV -R=3 -S=3 -C=6 -G=1 -K=6 -N=1 -X=20 -Y=20 -T_R=3 -T_S=3 -T_C=1 -T_G=1 -T_K=1 -T_N=1 -T_X_=3 -T_Y_=1 -num_ms=64 -dn_bw=8 6 | 7 | ./stonne -SparseDense -M=1 -N=8 -K=16 -num_ms=4 -dn_bw=4 -rn_bw=4 -MK_sparsity=50 -T_N=1 -T_K=4 -accumulation_buffer=1 8 | 9 | ./stonne -DenseGEMM -M=4 -N=4 -K=16 -num_ms=4 -dn_bw=4 -rn_bw=4 -T_N=1 -T_M=1 -T_K=4 -accumulation_buffer=1 10 | 11 | ./stonne -DenseGEMM -M=4 -N=4 -K=16 -num_ms=4 -dn_bw=4 -rn_bw=4 -T_N=4 -T_M=1 -T_K=1 -accumulation_buffer=1 -rn_type="TEMPORALRN" 12 | 13 | ./stonne -DenseGEMM -M=4 -N=4 -K=16 -ms_rows=4 -ms_cols=4 -dn_bw=8 -rn_bw=16 -T_N=4 -T_M=1 -T_K=1 -accumulation_buffer=1 -rn_type="TEMPORALRN" -mn_type="OS_MESH" -mem_ctrl="TPU_OS_DENSE" 14 | 15 | 16 | ./stonne -DenseGEMM -M=5 -N=2 -K=2 -ms_rows=4 -ms_cols=4 -dn_bw=8 -rn_bw=16 -T_N=4 -T_M=4 -T_K=1 -accumulation_buffer=1 -rn_type="TEMPORALRN" -mn_type="OS_MESH" -mem_ctrl="TPU_OS_DENSE" 17 | 18 | -------------------------------------------------------------------------------- /omega-code/stonne/include/Fifo.h: -------------------------------------------------------------------------------- 1 | 2 | //Created by Francisco Munoz Martinez on 25/06/2019 3 | 4 | // This class is used in the simulator in order to limit the size of the fifo. 5 | 6 | #ifndef __Fifo_h__ 7 | #define __Fifo_h__ 8 | 9 | #include 10 | #include "DataPackage.h" 11 | #include "types.h" 12 | #include "Stats.h" 13 | 14 | class Fifo { 15 | private: 16 | std::queue fifo; 17 | unsigned int capacity; //Capacity in number of bits 18 | unsigned int capacity_words; //Capacity in number of words allowed. 
i.e., capacity_words = capacity / size_word 19 | FifoStats fifoStats; //Tracking parameters 20 | public: 21 | Fifo(unsigned int capacity); 22 | bool isEmpty(); 23 | bool isFull(); 24 | void push(DataPackage* data); 25 | DataPackage* pop(); 26 | DataPackage* front(); 27 | unsigned int size(); //Return the number of elements in the fifo 28 | void printStats(std::ofstream& out, unsigned int indent); 29 | void printEnergy(std::ofstream& out, unsigned int indent); 30 | FifoStats getStats() {return this->fifoStats;} 31 | }; 32 | #endif 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 STONNE 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /omega-code/stonne/src/LookupTable.cpp: -------------------------------------------------------------------------------- 1 | //Created by Francisco Munoz on 25/06/2019 2 | 3 | #include "LookupTable.h" 4 | #include 5 | #include "DataPackage.h" 6 | 7 | LookupTable::LookupTable(id_t id, std::string name, Config stonne_cfg, Connection* inputConnection, Connection* outputConnection) : Unit(id, name) { 8 | // Collecting parameters from the configuration file 9 | this->latency=stonne_cfg.m_LookUpTableCfg.latency; 10 | this->port_width = stonne_cfg.m_LookUpTableCfg.port_width; 11 | // End collecting parameters from the configuration file 12 | this->inputConnection = inputConnection; 13 | this->outputConnection = outputConnection; 14 | } 15 | 16 | void LookupTable::cycle() { 17 | if(this->inputConnection->existPendingData()) { 18 | //std::cout << "LookupTABLE is executing" << std::endl; 19 | std::vector pck_to_receive = this->inputConnection->receive(); 20 | //TODO apply activation function 21 | this->outputConnection->send(pck_to_receive); 22 | //for(int i=0; iget_data() << std::endl; 24 | //} 25 | 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /omega-code/stonne/src/other_main/main_read_arch_file.cpp: -------------------------------------------------------------------------------- 1 | //Created by Francisco Munoz-Martinez on 17/06/2019 2 | 3 | #include 4 | #include "STONNEModel.h" 5 | #include "types.h" 6 | #include 7 | #include 8 | #include "testbench.h" 9 | #include "Tile.h" 10 | 11 | using namespace std; 12 | int main(int argc, char** argv) { 13 | 14 | /* 15 | string name_file="tile_configuration.txt"; 16 | Tile tile(name_file); 17 | //tile is supposed to have the values 18 | std::cout << "T_R: " << tile.get_T_R() << std::endl; 19 | std::cout << "T_S: " << tile.get_T_S() << std::endl; 20 | std::cout << "T_C: " << tile.get_T_C() << 
std::endl; 21 | std::cout << "T_K: " << tile.get_T_K() << std::endl; 22 | std::cout << "T_N: " << tile.get_T_N() << std::endl; 23 | std::cout << "T_X': " << tile.get_T_X_() << std::endl; 24 | std::cout << "T_Y_': " << tile.get_T_Y_() << std::endl;*/ 25 | 26 | string architecture_file = "/home/paco/Desktop/STONNE/STONNE/architectures/arch_test.cfg"; 27 | Config stonne_cfg; 28 | stonne_cfg.loadFile(architecture_file); 29 | std::ofstream out; 30 | out.open("fichero.txt"); 31 | stonne_cfg.printConfiguration(out, 0); 32 | out.close(); 33 | 34 | 35 | 36 | } 37 | 38 | -------------------------------------------------------------------------------- /omega-code/stonne/include/Component.h: -------------------------------------------------------------------------------- 1 | #ifndef _COMPONENT_H_ 2 | #define _COMPONENT_H_ 3 | 4 | #include 5 | #include "Types.h" 6 | #include "Connection.hpp" 7 | #include 8 | 9 | class Component { 10 | private: 11 | string componentName; // Name of the component 12 | bool enabled; // This flag set if the device is on 13 | cycles_t idleCycles; // Number of cycles in which the component is idle. 14 | cycles_t totalCycles; // Amount of total cycles. This includes idleCycles. 
15 | id_t id; 16 | public: 17 | Component(id_t id, const string& componentName) { 18 | this->id = id; 19 | this->componentName = componentName; 20 | this->enabled = true; 21 | this->idleCycles = 0; 22 | this->totalCycles = 0; 23 | } 24 | 25 | const string& getComponentName() const {return componentName;} 26 | const bool isEnabled() const {return enabled;} 27 | const cycles_t getIdleCycles() const {return idleCycles;} 28 | const cycles_t getTotalCycles() const {return totalCycles;} 29 | const id_t getId() const {return id;} 30 | 31 | virtual void cycle() = 0; 32 | virtual void printStats(ofstream& out, unsigned int indent) = 0; 33 | //virtual void reset() = 0; 34 | 35 | 36 | 37 | 38 | 39 | }; 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /omega-code/stonne/include/DistributionNetwork.h: -------------------------------------------------------------------------------- 1 | //Abstract class that represents the distribution network. 2 | 3 | #ifndef __DistributionNetworkAbstract__ 4 | #define __DistributionNetworkAbstract__ 5 | 6 | #include 7 | #include 8 | 9 | #include "DSNetwork.h" 10 | #include "Connection.h" 11 | #include 12 | #include 13 | #include 14 | 15 | 16 | //This class represents a general case of a distribution network and cannot be instantiated 17 | 18 | class DistributionNetwork : public Unit { 19 | private: 20 | 21 | public: 22 | //General constructor, only used to inherit from Unit 23 | DistributionNetwork(id_t id, std::string name) : Unit(id, name) {} 24 | //This just executes cycle over all the dsnetworks 25 | virtual void cycle() {assert(false);} 26 | //Get last levels connections together. Useful to connect with mswitches later. 
27 | virtual std::map getLastLevelConnections() {assert(false);} 28 | // Get the top connections (i.e., the ones that connect the SDMemory ports) 29 | virtual std::vector getTopConnections() {assert(false);} 30 | virtual void printStats(std::ofstream& out, unsigned int indent) {assert(false);} 31 | virtual void printEnergy(std::ofstream& out, unsigned int indent) {assert(false);} 32 | virtual DSNetworkStats getStats() {assert(false);} 33 | 34 | }; 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /omega-code/stonne/energy_tables/out: -------------------------------------------------------------------------------- 1 | DYNAMIC ENERGY TABLE USED 2 | ------------------------------------------- 3 | RN_WIRE AREA=9.46 STATIC=0 WRITE=0.00000808 READ=0 4 | DN_WIRE AREA=11.48 STATIC=0 WRITE=0.00000808 READ=0 5 | MN_WIRE AREA=9.46 STATIC=0 WRITE=0.00000808 READ=0 6 | CB_WIRE AREA=9.46 STATIC=0 WRITE=0.00000808 READ=0 7 | FIFO AREA=0 STATIC=0 PUSH=0 POP=0 FRONT=0 8 | ADDER AREA=1059.156 STATIC=0.00534 ADD_2_1=0.106 ADD_3_1=0.106 CONFIGURATION=0 9 | SWITCH AREA=108.99 STATIC=0.0157 ROUTE_UNICAST=0.0203 ROUTE_BROADCAST=0.0203 10 | MULTIPLIER AREA=1001.0 STATIC=0.0103 MULTIPLICATION=0.06375 FORWARD_PSUM=0.03 CONFIGURATION=0 11 | GLOBALBUFFER AREA=4161.536 STATIC=0 READ=4.0 WRITE=4.0 12 | ACCUMULATOR AREA=1059.156 STATIC=0.00534 READ=0 WRITE=0 ADD=0.106 13 | ------------------------------------------- 14 | COMPONENT BREAKDOWN 15 | ------------------------------------------- 16 | DSNetwork: STATIC=0.0 AREA=1469.4400000000016 DYNAMIC=0.14169088000000002 17 | MSNetwork: STATIC=17202.483199999966 AREA=129329.42 DYNAMIC=993.7199999999999 18 | ReduceNetwork: STATIC=8848.892639999982 AREA=137454.87200000015 DYNAMIC=1122.1320580000001 19 | GlobalBuffer: STATIC=0.0 AREA=4161.536 DYNAMIC=84256.0 20 | CollectionBus: STATIC=0.0 AREA=3632.6400000000112 DYNAMIC=0.057012480000000004 21 | Total STATIC Energy: 26051.375839999797 22 | Total 
DYNAMIC Energy: 86372.05076136002 23 | Total Area: 276047.9080000046 24 | Total Energy: 112423.42660135982 -------------------------------------------------------------------------------- /omega-code/stonne/include/CollectionBus.h: -------------------------------------------------------------------------------- 1 | // Created by Francisco Munoz on 28/02/2019 2 | 3 | 4 | #ifndef _BUS_CPP 5 | #define _BUS_CPP 6 | 7 | #include 8 | #include 9 | #include "Unit.h" 10 | #include "Connection.h" 11 | #include "CollectionBusLine.h" 12 | #include "Unit.h" 13 | #include "Config.h" 14 | 15 | class Bus : public Unit { 16 | 17 | private: 18 | unsigned int n_bus_lines; //Number of outputs from the bus 19 | unsigned int input_ports_bus_line; 20 | unsigned int connection_width; 21 | unsigned int fifo_size; 22 | std::vector collection_bus_lines; 23 | 24 | public: 25 | Bus(id_t id, std::string name, Config stonne_cfg); 26 | unsigned int getNBusLines() {return this->n_bus_lines;} 27 | unsigned int getInputPortsBusLine() {return this->input_ports_bus_line;} 28 | std::vector> getInputConnections(); 29 | std::vector getOutputConnections(); //Get the output connections of all the lines 30 | Connection* getInputConnectionFromBusLine(unsigned int busID, unsigned int inputID); //Get a specific input from a specific bus line 31 | 32 | 33 | void cycle(); //Get the inputs and send as many as possible to the outputs 34 | 35 | void printStats(std::ofstream& out, unsigned int indent); 36 | void printEnergy(std::ofstream& out, unsigned int indent); 37 | ~Bus(); 38 | 39 | 40 | 41 | }; 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /omega-code/Makefile: -------------------------------------------------------------------------------- 1 | CXX=g++ 2 | CXXFLAGS= -O3 -Istonne/include/ -Istonne/external/ -std=c++1y #-ltcmalloc -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free 3 | #DEBUGFLAGS=-D DEBUG_MEM_OUTPUT -D 
DEBUG_MSWITCH_FUNC 4 | #-D DEBUG_MEM_OUTPUT -D DEBUG_MEM_INPUT -D DEBUG_ASWITCH_CONFIG -D DEBUG_ASWITCH_FUNC -D DEBUG_MSWITCH_CONFIG -D DEBUG_MSWITCH_FUNC 5 | BIN_OMEGA=omega 6 | BIN_STONNE=stonne_single 7 | SOURCE=$(wildcard stonne/src/*.cpp) 8 | OBJSDIR=stonne/objs 9 | OBJS_OMEGA=$(patsubst stonne/src/%,$(OBJSDIR)/%,$(patsubst %.cpp,%.o,$(SOURCE))) 10 | OBJS_OMEGA := $(filter-out stonne/objs/main.o, $(OBJS_OMEGA)) 11 | OBJS_STONNE=$(patsubst stonne/src/%,$(OBJSDIR)/%,$(patsubst %.cpp,%.o,$(SOURCE))) 12 | OBJS_STONNE := $(filter-out stonne/objs/omega.o, $(OBJS_STONNE)) 13 | #OBJS=$(patsubst stonne/src/%,$(OBJSDIR)/%,$(patsubst %.cpp,%.o,$(SOURCE))) 14 | INCLUDES=$(wildcard stonne/include/*.h) 15 | $(warning OMEGA is $(OBJS_OMEGA)) 16 | $(warning STONNE is $(OBJS_STONNE)) 17 | 18 | all: $(BIN_OMEGA) $(BIN_STONNE) 19 | 20 | $(BIN_OMEGA): $(OBJSDIR) $(OBJS_OMEGA) 21 | $(CXX) $(CXXFLAGS) $(DEBUGFLAGS) -o $@ $(OBJS_OMEGA) #-pthread -ltcmalloc 22 | 23 | 24 | $(BIN_STONNE): $(OBJSDIR) $(OBJS_STONNE) 25 | $(CXX) $(CXXFLAGS) $(DEBUGFLAGS) -o $@ $(OBJS_STONNE) #-pthread -ltcmalloc 26 | 27 | 28 | $(OBJSDIR): 29 | mkdir -p $@ 30 | 31 | $(OBJSDIR)/%.o: stonne/src/%.cpp $(INCLUDES) 32 | $(CXX) $(CXXFLAGS) $(DEBUGFLAGS) -c $< -o $@ #-ltcmalloc 33 | 34 | 35 | 36 | clean: 37 | rm -rf $(OBJSDIR) 38 | 39 | 40 | -------------------------------------------------------------------------------- /omega-code/stonne/include/Connection.h: -------------------------------------------------------------------------------- 1 | //Created 13/06/2019 2 | 3 | #ifndef __Connection__h 4 | #define __Connection__h 5 | 6 | #include "types.h" 7 | #include "DataPackage.h" 8 | #include 9 | #include "Stats.h" 10 | 11 | /* 12 | This class Connection does not need ACK responses since in the accelerator the values are sent without a need of a request. Everything is controlled 13 | by the control of the accelerator. 
14 | */ 15 | 16 | 17 | class Connection { 18 | private: 19 | bool pending_data; // Indicates if data exists 20 | size_t bw; // Size in bytes of actual data. In the simulator this size is greater since we wrap the data into wrappers to track. 21 | std::vector data; // Array of packages that are sent/received in a certain cycle. The number of packages depends on the bw of the connection 22 | unsigned int current_capacity; // the capacity must not exceed the bw of the connection 23 | ConnectionStats connectionStats; //Tracking parameters 24 | 25 | public: 26 | Connection(int bw); 27 | void send(std::vector data); //Packages of data to be sent. The sum of all the size_package of each package must not be greater than bw. 28 | std::vector receive(); //Receive the packages from the connection 29 | bool existPendingData(); 30 | 31 | void printEnergy(std::ofstream &out, unsigned int indent, std::string wire_type); 32 | ConnectionStats getStats() {return this->connectionStats;} 33 | 34 | 35 | }; 36 | 37 | 38 | #endif 39 | 40 | -------------------------------------------------------------------------------- /omega-code/stonne/include/DSNetworkTop.h: -------------------------------------------------------------------------------- 1 | // Created on 06/11/2019 by Francisco Munoz Martinez 2 | 3 | #ifndef __DSNetworkTop__ 4 | #define __DSNetworkTop__ 5 | 6 | #include 7 | #include 8 | 9 | #include "DSNetwork.h" 10 | #include "Connection.h" 11 | #include 12 | #include 13 | #include "DistributionNetwork.h" 14 | 15 | 16 | //This class represents the whole DSNetwork that is composed of several DSNetwork trees. Basically, a DSNetwork has as many trees as the architecture has input ports to fetch input data. 
17 | 18 | class DSNetworkTop : public DistributionNetwork { 19 | private: 20 | unsigned int n_input_ports; 21 | unsigned int ms_size_per_port; //Number of multipliers per each ds tree 22 | unsigned int port_width; 23 | 24 | std::vector dsnetworks; //one per port 25 | std::vector connections; //one per port 26 | 27 | public: 28 | //The constructor creates n_input_ports dsnetworks with portWidth port_width 29 | DSNetworkTop(id_t id, std::string name, Config stonne_cfg); 30 | ~DSNetworkTop(); 31 | void cycle(); //This just executes cycle over all the dsnetworks 32 | std::map getLastLevelConnections(); //Get last levels connections from all the astrees together. Useful to connect with mswitches later. 33 | std::vector getTopConnections(); // Get the top connections (i.e., the ones that connect the SDMemory ports with the DS subtrees) 34 | void printStats(std::ofstream& out, unsigned int indent); 35 | void printEnergy(std::ofstream& out, unsigned int indent); 36 | DSNetworkStats getStats(); 37 | 38 | }; 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /omega-code/stonne/include/AccumulationBuffer.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Francisco Munoz on 29/06/2020. 
3 | // 4 | 5 | #ifndef __ACCUMULATIONBUFFER__H__ 6 | #define __ACCUMULATIONBUFFER__H__ 7 | 8 | #include "Accumulator.h" 9 | #include 10 | #include 11 | #include 12 | #include "types.h" 13 | #include "Config.h" 14 | #include "Tile.h" 15 | #include "DNNLayer.h" 16 | 17 | 18 | 19 | class AccumulationBuffer : public Unit { 20 | private: 21 | unsigned int port_width; //Width in bits of each port 22 | unsigned int n_accumulators; //Number of accumulator array 23 | std::map accumulatortable; //Map with the accumulators 24 | 25 | std::map inputconnectiontable; // input connections 26 | std::map outputconnectiontable; // Output connections 27 | public: 28 | AccumulationBuffer(id_t id, std::string name, Config stonne_cfg, unsigned int n_accumulators); 29 | ~AccumulationBuffer(); 30 | void setMemoryConnections(std::vector memoryConnections); 31 | void setInputConnections(std::vector inputConnections); 32 | void resetSignals(); 33 | void NPSumsConfiguration(unsigned int n_psums); 34 | 35 | void configureSignals(Tile* current_tile, DNNLayer* dnn_layer, unsigned int ms_size, unsigned int n_folding); 36 | //Cycle function 37 | void cycle(); 38 | 39 | void printConfiguration(std::ofstream& out, unsigned int indent); 40 | void printStats(std::ofstream& out, unsigned int indent); //This functions prints the stats 41 | void printEnergy(std::ofstream& out, unsigned int indent); //Print the counters 42 | AccumulationBufferStats getStats(); 43 | 44 | 45 | }; 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /omega-code/stonne/stonne_linker_src/stonne_linker.h: -------------------------------------------------------------------------------- 1 | #include "../include/Config.h" 2 | #include 3 | 4 | #ifndef __stonne_linker__ 5 | #define __stonne_linker__ 6 | void simulateDenseConvForward(std::string layer_name, float* input, float* weight, float* output, int R, int S, int C, int K, int G, int N, int X, int Y, int X_, int Y_, int strides, 
int pad_x, int pad_y, std::string path_to_tile, Config stonne_cfg); 7 | 8 | //This function performs the pruning on its own and gets the bitmaps and sparse representation according to that pruning configuration. The pruning is done by pruning the lowest sparsity_level% of the data in the STA matrix. 9 | void simulateSparseGemmForward(std::string layer_name, float* KN_matrix_raw, float* MK_matrix_raw, float* output_raw, int N, int G, int gemm_M, int gemm_K, int gemm_N, float sparsity_level, Config stonne_cfg, Dataflow dataflow); 10 | 11 | //This function already gets the bitmaps and the matrices in a sparse representation. 12 | void simulateSparseGemmWithBitmapsForward(std::string layer_name, float* KN_matrix_raw, float* MK_matrix_raw, float* output_raw, int N, int G, int gemm_M, int gemm_K, int gemm_N, unsigned int* MK_bitmap, unsigned int* KN_bitmap, Config stonne_cfg, Dataflow dataflow); 13 | 14 | void simulateDenseGemmForward(std::string layer_name, float* KN_matrix_raw, float* MK_matrix_raw, float* output_raw, int N, int G, int gemm_M, int gemm_K, int gemm_N, std::string path_to_tile, Config stonne_cfg); 15 | 16 | //Sparse Dense GEMM with CSR as encoding technique 17 | void simulateSparseDenseGemm(std::string layer_name, float* MK_sparse_matrix, float* KN_dense_matrix, float* output_raw, int M, int K, int N, unsigned int* MK_col_id, unsigned int* MK_row_pointer, int T_N, int T_K, Config stonne_cfg); 18 | #endif 19 | -------------------------------------------------------------------------------- /omega-code/stonne/include/DNNModel.h: -------------------------------------------------------------------------------- 1 | #ifndef DNNMODEL_H_ 2 | #define DNNMODEL_H_ 3 | 4 | #include 5 | #include "utility.h" 6 | 7 | 8 | class CNNInput { 9 | public: 10 | CNNInput() {} 11 | int input_batch; 12 | int input_x; 13 | int input_y; 14 | int input_channel; 15 | }; 16 | 17 | class CNNFilter { 18 | public: 19 | CNNFilter() {} 20 | int filter_x; 21 | int filter_y; 22 | int 
filter_number; 23 | int filter_channel; 24 | int window_stride; 25 | }; 26 | 27 | class CNNOutput { 28 | public: 29 | CNNOutput() {} 30 | int output_batch; 31 | int output_x; 32 | int output_y; 33 | int output_channel; 34 | }; 35 | 36 | class RNNHidden { 37 | public: 38 | RNNHidden() {} 39 | int hidden_x; 40 | int hidden_y; 41 | int hidden_channel; 42 | }; 43 | 44 | //Note that CNNInput can represent the input of every DNNModel layer type, including fully connected, CNN and RNN layers. 45 | class DNNModel { 46 | public: 47 | DNNModel() { 48 | cnn_input = new CNNInput(); 49 | cnn_filter = new CNNFilter(); 50 | cnn_output = new CNNOutput(); 51 | dnn_hidden = new RNNHidden(); 52 | }; 53 | std::string model_name; 54 | std::string layer_type; 55 | std::string layer_num; 56 | CNNInput* cnn_input; 57 | CNNFilter* cnn_filter; 58 | CNNOutput* cnn_output; 59 | RNNHidden* dnn_hidden; 60 | 61 | void parseModelName(std::istringstream& instr); 62 | void parseLayerType(std::istringstream& instr); 63 | void parseLayerNumber(std::istringstream& instr); 64 | void parseInput(std::ifstream& infile); 65 | void parseWeight(std::ifstream& infile); 66 | void parseOutput(std::ifstream& infile); 67 | void parseHidden(std::ifstream& infile); 68 | void parsefile(std::ifstream& infile); 69 | }; 70 | 71 | #endif 72 | -------------------------------------------------------------------------------- /omega-code/stonne/include/CompilerMultiplierMesh.h: -------------------------------------------------------------------------------- 1 | #ifndef _COMPILER_MULTIPLIERMESH_h_ 2 | #define _COMPILER_MULTIPLIERMESH_h_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include "Tile.h" 8 | #include 9 | #include "CompilerComponent.h" 10 | 11 | class CompilerMultiplierMesh : public CompilerComponent{ 12 | private: 13 | 14 | //Multiplier signals 15 | std::map, bool> forwarding_bottom_enabled; //Forwarding to the bottom MS 16 | std::map, bool> forwarding_right_enabled; //Forwarding to 
the right ms 17 | std::map, unsigned int> ms_vn_configuration; 18 | unsigned int ms_rows; 19 | unsigned int ms_cols; 20 | 21 | //Aux functions 22 | void generate_ms_signals(unsigned int ms_rows, unsigned int ms_cols); //The function in charge to generate the signals for the Multiplier 23 | 24 | 25 | 26 | public: 27 | 28 | CompilerMultiplierMesh() { 29 | current_tile = NULL; 30 | signals_configured = false; 31 | dnn_layer=NULL; 32 | } 33 | void configureSignals(Tile* current_tile, DNNLayer* dnn_layer, unsigned int ms_rows, unsigned int ms_cols) ; 34 | void configureSparseSignals(std::vector sparseVNs, DNNLayer* dnn_layer, unsigned int num_ms); 35 | Tile* getTile() {assert(signals_configured); return this->current_tile;} 36 | 37 | //Get the signals 38 | std::map, unsigned int> get_ms_vn_configuration() const {return this->ms_vn_configuration;} 39 | std::map, bool> get_forwarding_bottom_enabled() const {return this->forwarding_bottom_enabled;} 40 | std::map, bool> get_forwarding_right_enabled() const {return this->forwarding_right_enabled;} 41 | 42 | }; 43 | 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /omega-code/stonne/src/Fifo.cpp: -------------------------------------------------------------------------------- 1 | #include "Fifo.h" 2 | #include 3 | #include 4 | #include "utility.h" 5 | Fifo::Fifo(unsigned int capacity) { 6 | this->capacity = capacity; 7 | this->capacity_words = capacity / sizeof(data_t); //Data size 8 | } 9 | 10 | bool Fifo::isFull() { 11 | return this->fifo.size() >= this->capacity_words; // > is forbidden 12 | } 13 | 14 | bool Fifo::isEmpty() { 15 | return this->fifo.size()==0; 16 | } 17 | 18 | void Fifo::push(DataPackage* data) { 19 | // assert(!isFull()); //The fifo must not be full 20 | fifo.push(data); //Inserting at the end of the queue 21 | if(this->size() > this->fifoStats.max_occupancy) { 22 | this->fifoStats.max_occupancy = this->size(); 23 | } 24 | this->fifoStats.n_pushes+=1; 
// To track information 25 | 26 | } 27 | 28 | DataPackage* Fifo::pop() { 29 | assert(!isEmpty()); 30 | this->fifoStats.n_pops+=1; //To track information 31 | DataPackage* pck = fifo.front(); //Accessing the first element of the queue 32 | fifo.pop(); //Extracting the first element 33 | return pck; 34 | } 35 | 36 | DataPackage* Fifo::front() { 37 | assert(!isEmpty()); 38 | DataPackage* pck = fifo.front(); 39 | this->fifoStats.n_fronts+=1; //To track information 40 | return pck; 41 | } 42 | 43 | unsigned int Fifo::size() { 44 | return fifo.size(); 45 | } 46 | 47 | void Fifo::printStats(std::ofstream& out, unsigned int indent) { 48 | this->fifoStats.print(out, indent); 49 | } 50 | 51 | void Fifo::printEnergy(std::ofstream& out, unsigned int indent) { 52 | out << ind(indent) << "FIFO PUSH=" << fifoStats.n_pushes; //Same line 53 | out << ind(indent) << " POP=" << fifoStats.n_pops; //Same line 54 | out << ind(indent) << " FRONT=" << fifoStats.n_fronts << std::endl; //New line 55 | } 56 | 57 | 58 | -------------------------------------------------------------------------------- /omega-code/stonne/include/types.h: -------------------------------------------------------------------------------- 1 | //Created 13/06/2019 2 | 3 | #ifndef __types__h__ 4 | 5 | #define __types__h__ 6 | 7 | const int word_size=1; 8 | const int IND_SIZE=4; 9 | 10 | typedef float data_t; 11 | typedef unsigned int bandwidth_t; 12 | typedef unsigned int id_t; 13 | typedef unsigned int cycles_t; 14 | typedef float* address_t; 15 | typedef unsigned int counter_t; 16 | typedef unsigned int latency_t; 17 | typedef unsigned int* metadata_address_t; 18 | 19 | 20 | enum operand_t {WEIGHT, IACTIVATION, OACTIVATION, PSUM}; 21 | enum traffic_t {BROADCAST, MULTICAST, UNICAST}; 22 | enum direction_t {LEFT, RIGHT}; 23 | //Adder configuration signals 24 | enum fl_t {RECEIVE, SEND, NOT_CONFIGURED}; ///forwarding link type 25 | enum adderconfig_t {ADD_2_1, ADD_3_1, ADD_1_1_PLUS_FW_1_1, FW_2_2, NO_MODE, FOLD}; // To 
the best of my knowledge, FW_2_2 corresponds with sum left and right and send the result to the FW. 26 | ///// 27 | enum Layer_t{CONV, POOL, FC, GEMM, SPARSE_DENSE}; 28 | enum ReduceNetwork_t{ASNETWORK, FENETWORK, TEMPORALRN}; 29 | enum MultiplierNetwork_t{LINEAR, OS_MESH}; 30 | ///// 31 | enum MemoryController_t{MAERI_DENSE_WORKLOAD, SIGMA_SPARSE_GEMM, MAGMA_SPARSE_DENSE, TPU_OS_DENSE}; 32 | enum MemoryHierarchy_t{DOUBLE_BUFFER, BUFFET}; 33 | enum SparsityControllerState{CONFIGURING, DIST_STA_MATRIX, DIST_STR_MATRIX, WAITING_FOR_NEXT_STA_ITER, ALL_DATA_SENT}; 34 | enum OSMeshControllerState{OS_CONFIGURING, OS_DIST_INPUTS, OS_WAITING_FOR_NEXT_ITER, OS_ALL_DATA_SENT}; 35 | ///// 36 | enum Dataflow{CNN_DATAFLOW, MK_STA_KN_STR, MK_STR_KN_STA, SPARSE_DENSE_DATAFLOW}; 37 | enum GENERATION_TYPE{GEN_BY_ROWS, GEN_BY_COLS}; 38 | enum WIRE_TYPE{RN_WIRE, MN_WIRE, DN_WIRE}; 39 | 40 | 41 | enum adderoperation_t {ADDER, COMPARATOR, MULTIPLIER, NOP}; 42 | 43 | //Testbench 44 | enum layerTest {TINY, LATE_SYNTHETIC, EARLY_SYNTHETIC, VGG_CONV11, VGG_CONV1}; 45 | #endif 46 | -------------------------------------------------------------------------------- /omega-code/stonne/include/ReduceNetwork.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Francisco Munoz on 19/06/19. 
3 | // 4 | 5 | #ifndef __REDUCENETWORK__H__ 6 | #define __REDUCENETWORK__H__ 7 | 8 | #include 9 | #include 10 | #include 11 | #include "types.h" 12 | #include "Config.h" 13 | #include "CompilerComponent.h" 14 | #include "Connection.h" 15 | #include "Unit.h" 16 | #include "Tile.h" 17 | #include "DNNLayer.h" 18 | 19 | class ReduceNetwork : public Unit{ 20 | 21 | public: 22 | ReduceNetwork(id_t id, std::string name) : Unit(id, name) {} 23 | virtual void setMemoryConnections(std::vector> memoryConnections) {assert(false);} //Connect all the memory ports from buses (busID, lineID) to its corresponding switches 24 | virtual std::map getLastLevelConnections() {assert(false);} 25 | virtual void setOutputConnection(Connection* outputConnection) {assert(false);} //This function set the outputConnection with the Prefetch buffer 26 | virtual void configureSignals(Tile* current_tile, DNNLayer* dnn_layer, unsigned int ms_size, unsigned int n_folding) {assert(false);} 27 | virtual void configureSparseSignals(std::vector sparseVNs, DNNLayer* dnn_layer, unsigned int ms_size) {assert(false);} 28 | virtual void resetSignals() {assert(false);} 29 | 30 | 31 | //Cycle function 32 | virtual void cycle(){} 33 | 34 | virtual void printConfiguration(std::ofstream& out, unsigned int indent) {} //This function prints the configuration of the ASNetwork (i.e., ASwitches configuration such as ADD_2_1, ADD_3_1, etc) 35 | virtual void printStats(std::ofstream& out, unsigned int indent) {} //This functions prints the statistics obtained during the execution. 
36 | virtual void printEnergy(std::ofstream& out, unsigned int indent){} 37 | virtual ASNetworkStats getStats() {assert(false);} 38 | 39 | 40 | }; 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /omega-code/stonne/include/CollectionBusLine.h: -------------------------------------------------------------------------------- 1 | // Created the 4th of november of 2019 by Francisco Munoz Martinez 2 | 3 | #ifndef __CollectionBusLine__h__ 4 | #define __CollectionBusLine__h__ 5 | 6 | #include "Fifo.h" 7 | #include "Connection.h" 8 | #include 9 | #include "Unit.h" 10 | #include "Stats.h" 11 | 12 | class CollectionBusLine : public Unit { 13 | 14 | private: 15 | unsigned int input_ports; //Number of input connections that correspond with input_connections.size() and input_fifos.size() 16 | std::vector input_connections; //Every input connection for this bus line 17 | std::vector input_fifos; //Every fifo corresponds with an inputConnection for this busLine 18 | Connection* output_port; //Output connection with memory 19 | unsigned int next_input_selected; //Using RR policy 20 | unsigned int busID; //Output port ID of this line 21 | 22 | void receive(); 23 | CollectionBusLineStats collectionbuslineStats; //To track information 24 | 25 | public: 26 | //Getters useful to make the connections with the ART switches and the memory 27 | std::vector getInputConnections() {return this->input_connections;} 28 | Connection* getOutputPort() {return this->output_port;} 29 | Connection* getInputPort(unsigned int inputID); 30 | 31 | //Creates the input_connections, the input_fifos and the output_port 32 | CollectionBusLine(id_t id, std::string name, unsigned int busID, unsigned int input_ports_bus_line, unsigned int connection_width, unsigned int fifo_size); 33 | ~CollectionBusLine(); //Destroy connection, fifos, and output connection 34 | void cycle(); //Select one input and send it trough the output 35 | 36 | void 
printStats(std::ofstream& out, unsigned int indent); 37 | void printEnergy(std::ofstream& out, unsigned int indent); 38 | 39 | 40 | 41 | }; 42 | 43 | 44 | 45 | 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /omega-code/stonne/other/problema.txt: -------------------------------------------------------------------------------- 1 | El switch con level 5 y num 21 esta recibiendo datos del fw link pero no esta recibiendo de los padres. Eso hace que en el cycle 2 | el switch no haga pop() del nuevo valor que le ha entrado del fw link ya que no hay nada en los padres. 3 | Esta recibiendo del switch 22 que esta comprobado que esta configurado con el fw link y como SEND y con configuracion 2:1 4 | 5 | Comprobado que el orden de ejecucion de los switches 21 y 22 es correcto... 6 | 7 | El problema es que el switch 22 (que solo tiene un child left) no deja de recibir datos de entrada incluso a pesar de que hay una parada de por medio debido a la distribucion de pesos. Para ello creo que el problema es en mirar a los switches 44 y 45 (solo el unico hijo 44 que nunca para, del siguiente nivel). 8 | 9 | OUTPUT SIZE EACH CYCLE: 1 10 | input_psum_right size 44 SWITCHHHHH 1 11 | input_psum_left size 44 SWITCHHHHH 0 12 | input_psum_right size in receive childs is 1 13 | Fw link received data 14 | fw_link_input_fifo size is 1 15 | input_psum_left_size: 1 16 | input_psum_right_size: 1 17 | Pop Done 18 | Cycle 19 | OUTPUT SIZE EACH CYCLE: 1 20 | input_psum_right size 44 SWITCHHHHH 0 21 | input_psum_left size 44 SWITCHHHHH 1 22 | input_psum_right size in receive childs is 1 23 | Fw link received data 24 | fw_link_input_fifo size is 1 25 | input_psum_left_size: 1 26 | input_psum_right_size: 1 27 | 28 | El switch (5,21) no recibe datos a traves del fw link. Esto se debe a que el switch (5,22) no recibe datos de su link derecho. 29 | Esto se debe a que el switch (6,45) (el switch derecho) esta desactivado.
El problema es que el switch (5,22) lo tiene activado 30 | cuando no deberia de ser asi. Fallo en el algoritmo de activar los links? 31 | 32 | - Tener en cuenta que los FW Links pueden tener que sincronizarse tambien y que no envien de un bando tan pronto reciben informacion 33 | - Reducir el tiempo de ejecucion del DS que se debe al recorrido del array de booleanos en el multicast 34 | -------------------------------------------------------------------------------- /omega-code/stonne/src/DNNLayer.cpp: -------------------------------------------------------------------------------- 1 | // Created by Francisco Munoz Martinez on 02/07/2019 2 | 3 | #include "DNNLayer.h" 4 | #include "utility.h" 5 | 6 | DNNLayer::DNNLayer(Layer_t layer_type, std::string layer_name, unsigned int R, unsigned int S, unsigned int C, unsigned int K, unsigned int G, unsigned int N, unsigned int X, unsigned int Y, unsigned int strides) { 7 | this->R = R; 8 | this->S = S; 9 | this->C = C / G; //The user has to specify this parameter in terms of the whole feature map 10 | this->K = K / G; //Idem 11 | this->G = G; 12 | this->N = N; 13 | this->X = X; 14 | this->Y = Y; 15 | this->strides = strides; 16 | this->layer_name = layer_name; 17 | this->layer_type = layer_type; 18 | 19 | this->X_ = (X - R + strides) / strides; 20 | this->Y_ = (Y - S + strides) / strides; 21 | 22 | } 23 | 24 | void DNNLayer::printConfiguration(std::ofstream& out, unsigned int indent) { 25 | out << ind(indent) << "\"LayerConfiguration\" : {" << std::endl; 26 | out << ind(indent+IND_SIZE) << "\"Layer_Type\" : " << this->layer_type << "," << std::endl; 27 | out << ind(indent+IND_SIZE) << "\"R\" : " << this->R << "," << std::endl; 28 | out << ind(indent+IND_SIZE) << "\"S\" : " << this->S << "," << std::endl; 29 | out << ind(indent+IND_SIZE) << "\"C\" : " << this->C << "," << std::endl; 30 | out << ind(indent+IND_SIZE) << "\"K\" : " << this->K << "," << std::endl; 31 | out << ind(indent+IND_SIZE) << "\"G\" : " << this->G << "," 
<< std::endl; 32 | out << ind(indent+IND_SIZE) << "\"N\" : " << this->N << "," << std::endl; 33 | out << ind(indent+IND_SIZE) << "\"X\" : " << this->X << "," << std::endl; 34 | out << ind(indent+IND_SIZE) << "\"Y\" : " << this->Y << "," << std::endl; 35 | out << ind(indent+IND_SIZE) << "\"X_\" : " << this->X_ << "," << std::endl; 36 | out << ind(indent+IND_SIZE) << "\"Y_\" : " << this->Y_ << std::endl; 37 | out << ind(indent) << "}"; 38 | } 39 | -------------------------------------------------------------------------------- /omega-code/stonne/include/MultiplierNetwork.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef __MULTIPLIERNETWORK__H__ 3 | #define __MULTIPLIERNETWORK__H__ 4 | 5 | #include "Connection.h" 6 | #include "MSwitch.h" 7 | #include "DSwitch.h" 8 | #include "Unit.h" 9 | #include 10 | #include "CompilerMSN.h" 11 | #include "Tile.h" 12 | #include "DNNLayer.h" 13 | #include 14 | 15 | #include 16 | 17 | class MultiplierNetwork : public Unit{ 18 | public: 19 | /* 20 | By the default the implementation of the MS just receives a single element, calculate a single psum and/or send a single input activation to the neighbour. This way, the parameters 21 | input_ports, output_ports and forwarding_ports will be set as the single data size. If this implementation change for future tests, this can be change easily bu mofifying these three parameters. 
22 | */ 23 | MultiplierNetwork(id_t id, std::string name) : Unit(id, name){} 24 | //set connections from the distribution network to the multiplier network 25 | virtual void setInputConnections(std::map input_connections) {assert(false);} 26 | //Set connections from the Multiplier Network to the Reduction Network 27 | virtual void setOutputConnections(std::map output_connections) {assert(false);} 28 | virtual void cycle() {assert(false);} 29 | virtual void configureSignals(Tile* current_tile, DNNLayer* dnn_layer, unsigned int ms_size, unsigned int n_folding) {assert(false);} 30 | virtual void configureSparseSignals(std::vector sparseVNs, DNNLayer* dnn_layer, unsigned int ms_size) {assert(false);} 31 | 32 | virtual void resetSignals() {assert(false);} 33 | virtual void printConfiguration(std::ofstream& out, unsigned int indent) {assert(false);} 34 | virtual void printStats(std::ofstream &out, unsigned int indent) {assert(false);} 35 | virtual void printEnergy(std::ofstream& out, unsigned int indent) {assert(false);} 36 | virtual MSNetworkStats getStats() {assert(false);} 37 | }; 38 | #endif 39 | -------------------------------------------------------------------------------- /omega-code/stonne/include/DSNetwork.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Francisco Munoz on 17/06/19. 3 | // 4 | 5 | #ifndef __DSNETWORK__H__ 6 | #define __DSNETWORK__H__ 7 | 8 | #include "MSNetwork.h" 9 | #include "DSwitch.h" 10 | #include "Unit.h" 11 | #include 12 | #include 13 | #include 14 | #include "Config.h" 15 | 16 | 17 | #define CONNECTIONS_PER_SWITCH 2 18 | #define LEFT 0 19 | #define RIGHT 1 20 | 21 | 22 | class DSNetwork : public Unit{ 23 | private: 24 | unsigned int ms_size; //Number of multipliers. 
i.e., the leaves of the network 25 | unsigned int port_width; 26 | int nlevels; //Number of levels of the DS without taking into account the MS level 27 | std::map, DSwitch* > dswitchtable; //Map with the switches of the topology. The connection among them will be different depending on the topology used 28 | std::map, Connection*> connectiontable; // Outputs connections of each level. 29 | Connection* inputConnection; //Given by external 30 | 31 | 32 | public: 33 | DSNetwork(id_t id, std::string name, Config stonne_cfg, unsigned int ms_size, Connection* inputConnection); //ms_size = ms_size of the group that contain this tree 34 | ~DSNetwork(); 35 | const int getNLevels() const { return this->nlevels; } 36 | const int getMsSize() const { return this->ms_size; } 37 | std::map getLastLevelConnections(); 38 | void setInputConnection(Connection* inputConnection) { this->inputConnection = inputConnection; } //This function set the inputConnection with the Prefetch buffer 39 | 40 | //Useful functions 41 | 42 | //Cycle function 43 | void cycle(); 44 | unsigned long get_time_routing(); 45 | void printStats(std::ofstream& out, unsigned int indent); //Print the stats of the component 46 | void printEnergy(std::ofstream& out, unsigned int indent); 47 | //void setConfiguration(Config cfg); 48 | DSNetworkStats getStats(); 49 | 50 | 51 | 52 | 53 | }; 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /omega-code/stonne/include/TemporalRN.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef __TEMPORALREDUCTIONNETWORK__H__ 3 | #define __TEMPORALREDUCTIONNETWORK__H__ 4 | 5 | #include "MSNetwork.h" 6 | #include "ASwitch.h" 7 | #include 8 | #include 9 | #include 10 | #include "types.h" 11 | #include "Config.h" 12 | #include "CompilerART.h" 13 | #include "ReduceNetwork.h" 14 | #include "AccumulationBuffer.h" 15 | 16 | 17 | 18 | class TemporalRN : public ReduceNetwork { 19 | private: 20 
| unsigned int port_width; //Width in bits of each port 21 | std::vector inputconnectiontable; //Connections to the accumulation buffer 22 | std::vector outputconnectiontable; //Connection to the collection bus 23 | Connection* outputConnection; //Given by external 24 | AccumulationBuffer* accumulationBuffer; //Array of accumulators to perform the folding accumulation 25 | unsigned int accumulation_buffer_size; //Number of accumulation elements in the RN 26 | Config stonne_cfg; 27 | 28 | 29 | public: 30 | TemporalRN(id_t id, std::string name, Config stonne_cfg, Connection* output_connection); 31 | ~TemporalRN(); 32 | void setMemoryConnections(std::vector> memoryConnections); //Connect all the memory ports (busID, lineID) to its corresponding accumulator 33 | std::map getLastLevelConnections(); 34 | void setOutputConnection(Connection* outputConnection) { this->outputConnection = outputConnection; } //This function set the outputConnection with the Prefetch buffer 35 | void configureSignals(Tile* current_tile, DNNLayer* dnn_layer, unsigned int ms_size, unsigned int n_folding); 36 | void configureSparseSignals(std::vector sparseVNs, DNNLayer* dnn_layer, unsigned int ms_size); 37 | void resetSignals(); 38 | 39 | 40 | //Cycle function 41 | void cycle(); 42 | 43 | void printConfiguration(std::ofstream& out, unsigned int indent); 44 | void printStats(std::ofstream& out, unsigned int indent); 45 | void printEnergy(std::ofstream& out, unsigned int indent); 46 | 47 | 48 | }; 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /omega-code/stonne/src/CompilerMultiplierMesh.cpp: -------------------------------------------------------------------------------- 1 | #include "CompilerMultiplierMesh.h" 2 | #include "Tile.h" 3 | #include "utility.h" 4 | #include 5 | #include "types.h" 6 | #include 7 | #include "cpptoml.h" 8 | 9 | void CompilerMultiplierMesh::configureSignals(Tile* current_tile, DNNLayer* dnn_layer, unsigned int 
ms_rows, unsigned int ms_cols) { 10 | assert(current_tile->get_T_K() <= ms_cols); //Number of filters 11 | assert(current_tile->get_T_X_()*current_tile->get_T_Y_() <= ms_rows); //Number of conv windows 12 | this->current_tile = current_tile; 13 | this->dnn_layer = dnn_layer; 14 | this->ms_rows = ms_rows; 15 | this->ms_cols = ms_cols; 16 | this->signals_configured = true; 17 | //Configuring Multiplier switches 18 | this->generate_ms_signals(ms_rows, ms_cols); 19 | 20 | } 21 | 22 | void CompilerMultiplierMesh::configureSparseSignals(std::vector sparseVNs, DNNLayer* dnn_layer, unsigned int num_ms) { 23 | assert(false); //TPU implementation does not allow sparsity due to its rigid nature 24 | } 25 | 26 | 27 | void CompilerMultiplierMesh::generate_ms_signals(unsigned int ms_rows, unsigned int ms_cols) { 28 | unsigned int rows_used = this->current_tile->get_T_X_()*this->current_tile->get_T_Y_(); //Rows in use: one per conv window of the tile 29 | unsigned int cols_used = this->current_tile->get_T_K(); //Columns in use: one per filter of the tile 30 | //Bottom and right signals 31 | for(int i=0; i ms_index(i,j); 34 | 35 | if((i < rows_used) && (j < cols_used)) { 36 | unsigned int VN = i*cols_used+j; //Row-major virtual neuron id over the used part of the mesh 37 | ms_vn_configuration[ms_index]=VN; 38 | } 39 | 40 | if((i < (rows_used-1)) && (j < cols_used)) { //NOTE(review): rows_used is unsigned, so rows_used-1 wraps around when rows_used is 0 - confirm tiles always have at least one window 41 | forwarding_bottom_enabled[ms_index]=true; 42 | } 43 | 44 | else { 45 | forwarding_bottom_enabled[ms_index]=false; 46 | } 47 | 48 | if((j < (cols_used-1)) && (i < rows_used)) { //NOTE(review): same unsigned wrap-around applies to cols_used-1 when cols_used is 0 49 | forwarding_right_enabled[ms_index]=true; 50 | } 51 | 52 | else { 53 | forwarding_right_enabled[ms_index]=false; 54 | } 55 | } 56 | 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /omega-code/stonne/include/MemoryController.h: -------------------------------------------------------------------------------- 1 | //Created by Francisco Munoz Martinez on 02/07/2019 2 | #ifndef __MEMORYCONTROLLER__H__ 3 | #define __MEMORYCONTROLLER__H__ 4 | 5 | #include 6 | #include "Tile.h" 7 | #include "Connection.h" 8 | #include "Fifo.h" 9 | #include
"types.h" 10 | #include "DNNLayer.h" 11 | #include "Unit.h" 12 | #include "Config.h" 13 | #include "DataPackage.h" 14 | #include "Stats.h" 15 | #include 16 | #include "ReduceNetwork.h" 17 | #include "MultiplierNetwork.h" 18 | //Base class for the memory controllers: every virtual below asserts unless overridden, except setClocking which is a no-op 19 | class MemoryController : public Unit { //Public inheritance, like the ReduceNetwork and MultiplierNetwork base classes 20 | public: 21 | MemoryController(id_t id, std::string name) : Unit(id, name){} 22 | virtual void setLayer(DNNLayer* dnn_layer, address_t input_address, address_t filter_address, address_t output_address, Dataflow dataflow) {assert(false);} 23 | virtual void setTile(Tile* current_tile) {assert(false);} 24 | virtual void setReadConnections(std::vector read_connections) {assert(false);} 25 | virtual void setWriteConnections(std::vector write_port_connections) {assert(false);} //All the write connections must be set at a time 26 | virtual void setSparseMetadata(metadata_address_t MK_metadata, metadata_address_t KN_metadata, metadata_address_t output_metadata) {assert(false);} 27 | //Used to configure the ReduceNetwork according to the controller if needed 28 | virtual void setReduceNetwork(ReduceNetwork* reduce_network) {assert(false);} 29 | //Used to configure the MultiplierNetwork according to the controller if needed 30 | virtual void setMultiplierNetwork(MultiplierNetwork* multiplier_network) {assert(false);} 31 | virtual void cycle() {assert(false);} 32 | virtual bool isExecutionFinished() {assert(false); return false;} //The return keeps builds with NDEBUG from flowing off the end of a non-void function 33 | virtual void setDenseSpatialData(unsigned int T_N, unsigned int T_K){assert(false);} 34 | virtual void setSparseMatrixMetadata(metadata_address_t MK_metadata_id, metadata_address_t MK_metadata_pointer){assert(false);} 35 | 36 | virtual void setClocking(unsigned int* clocked_op){} 37 | 38 | virtual void printStats(std::ofstream& out, unsigned int indent) {assert(false);} 39 | virtual void printEnergy(std::ofstream& out, unsigned int indent) {assert(false);} 40 | virtual SDMemoryStats getStats() {assert(false);} //NOTE(review): still has no return statement, so with NDEBUG this flows off the end of a non-void function - add a return once SDMemoryStats construction is confirmed 41 | }; 42 | 43 | 44 | #endif //__MEMORYCONTROLLER__H__ 45 | 
-------------------------------------------------------------------------------- /omega-code/stonne/include/DNNLayer.h: -------------------------------------------------------------------------------- 1 | // Created by Francisco Munoz Martinez on 02/07/2019 2 | 3 | #ifndef __DNN_LAYER__H 4 | #define __DNN_LAYER__H 5 | 6 | #include "types.h" 7 | #include 8 | 9 | class DNNLayer { 10 | private: 11 | Layer_t layer_type; 12 | std::string layer_name; // Layer name used to create the output file 13 | unsigned int R; // Number of Filter Rows 14 | unsigned int S; // Number of filter columns 15 | unsigned int C; // Number of filter and input channels 16 | unsigned int K; // Number of filters and output channels per group 17 | unsigned int G; // Number of grups 18 | unsigned int N; // Number of inputs (batch size) 19 | unsigned int X; // Number of input fmap rows 20 | unsigned int Y; // Number of input fmap columns 21 | unsigned int X_; // Number of output fmap rows 22 | unsigned int Y_; // Number of output fmap columns 23 | unsigned int strides; // Strides 24 | 25 | 26 | 27 | 28 | public: 29 | //K = Number of total filters in the network. C= Number of input channels (the whole feature map). 
G=Number of groups 30 | DNNLayer(Layer_t layer_type, std::string layer_name, unsigned int R, unsigned int S, unsigned int C, unsigned int K, unsigned int G, unsigned int N, unsigned int X, unsigned int Y, unsigned int strides); 31 | unsigned int get_R() const {return this->R;} 32 | unsigned int get_S() const {return this->S;} 33 | unsigned int get_C() const {return this->C;} 34 | unsigned int get_K() const {return this->K;} 35 | unsigned int get_G() const {return this->G;} 36 | unsigned int get_N() const {return this->N;} 37 | unsigned int get_X() const {return this->X;} 38 | unsigned int get_Y() const {return this->Y;} 39 | unsigned int get_X_() const {return this->X_;} 40 | unsigned int get_Y_() const {return this->Y_;} 41 | unsigned int get_strides() const {return this->strides;} 42 | std::string get_name() const {return this->layer_name;} 43 | Layer_t get_layer_type() const {return this->layer_type;} 44 | 45 | void printConfiguration(std::ofstream& out, unsigned int indent); 46 | }; 47 | 48 | #endif 49 | 50 | -------------------------------------------------------------------------------- /omega-code/stonne/src/Connection.cpp: -------------------------------------------------------------------------------- 1 | //Created 13/06/2019 2 | 3 | #include "Connection.h" 4 | #include 5 | #include 6 | 7 | using namespace std; 8 | 9 | Connection::Connection(int bw) { //Constructor 10 | this->bw = bw; //Maximum bw allowed in the connection 11 | this->current_capacity = 0; 12 | this->pending_data=false; 13 | } 14 | 15 | 16 | bool Connection::existPendingData() { 17 | return this->pending_data; 18 | } 19 | 20 | //Send a package to the interconnection. If there is no remaining bandwidth an assert fires (checked in DEBUG builds only) 21 | void Connection::send(vector data_p) { 22 | #ifdef DEBUG 23 | //Check the connection is not busy 24 | assert(pending_data==false); 25 | //Check there is enough bandwidth. This case should not happen so if happens, an assert is raised.
26 | this->current_capacity=0; 27 | for(int i=0; iget_size_package() + this->current_capacity) <= this->bw ); 31 | this->current_capacity += current_package->get_size_package(); //Increasing the amount of data in the connection 32 | } 33 | #endif 34 | this->data = data_p; //list of pointers assignment. All the vectors are replicated to save a copy and track it. 35 | this->pending_data = true; 36 | 37 | //Tracking parameters 38 | this->connectionStats.n_sends+=1; 39 | return; 40 | } 41 | 42 | //Return the packages from the interconnection 43 | vector Connection::receive() { 44 | if(this->pending_data) { 45 | this->pending_data = false; 46 | return this->data; 47 | } 48 | //If there is no pending data 49 | data.clear(); //Set the list of elements to return to 0 50 | this->pending_data = false; 51 | 52 | //Tracking parameters 53 | this->connectionStats.n_receives+=1; //NOTE(review): only reached when there was no pending data (the branch above returns first), so n_receives, printed as READ below, counts empty receives only - confirm this is intended 54 | 55 | return data; //Return empty list indicating that there is no data 56 | } 57 | 58 | void Connection::printEnergy(std::ofstream &out, unsigned int indent, std::string wire_type) { 59 | out << wire_type << " WRITE=" << connectionStats.n_sends; //Same line 60 | out << " READ=" << connectionStats.n_receives << std::endl; 61 | } 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /omega-code/stonne/include/CompilerFEN.h: -------------------------------------------------------------------------------- 1 | #ifndef _COMPILER_FEN_h_ 2 | #define _COMPILER_FEN_h_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include "Tile.h" 8 | #include 9 | #include "CompilerComponent.h" 10 | 11 | /* This class configure the signals for an ANEtwork following the steps presented in MAERI Paper.
*/ 12 | class CompilerFEN : public CompilerComponent { 13 | private: 14 | // Tile* current_tile; 15 | // unsigned int num_ms; 16 | // bool signals_configured; 17 | 18 | //Aux struct data to store the signals 19 | std::map, adderconfig_t> switches_configuration; //Adders configuration 20 | std::map, fl_t> fwlinks_configuration; //Indicates for each adder whether it has a connection with the neighbour 21 | std::map, std::pair> childs_enabled; //Indicates for each adder whether each of its children is enabled or not. 22 | std::map, bool> forwarding_to_memory_enabled; //Indicates for each adder whether the forwarding_to_memory link is enabled or not. 23 | std::map, bool> forwarding_to_fold_node_enabled; //Indicates for each adder whether the forwarding_to_fold_node link is enabled or not 24 | 25 | //Aux functions 26 | void generate_fen_signals(unsigned int num_ms); //Generate the signals for the Adder switches 27 | void generate_fen_enabling_links(unsigned int num_ms); //Presumably generates the link-enabling signals of the adder switches (the original comment duplicated the one above) - TODO confirm 28 | 29 | 30 | 31 | 32 | 33 | 34 | public: 35 | 36 | CompilerFEN() { 37 | current_tile = NULL; 38 | signals_configured = false; 39 | dnn_layer=NULL; 40 | } 41 | void configureSignals(Tile* current_tile, DNNLayer* dnn_layer, unsigned int num_ms, unsigned int n_folding); //NOTE(review): the previous comment here ("Print the stats of the component") looked like a copy-paste leftover; presumably this configures the FEN signals for the given tile and layer 42 | Tile* getTile() {assert(signals_configured); return this->current_tile;} 43 | 44 | //Get the signals 45 | std::map, adderconfig_t> get_switches_configuration() const {return this->switches_configuration;} 46 | std::map, fl_t> get_fwlinks_configuration() const {return this->fwlinks_configuration;} 47 | // Indicates for each adder switch (level, id) which of its child links are enabled 48 | // Position 0 of the pair: child left 49 | // Position 1 of the pair: child right 50 | std::map, std::pair> get_childs_enabled() const {return this->childs_enabled;} 51 | std::map, bool> get_forwarding_to_memory_enabled() const {return this->forwarding_to_memory_enabled;} 52 | std::map, bool>
get_forwarding_to_fold_node_enabled() const {return this->forwarding_to_fold_node_enabled;} 53 | }; 54 | 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /omega-code/stonne/include/CompilerMSN.h: -------------------------------------------------------------------------------- 1 | #ifndef _COMPILER_MSN_h_ 2 | #define _COMPILER_MSN_h_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include "Tile.h" 8 | #include 9 | #include "CompilerComponent.h" 10 | 11 | /* This class configure the signals for an ANEtwork following the steps presented in MAERI Paper. */ 12 | class CompilerMSN : public CompilerComponent{ 13 | private: 14 | 15 | //Aux struct data to store the signals 16 | //Multiplier signals 17 | std::map ms_vn_configuration; //Virtual neuron of each MS configuration 18 | std::map ms_fwsend_enabled; //Indicates for each MS if must send data to the fw link (MS LEFT) 19 | std::map ms_fwreceive_enabled; //Indicates for each MS if must receive data from the fw link (MS RIGHT) 20 | std::map forwarding_psum_enabled; //Indicates if the MS has to forward psums or otherwise has to act as a normal multiplier. 21 | std::map direct_forwarding_psum_enabled; //Indicates if the MS has to forward psums WITHOUT any control. 22 | std::map n_folding_configuration; //Indicates the number of folds that each MS is going to perform 23 | 24 | //Aux functions 25 | void generate_ms_signals(unsigned int num_ms); //The function in charge to generate the signals for the MSwitches 26 | void generate_ms_sparse_signals(unsigned int num_ms); //Generate signals for the MSwitches taking into account the different size clusters. 
27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | public: 35 | 36 | CompilerMSN() { 37 | current_tile = NULL; 38 | signals_configured = false; 39 | dnn_layer=NULL; 40 | } 41 | void configureSignals(Tile* current_tile, DNNLayer* dnn_layer, unsigned int num_ms, unsigned int n_folging) ; 42 | void configureSparseSignals(std::vector sparseVNs, DNNLayer* dnn_layer, unsigned int num_ms); 43 | Tile* getTile() {assert(signals_configured); return this->current_tile;} 44 | 45 | //Get the signals 46 | std::map get_ms_vn_configuration() const {return this->ms_vn_configuration;} 47 | std::map get_ms_fwsend_enabled() const {return this->ms_fwsend_enabled;} 48 | std::map get_ms_fwreceive_enabled() const {return this->ms_fwreceive_enabled;} 49 | std::map get_forwarding_psum_enabled() const {return this->forwarding_psum_enabled;} 50 | std::map get_direct_forwarding_psum_enabled() const {return this->direct_forwarding_psum_enabled;} 51 | std::map get_n_folding_configuration() const {return this->n_folding_configuration;} 52 | 53 | }; 54 | 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /omega-code/stonne/include/OSMeshMN.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Francisco Munoz on 17/06/19. 
3 | // 4 | 5 | #ifndef __OSMeshMN__H__ 6 | #define __OSMeshMN__H__ 7 | 8 | 9 | #include "DSNetwork.h" 10 | #include "Connection.h" 11 | #include "MultiplierOS.h" 12 | #include "DSwitch.h" 13 | #include "Unit.h" 14 | #include 15 | #include "CompilerMultiplierMesh.h" 16 | #include "Tile.h" 17 | #include "DNNLayer.h" 18 | #include "MultiplierNetwork.h" 19 | 20 | #include 21 | 22 | class OSMeshMN : public MultiplierNetwork{ 23 | private: 24 | std::map, MultiplierOS* > mswitchtable; 25 | std::map, Connection*> verticalconnectiontable; //Table with the vertical connections 26 | std::map, Connection*> horizontalconnectiontable; //Table with the horizontal connections 27 | std::map, Connection*> accbufferconnectiontable; //Table with the accbuff connections 28 | unsigned int ms_rows; // Number of rows in the ms array 29 | unsigned int ms_cols; // Number of columns in the ms array 30 | unsigned int forwarding_ports; //MSNetwork needs this parameter to create the network 31 | unsigned int buffers_capacity; //Capacity of the buffers in the MSwitches. This is neccesary to check if it is feasible to manage the folding. 32 | unsigned int port_width; //Not used yet 33 | 34 | void setPhysicalConnection(); //Create the links 35 | std::map, MultiplierOS*> getMSwitches(); 36 | //std::map, Connection*> getTopConnections(); //Return the connections 37 | 38 | 39 | 40 | public: 41 | /* 42 | By the default the implementation of the MS just receives a single element, calculate a single psum and/or send a single input activation to the neighbour. This way, the parameters 43 | input_ports, output_ports and forwarding_ports will be set as the single data size. If this implementation change for future tests, this can be change easily bu mofifying these three parameters. 
44 | */ 45 | OSMeshMN(id_t id, std::string name, Config stonne_cfg); 46 | ~OSMeshMN(); 47 | //set connections from the distribution network to the multiplier network 48 | void setInputConnections(std::map input_connections); 49 | //Set connections from the Multiplier Network to the Reduction Network 50 | void setOutputConnections(std::map output_connections); 51 | void cycle(); 52 | void configureSignals(Tile* current_tile, DNNLayer* dnn_layer, unsigned int ms_size, unsigned int n_folding); 53 | void configureSparseSignals(std::vector sparseVNs, DNNLayer* dnn_layer, unsigned int ms_size); 54 | void resetSignals(); 55 | void printConfiguration(std::ofstream& out, unsigned int indent); 56 | void printStats(std::ofstream &out, unsigned int indent); 57 | void printEnergy(std::ofstream& out, unsigned int indent); 58 | }; 59 | #endif 60 | -------------------------------------------------------------------------------- /omega-code/stonne/include/CompilerART.h: -------------------------------------------------------------------------------- 1 | #ifndef _COMPILER_ART_h_ 2 | #define _COMPILER_ART_h_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include "Tile.h" 8 | #include 9 | #include "CompilerComponent.h" 10 | 11 | /* This class configure the signals for an ANEtwork following the steps presented in MAERI Paper. */ 12 | class CompilerART : public CompilerComponent { 13 | private: 14 | // Tile* current_tile; 15 | // unsigned int num_ms; 16 | // bool signals_configured; 17 | 18 | //Aux struct data to store the signals 19 | std::map, adderconfig_t> switches_configuration; //Adders configuration 20 | std::map, fl_t> fwlinks_configuration; //Indicates for each adder if has connection with the neighbour 21 | std::map, std::pair> childs_enabled; //Indicates for each adder whether its child is enabled or not. 22 | std::map, bool> forwarding_to_memory_enabled; //Indicates for each adder whether the forwarding_to_memory link is enabled or not. 
23 | 24 | //Aux functions 25 | void generate_art_signals(unsigned int num_ms); //Generate the signals for the Adder swithces 26 | void generate_art_enabling_links(unsigned int num_ms); //Generate the signals for the Adder switches 27 | 28 | //TODO. We can try to merge both sparse a non sparse. We do this now to keep the same interface 29 | //Aux sparse functions 30 | void generate_art_signals_sparse(unsigned int num_ms); //Generate the signals for the Adder swithces in sparse mode (different VN sizes) 31 | void generate_art_enabling_links_sparse(unsigned int num_ms); //Generate the signals for the Adder switches in sparse mode (different VN sizes) 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | public: 40 | 41 | CompilerART() { 42 | current_tile = NULL; 43 | signals_configured = false; 44 | dnn_layer=NULL; 45 | } 46 | void configureSignals(Tile* current_tile, DNNLayer* dnn_layer, unsigned int num_ms, unsigned int n_folding); //Print the stats of the component 47 | void configureSparseSignals(std::vector sparseVNs, DNNLayer* dnn_layer, unsigned int num_ms); 48 | Tile* getTile() {assert(signals_configured); return this->current_tile;} 49 | 50 | //Get the signals 51 | std::map, adderconfig_t> get_switches_configuration() const {return this->switches_configuration;} 52 | std::map, fl_t> get_fwlinks_configuration() const {return this->fwlinks_configuration;} 53 | // Indicates for each as (level, id) which one of their childs links are enabled 54 | // Position 0 of the pair: child left 55 | // Position 1 of the pair: child right 56 | std::map, std::pair> get_childs_enabled() const {return this->childs_enabled;} 57 | std::map, bool> get_forwarding_to_memory_enabled() const {return this->forwarding_to_memory_enabled;} 58 | }; 59 | 60 | 61 | #endif 62 | -------------------------------------------------------------------------------- /omega-code/stonne/src/DataPackage.cpp: -------------------------------------------------------------------------------- 1 | //Created 13/06/2019 2 | 3 
| #include "DataPackage.h" 4 | #include 5 | #include 6 | 7 | //General constructor implementation 8 | 9 | DataPackage::DataPackage(size_t size_package, data_t data, operand_t data_type, id_t source) { 10 | this->size_package = size_package; 11 | this->data = data; 12 | this->data_type =data_type; 13 | this->source = source; 14 | this->traffic_type = UNICAST; //Default 15 | } 16 | 17 | DataPackage::DataPackage(size_t size_package, data_t data, operand_t data_type, id_t source,traffic_t traffic_type) : DataPackage(size_package, data, data_type, source) { 18 | this->traffic_type = traffic_type; 19 | this->dests = NULL; 20 | } 21 | // Unicast package constructor. 22 | DataPackage::DataPackage(size_t size_package, data_t data, operand_t data_type, id_t source,traffic_t traffic_type, unsigned int unicast_dest) : 23 | DataPackage(size_package, data, data_type, source, traffic_type) { 24 | assert(traffic_type == UNICAST); 25 | this->unicast_dest = unicast_dest; 26 | } 27 | //Multicast package. dests must be dynamic memory since the array is not copied. 
28 | DataPackage::DataPackage(size_t size_package, data_t data, operand_t data_type, id_t source,traffic_t traffic_type, bool* dests, unsigned int n_dests) : DataPackage(size_package, data, data_type, source, traffic_type) { 29 | this->dests = dests; 30 | this->n_dests = n_dests; 31 | } 32 | 33 | //psum package 34 | DataPackage::DataPackage(size_t size_package, data_t data, operand_t data_type, id_t source, unsigned int VN, adderoperation_t operation_mode): DataPackage(size_package, data, data_type, source) { 35 | this->VN = VN; 36 | this->operation_mode = operation_mode; 37 | } 38 | 39 | void DataPackage::setOutputPort(unsigned int output_port) { 40 | this->output_port = output_port; 41 | } 42 | 43 | void DataPackage::setIterationK(unsigned int iteration_k) { 44 | this->iteration_k = iteration_k; 45 | } 46 | 47 | //Copy constructor 48 | DataPackage::DataPackage(DataPackage* pck) { 49 | this->size_package = pck->get_size_package(); 50 | this->data = pck->get_data(); 51 | this->data_type = pck->get_data_type(); 52 | this->source = pck->get_source(); 53 | this->traffic_type = pck->get_traffic_type(); 54 | this->unicast_dest = pck->get_unicast_dest(); 55 | this->VN = pck->get_vn(); 56 | this->operation_mode = pck->get_operation_mode(); 57 | this->output_port = output_port; 58 | this->iteration_k=pck->getIterationK(); 59 | if(this->traffic_type == MULTICAST) { 60 | this->n_dests = pck->get_n_dests(); 61 | const bool* prev_pck_dests = pck->get_dests(); 62 | this->dests = new bool[this->n_dests]; 63 | //for(int i=0; idests[i]=prev_pck_dests[i]; 65 | //} 66 | memcpy(this->dests, prev_pck_dests, sizeof(bool)*this->n_dests); 67 | 68 | } 69 | 70 | } 71 | 72 | DataPackage::~DataPackage() { 73 | 74 | if(this->traffic_type==MULTICAST) { 75 | delete[] dests; 76 | } 77 | } 78 | 79 | 80 | -------------------------------------------------------------------------------- /omega-code/stonne/include/Tile.h: 
-------------------------------------------------------------------------------- 1 | //Created by Francisco Munoz Martinez on 26/06/2019 2 | 3 | #ifndef __TILE__H 4 | #define __TILE__H 5 | 6 | #include 7 | #include "types.h" 8 | #include 9 | 10 | 11 | /* 12 | * This class represents a sparse cluster mapped onto the architecture. 13 | * Basically, size is the number of multipliers needed, and folding indicates if this cluster 14 | * requires an extra multiplier to act as a forwarder. 15 | */ 16 | class SparseVN { 17 | private: 18 | unsigned int size; 19 | bool folding; 20 | 21 | public: 22 | SparseVN(unsigned int size, bool folding) {this->size=size; this->folding=folding;} 23 | unsigned int get_VN_Size() {if(this->folding) {return this->size+1;} else {return this->size;}} 24 | bool getFolding() {return this->folding;} 25 | }; 26 | 27 | 28 | 29 | 30 | /* 31 | This class represent a tile 32 | */ 33 | 34 | 35 | class Tile { 36 | private: 37 | unsigned int T_R; // Number of filter rows 38 | unsigned int T_S; // Number of filter columns 39 | unsigned int T_C; // Number of input and filter channels 40 | unsigned int T_K; // Number of filters and number of ofmap channels per group 41 | unsigned int T_G; // Number of groups 42 | unsigned int T_N; // Batch size 43 | unsigned int T_X_; // Number of output fmap rows 44 | unsigned int T_Y_; // Number of output fmap columns 45 | unsigned int VN_Size; // Virtual Neuron Size (i.e., T_R*T_S*T_C) 46 | unsigned int Num_VNs; // Number of Virtual Neurons (i.e., T_K*T_N*T_X_*T_Y_) 47 | bool folding; // T_R x T_S x T_C < R*S*C. Neccesary to generate the signals 48 | 49 | 50 | public: 51 | Tile(unsigned int T_R, unsigned int T_S, unsigned int T_C, unsigned int T_K, unsigned int T_G, unsigned int T_N, unsigned int T_X_, unsigned int T_Y_, bool folding); //Used by the architecture 52 | 53 | Tile(std::string tile_file); //Used by some external front-end to get the tile values from an input file. 
54 | 55 | //Signals generation 56 | void generate_signals(int num_ms); 57 | 58 | //Getters 59 | unsigned int get_T_R() const {return this->T_R;} 60 | unsigned int get_T_S() const {return this->T_S;} 61 | unsigned int get_T_C() const {return this->T_C;} 62 | unsigned int get_T_K() const {return this->T_K;} 63 | unsigned int get_T_G() const {return this->T_G;} 64 | unsigned int get_T_N() const {return this->T_N;} 65 | unsigned int get_T_X_() const {return this->T_X_;} 66 | unsigned int get_T_Y_() const {return this->T_Y_;} 67 | unsigned int get_VN_Size() const {return this->VN_Size;} 68 | unsigned int get_Num_VNs() const {return this->Num_VNs;} 69 | 70 | 71 | bool get_folding_enabled() const {return this->folding;} //Return whether this tile implies folding for the current configured network 72 | 73 | void printConfiguration(std::ofstream& out, unsigned int indent); 74 | 75 | 76 | 77 | }; 78 | 79 | 80 | 81 | 82 | #endif 83 | -------------------------------------------------------------------------------- /omega-code/stonne/include/utility.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef UTILITY_H_ 3 | #define UTILITY_H_ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "types.h" 11 | 12 | bool isNum(std::string str); 13 | 14 | bool ispowerof2(unsigned int x); 15 | 16 | std::string getstr(std::istringstream& instr); 17 | 18 | std::string get_string_adder_configuration(adderconfig_t config); 19 | 20 | std::string get_string_fwlink_direction(fl_t fl_direction); 21 | 22 | std::string ind(unsigned int indent); //Get a string with as many spaces as indent value. 
23 | 24 | std::string get_string_reduce_network_type(ReduceNetwork_t reduce_network_type); 25 | 26 | ReduceNetwork_t get_type_reduce_network_type(std::string reduce_network_type); 27 | 28 | MemoryController_t get_type_memory_controller_type(std::string memory_controller_type); 29 | 30 | std::string get_string_memory_controller_type(MemoryController_t memory_controller_type); 31 | 32 | std::string get_string_multiplier_network_type(MultiplierNetwork_t multiplier_network_type); 33 | 34 | MultiplierNetwork_t get_type_multiplier_network_type(std::string multiplier_network_type); 35 | 36 | MemoryHierarchy_t get_type_memory_hierarchy_type(std::string memory_hierarchy_type); 37 | 38 | std::string get_string_memory_hierarchy_type(MemoryHierarchy_t memory_hierarchy_type); 39 | 40 | Dataflow get_type_dataflow_type(std::string dataflow_type); 41 | 42 | std::string get_string_dataflow_type(Dataflow dataflow); 43 | 44 | float* generateMatrixDense(unsigned int rows, unsigned int cols, float sparsity); 45 | 46 | float* generateMatrixDenseSampled(unsigned int rows, unsigned int cols, int nbr); 47 | 48 | unsigned int* generateBitMapFromDense(float* denseMatrix, unsigned int rows, unsigned int cols, GENERATION_TYPE gen_type); 49 | 50 | 51 | float* generateMatrixSparseFromDenseNoBitmap(float* denseMatrix, unsigned int rows, unsigned int cols, GENERATION_TYPE gen_type) ; 52 | float* generateMatrixSparseFromDense(float* denseMatrix, unsigned int* bitmap, unsigned int rows, unsigned int cols, GENERATION_TYPE gen_type); 53 | 54 | ///// 55 | int* generateMinorIDFromDense(float* denseMatrix, unsigned int rows, unsigned int cols, int &nnz, GENERATION_TYPE gen_type); 56 | ///// 57 | //int* generateMajorIDFromDense(float* denseMatrix, unsigned int rows, unsigned int cols, GENERATION_TYPE gen_type); 58 | ///// 59 | int* generateMajorPointerFromDense(float* denseMatrix, unsigned int rows, unsigned int cols, GENERATION_TYPE gen_type); 60 | 61 | void printDenseMatrix(float* matrix, unsigned int 
rows, unsigned int cols); 62 | 63 | void printBitMap(unsigned int* bitmap, unsigned int rows, unsigned int cols); 64 | 65 | void printSparseMatrix(float* sparseMatrix, unsigned int* bitmap, unsigned int rows, unsigned int cols); 66 | 67 | float* generatePrunnedMatrix(const float* src_matrix, unsigned int size, float pr_ratio); 68 | 69 | //Opt functions 70 | void organizeMatrix (float* matrix, unsigned int M, unsigned int K, unsigned int* pointer_table, GENERATION_TYPE gen_type); 71 | 72 | void organizeMatrixBack (float* matrix, unsigned int M, unsigned int K, unsigned int* pointer_table, GENERATION_TYPE gen_type); 73 | 74 | unsigned int* calculateOrdering (float* matrix, unsigned int M, unsigned int K, GENERATION_TYPE gen_type, int num_ms); 75 | 76 | #endif //UTILITY_H 77 | -------------------------------------------------------------------------------- /omega-code/stonne/outputs/nuevos_tests/README.txt: -------------------------------------------------------------------------------- 1 | Hola Antonio, 2 | 3 | aqui te resumo las ejecuciones que he realizado para la generacion de los JSON 4 | 5 | Execution_4_VNs_32_mswitches.txt: 6 | 7 | - Comando usado: ./stonne -R=2 -S=2 -C=2 -K=32 -G=1 -N=1 -X=8 -Y=8 -T_R=2 -T_S=2 -T_C=2 -T_K=4 -T_G=1 -T_N=1 -T_X_=1 -T_Y_=1 8 | -num_ms=32 -dn_bw=8 -rn_bw=8. 9 | 10 | - Numero de multiplicadores: 32 11 | 12 | - Numero de neuronas virtuales: 4 13 | 14 | - Notas extras: Se usan todos los multiplicadores, repartidos en 4 neuronas virutales de 8 de tamanio cada uno. 15 | 16 | 17 | Execution_2_VNs_32_mswitches.txt: 18 | 19 | - Comando usado: ./stonne -R=2 -S=2 -C=2 -K=32 -G=1 -N=1 -X=8 -Y=8 -T_R=2 -T_S=2 -T_C=2 -T_K=2 -T_G=1 -T_N=1 20 | -T_X_=1 -T_Y_=1 -num_ms=32 -dn_bw=8 -rn_bw=8 21 | 22 | - Numero de multiplicadores: 32 23 | 24 | - Numero de neuronas virtuales: 2 25 | 26 | - Notas extra: Hay 32 multiplicadores pero solo 16 estan en uso por medio de 2 neuronas virtuales de 8 cada una. 
27 | 28 | Execution_2_larger_VNs_32_mswitches.txt: 29 | 30 | - Comando usado: ./stonne -R=2 -S=2 -C=4 -K=32 -G=1 -N=1 -X=8 -Y=8 -T_R=2 -T_S=2 -T_C=4 -T_K=2 -T_G=1 31 | -T_N=1 -T_X_=1 -T_Y_=1 -num_ms=32 -dn_bw=8 -rn_bw=8 32 | 33 | - Numero de multiplicadores: 32 34 | 35 | - Numero de neuronas virtuales: 2 36 | 37 | - Notas extra: Esta vez solo hay 2 neuronas virtuales, pero cada una tiene tamanio 16, por lo que se usan todos los multiplicadores 38 | 39 | Execution_2_VNs_64_mswitches.txt: 40 | 41 | - Comando usado: ./stonne -R=2 -S=2 -C=8 -K=32 -G=1 -N=1 -X=8 -Y=8 -T_R=2 -T_S=2 42 | -T_C=8 -T_K=2 -T_G=1 -T_N=1 -T_X_=1 -T_Y_=1 -num_ms=64 -dn_bw=8 -rn_bw=8 43 | 44 | - Numero de multiplicadores: 64 45 | 46 | - Numero de neuronas virtuales: 2 47 | 48 | - Notas extra: Usamos 2 neuronas virtuales de 32 cada una, usando todos los multiplicadores 49 | 50 | 51 | Execution_8_VNs_64_mswitches.txt: 52 | 53 | - Comando usado: ./stonne -R=2 -S=2 -C=2 -K=32 -G=1 -N=1 -X=8 -Y=8 -T_R=2 -T_S=2 54 | -T_C=2 -T_K=8 -T_G=1 -T_N=1 -T_X_=1 -T_Y_=1 -num_ms=64 -dn_bw=8 -rn_bw=8 55 | 56 | - Numero de multiplicadores: 64 57 | 58 | - Numero de neuronas virtuales: 8 59 | 60 | - Notas extra: Usamos 8 neuronas virtuales de 8 multiplicadores cada una, usando asi todos los multiplicadores disponibles. 61 | 62 | Execution_2_VNs_128_mswitches.txt: 63 | 64 | - Comando usado: ./stonne -R=2 -S=2 -C=16 -K=32 -G=1 -N=1 -X=8 -Y=8 -T_R=2 -T_S=2 -T_C=16 65 | -T_K=2 -T_G=1 -T_N=1 -T_X_=1 -T_Y_=1 -num_ms=128 -dn_bw=8 -rn_bw=8 66 | 67 | - Numero de multiplicadores: 128 68 | 69 | - Numero de neuronas virtuales: 2 70 | 71 | - Notas extra: Se usan dos neuronas virtuales de 64 multiplicadores cada una, usando todos los multiplicadores disponibles. 
72 | 73 | 74 | Execution Execution_4_VNs_SIZE_27_128_mswitches.txt: 75 | 76 | - Comando usado: ./stonne -R=3 -S=3 -C=3 -K=32 -G=1 -N=1 -X=8 -Y=8 -T_R=3 -T_S=3 -T_C=3 77 | -T_K=4 -T_G=1 -T_N=1 -T_X_=1 -T_Y_=1 -num_ms=128 -dn_bw=8 -rn_bw=8 78 | 79 | - Numero de multiplicadores: 128 80 | 81 | - Numero de neuronas virtuales: 4 82 | 83 | - Notas extra: WARNING, se usan 4 neuronas virtuales de 27 multiplicadores cada una, usando 108 totales y dejando libres 20. CUIDADO, porque esta ejecucion usa los enlaces forwarding links entre los Adders de un mismo nivel. Los enlaces ESPECIALES. 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /omega-code/stonne/src/CollectionBus.cpp: -------------------------------------------------------------------------------- 1 | //Created by Francisco Munoz on 28/02/2019 2 | #include 3 | #include "CollectionBus.h" 4 | #include "utility.h" 5 | 6 | Bus::Bus(id_t id, std::string name, Config stonne_cfg) : Unit(id, name) { 7 | this->n_bus_lines=stonne_cfg.m_SDMemoryCfg.n_write_ports; 8 | this->input_ports_bus_line=(stonne_cfg.m_MSNetworkCfg.ms_size / this->n_bus_lines)+ 1; 9 | this->connection_width = stonne_cfg.m_SDMemoryCfg.port_width; 10 | this->fifo_size = 100; //TODO 11 | for(int i=0; iinput_ports_bus_line, this->connection_width, this->fifo_size); 14 | collection_bus_lines.push_back(busline); 15 | } 16 | } 17 | 18 | Bus::~Bus() { 19 | for(int i=0; i> Bus::getInputConnections() { 25 | std::vector> connections; 26 | for(int i=0; i connections_collection_bus = collection_bus->getInputConnections(); 29 | connections.push_back(connections_collection_bus); //Setting the connections for that busID 30 | } 31 | return connections; 32 | } 33 | 34 | Connection* Bus::getInputConnectionFromBusLine(unsigned int busID, unsigned int inputID) { 35 | return this->collection_bus_lines[busID]->getInputPort(inputID); 36 | } 37 | 38 | std::vector Bus::getOutputConnections() { 39 | std::vector output_connections; 
40 | for(int i=0; igetOutputPort()); 42 | } 43 | return output_connections; 44 | } 45 | 46 | void Bus::cycle() { 47 | for(int i=0; icycle(); 49 | } 50 | } 51 | 52 | void Bus::printStats(std::ofstream &out, unsigned int indent) { 53 | out << ind(indent) << "\"CollectionBusStats\" : {" << std::endl; 54 | //out << ind(indent+IND_SIZE) << "\"ms_size\" : " << this->ms_size << std::endl; DSNetwork global parameters 55 | out << ind(indent+IND_SIZE) << "\"CollectionBusLineStats\" : [" << std::endl; //One entry per BusLine 56 | for(int i=0; i < this->collection_bus_lines.size(); i++) { 57 | collection_bus_lines[i]->printStats(out, indent+IND_SIZE+IND_SIZE); 58 | if(i==(this->collection_bus_lines.size()-1)) { //If I am in the last BusLine, the comma to separate the BusLines is not added 59 | out << std::endl; //This is added because the call to ms print do not show it (to be able to put the comma, if neccesary) 60 | } 61 | else { 62 | out << "," << std::endl; 63 | } 64 | 65 | 66 | 67 | } 68 | out << ind(indent+IND_SIZE) << "]" << std::endl; 69 | out << ind(indent) << "}"; 70 | 71 | } 72 | 73 | void Bus::printEnergy(std::ofstream& out, unsigned int indent) { 74 | /* 75 | This component prints the counters for each bus line 76 | */ 77 | 78 | for(int i=0; i < this->collection_bus_lines.size(); i++) { 79 | collection_bus_lines[i]->printEnergy(out, indent); 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /omega-code/stonne/include/DSwitch.h: -------------------------------------------------------------------------------- 1 | //Created 13/06/2019 2 | 3 | #ifndef __DSwitch__h 4 | #define __DSwitch__h 5 | 6 | #include "types.h" 7 | #include "DataPackage.h" 8 | #include "Connection.h" 9 | #include "Unit.h" 10 | #include "Config.h" 11 | #include 12 | #include "Stats.h" 13 | /* 14 | */ 15 | 16 | class DSwitch : public Unit{ 17 | private: 18 | unsigned int level; //Level where the switch is set in the tree 19 | unsigned int 
num_in_level; 20 | unsigned int num_ms; //These three parameters are for routing. In hardware it is not neccesary since it is used a bit vector 21 | bool pending_data; // Indicates if data exists 22 | unsigned int input_ports; // Number of input ports in the DSwitch 23 | unsigned int output_ports; //Number of output ports in the DSwitch 24 | unsigned int port_width; 25 | std::vector data; // Array of packages that are send/receive in a certain cycle. The number of packages depends on the bw of the connection. Even though the switches are bufferless, this is prepared for future implementations. In the first case in which the switches are bufferless, 26 | //in every cycle the elements will be writen in the array and read right after. 27 | unsigned int current_capacity; // the capacity must not exceed the input bw of the connection 28 | Connection* leftConnection; // This is the left connection of the switch 29 | Connection* rightConnection; // This is the right connection of the switch 30 | Connection* inputConnection; 31 | latency_t latency; 32 | 33 | ///Aux functions 34 | void route_packages(); // Used to send the packages depending on the type (BROADCAST, UNICAST or MULTICAST) 35 | 36 | //DEBUG PARAMETERS 37 | unsigned long time_routing; 38 | //unsigned long time_receive; 39 | //unsigned long time_send; 40 | DSwitchStats dswitchStats; //contains the counters to track the behaviour of the DSwitch 41 | 42 | 43 | public: 44 | //Since input_ports and output_ports depends on the level of the tree, this cannot be a configuring parameter and has to be set at the moment of creating the network 45 | DSwitch(id_t id, std::string name, unsigned int level, unsigned int num_in_level, Config stonne_cfg, unsigned int ms_size); //Output bandwidth is the bw per branch 46 | DSwitch(id_t id, std::string name, unsigned int level, unsigned int num_in_level, Config stonne_cfg, unsigned int ms_size, Connection* leftConnection, Connection* rightConnection, Connection* inputConnection); 
47 | void setLeftConnection(Connection* leftConnection); //Set the left connection of the switch 48 | void setRightConnection(Connection* rightConnection); //Set the right connection of the switch 49 | void setInputConnection(Connection* inputConnection); //Set the input connection of the switch 50 | const unsigned int getInputPorts() const {return this->input_ports;} //Get the input ports 51 | const unsigned int getOutputPorts() const {return this->output_ports;} //get the output ports 52 | void send(std::vector data, Connection* connection); //Packages of data to be send depending on routing. 53 | void receive(); //Receive a list of packages from the Inputconnection and save it in this->data 54 | void cycle(); 55 | void printStats(std::ofstream& out, unsigned int indent); 56 | void printEnergy(std::ofstream& out, unsigned int indent); 57 | unsigned long get_time_routing() const {return this->time_routing;} 58 | DSwitchStats getStats() {return this->dswitchStats;} 59 | 60 | 61 | }; 62 | 63 | #endif 64 | 65 | -------------------------------------------------------------------------------- /omega-code/stonne/include/MSNetwork.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Francisco Munoz on 17/06/19. 3 | // 4 | 5 | #ifndef __MSNETWORK__H__ 6 | #define __MSNETWORK__H__ 7 | 8 | #include "DSNetwork.h" 9 | #include "Connection.h" 10 | #include "MSwitch.h" 11 | #include "DSwitch.h" 12 | #include "Unit.h" 13 | #include 14 | #include "CompilerMSN.h" 15 | #include "Tile.h" 16 | #include "DNNLayer.h" 17 | #include "MultiplierNetwork.h" 18 | #include "Stats.h" 19 | 20 | #include 21 | 22 | class MSNetwork : public MultiplierNetwork{ 23 | private: 24 | std::map mswitchtable; //Table with the multiplier switches 25 | std::map fwconnectiontable; //Table with the forwarding connections. Each position is the input for the MS in that certain position. 26 | // The connections between the DS and the RS is not needed. 
Once connected by the external MAERi object, the MSwitches and DSwitches communicate 27 | //each other 28 | unsigned int ms_size; // Number of multipliers 29 | unsigned int forwarding_ports; //MSNetwork needs this parameter to create the network 30 | unsigned int buffers_capacity; //Capacity of the buffers in the MSwitches. This is neccesary to check if it is feasible to manage the folding. 31 | void virtualNetworkConfig(std::map vn_conf); //set the VN of each MS 32 | void fwLinksConfig(std::map ms_fwsend_enabled, std::map ms_fwreceive_enabled); //Set to each MS if it must receive and/or send data from/to the fw link 33 | void forwardingPsumConfig(std::map forwarding_psum_enabled); //Set to each MS if it has to act as a normal multiplier or an extra MS to accumulate psums 34 | 35 | void directForwardingPsumConfig(std::map direct_forwarding_psum_enabled); //Same as previous one, but the forwarding is always carried out without control 36 | 37 | void setPhysicalConnection(); //Create the forwarding links in this MSNetwork 38 | void nWindowsConfig(unsigned int n_windows); 39 | void nFoldingConfig(std::map n_folding_configuration); //Set number of folds for each MS 40 | std::map getMSwitches(); 41 | std::map getForwardingConnections(); //Return the connections 42 | 43 | 44 | public: 45 | /* 46 | By the default the implementation of the MS just receives a single element, calculate a single psum and/or send a single input activation to the neighbour. This way, the parameters 47 | input_ports, output_ports and forwarding_ports will be set as the single data size. If this implementation change for future tests, this can be change easily bu mofifying these three parameters. 
48 | */ 49 | MSNetwork(id_t id, std::string name, Config stonne_cfg); 50 | ~MSNetwork(); 51 | //set connections from the distribution network to the multiplier network 52 | void setInputConnections(std::map input_connections); 53 | //Set connections from the Multiplier Network to the Reduction Network 54 | void setOutputConnections(std::map output_connections); 55 | void cycle(); 56 | void configureSignals(Tile* current_tile, DNNLayer* dnn_layer, unsigned int ms_size, unsigned int n_folding); 57 | void configureSparseSignals(std::vector sparseVNs, DNNLayer* dnn_layer, unsigned int ms_size); 58 | void resetSignals(); 59 | void printConfiguration(std::ofstream& out, unsigned int indent); 60 | void printStats(std::ofstream &out, unsigned int indent); 61 | void printEnergy(std::ofstream& out, unsigned int indent); 62 | MSNetworkStats getStats(); 63 | 64 | }; 65 | #endif 66 | -------------------------------------------------------------------------------- /omega-code/stonne/include/MultiplierOS.h: -------------------------------------------------------------------------------- 1 | //Created 27/10/2020 2 | 3 | #ifndef __MultiplierOS__h 4 | #define __MultiplierOS__h 5 | 6 | #include "types.h" 7 | #include "DataPackage.h" 8 | #include "Connection.h" 9 | #include "Fifo.h" 10 | #include "Unit.h" 11 | #include 12 | #include "Config.h" 13 | #include "Stats.h" 14 | /* 15 | */ 16 | 17 | class MultiplierOS : public Unit { 18 | private: 19 | Fifo* top_fifo; // Packages received from top (i.e., weights) 20 | Fifo* left_fifo; //Packages recieved from legt (i.e., activations) 21 | Fifo* right_fifo; //Packages to be sent to the right (i.e., activations) 22 | Fifo* bottom_fifo; //Packages to be sent to the bottom (i.e., weights) 23 | Fifo* accbuffer_fifo; //Psum ready to be sent to the parent 24 | 25 | Connection* left_connection; // To the left neighbour or memory port 26 | Connection* right_connection; //To the right neighbour 27 | Connection* top_connection; //To the top neighbour 
or the memory port 28 | Connection* bottom_connection; //Input from the neighbour 29 | Connection* accbuffer_connection; //To the accbuffer to keep OS 30 | cycles_t latency; //latency in number of cycles 31 | int row_num; 32 | int col_num; 33 | int num; //General num, just used for information (num = row_num*ms_cols + col_num) 34 | //This values are in esence the size of a single element in the architecture (by default) 35 | unsigned int input_ports; 36 | unsigned int output_ports; 37 | unsigned int forwarding_ports; 38 | unsigned int buffers_capacity; 39 | unsigned int port_width; 40 | unsigned int ms_rows; 41 | unsigned int ms_cols; 42 | 43 | 44 | cycles_t local_cycle; 45 | MultiplierOSStats mswitchStats; //Object to track the behaviour of the MSwitch 46 | 47 | //Signals 48 | unsigned int VN; 49 | bool forward_right=false; //Based on rows (windows) left and dimensions 50 | bool forward_bottom=false; 51 | 52 | public: 53 | MultiplierOS(id_t id, std::string name, int row_num, int col_num, Config stonne_cfg); 54 | MultiplierOS(id_t id, std::string name, int row_num, int col_num, Config stonne_cfg, Connection* left_connection, Connection* right_connection, Connection* top_connection, Connection* bottom_connection); 55 | ~MultiplierOS(); 56 | void setTopConnection(Connection* top_connection); //Set the top connection 57 | void setLeftConnection(Connection* left_connection); //Set the left connection 58 | void setRightConnection(Connection* right_connection); //Set the right connection 59 | void setBottomConnection(Connection* bottom_connection); 60 | void setAccBufferConnection(Connection* accbuffer_connection); 61 | 62 | void send(); //Send right, bottom and psum fifos 63 | void receive(); //Receive from top and left 64 | 65 | DataPackage* perform_operation_2_operands(DataPackage* pck_left, DataPackage* pck_right); //Perform multiplication and returns result. 
66 | 67 | void cycle(); //Computing a cyclels 68 | void resetSignals(); 69 | 70 | //Configure the forwarding signals that indicate if this ms has to forward data to the bottom and or right neighbours 71 | void configureBottomSignal(bool bottom_signal); 72 | void configureRightSignal(bool right_signal); 73 | void setVirtualNeuron(unsigned int VN); 74 | 75 | void printConfiguration(std::ofstream& out, unsigned int indent); //This function prints the configuration of MSwitch such us the VN ID 76 | void printStats(std::ofstream& out, unsigned int indent); 77 | void printEnergy(std::ofstream& out, unsigned int indent); 78 | MultiplierOSStats getStats() {return this->mswitchStats;} 79 | 80 | 81 | 82 | }; 83 | 84 | #endif 85 | 86 | -------------------------------------------------------------------------------- /omega-code/stonne/src/other_main/main_gemm_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "STONNEModel.h" 3 | #include "types.h" 4 | #include 5 | #include 6 | #include "testbench.h" 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | 13 | 14 | int main(int argc, char *argv[]) { 15 | float EPSILON=0.05; 16 | unsigned int MAX_RANDOM=10; //Variable used to generate the random values 17 | /** Generating the inputs and outputs **/ 18 | 19 | //Layer parameters (See MAERI paper to find out the taxonomy meaning) 20 | std::string layer_name="TestLayer"; 21 | unsigned int K=8; 22 | unsigned int M=4; 23 | unsigned int N=4; 24 | Config stonne_cfg; //Hardware parameters 25 | stonne_cfg.m_MSNetworkCfg.ms_size=8; 26 | stonne_cfg.m_SDMemoryCfg.n_read_ports=8; 27 | stonne_cfg.m_SDMemoryCfg.n_write_ports=8; 28 | 29 | //Calculating output parameters 30 | unsigned int O_rows=M; 31 | unsigned int O_columns = N; 32 | 33 | //Creating arrays to store the ifmap ofmap and weights 34 | 35 | unsigned int MK_size=M*K; 36 | unsigned int NK_size=N*K; 37 | unsigned int output_size=M*N; 38 | 39 | float 
MK_matrix[] = {1.0, 3.0, 5.0, 2.0, 3.0, 1.0, 5.0, 3.0, 2.0, 1.0, 2.0, 5.0, 1.0, 2.0, 3.0, 4.0, 2.0, 3.0, 5.0}; 40 | float KN_matrix[] = {1.0 ,3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 3.0, 1.0, 2.0, 4.0, 1.0, 4.0}; 41 | unsigned int metadata_MK[] = {1,1,0,0,1,0,0,0, 42 | 0,0,1,1,1,1,1,1, 43 | 1,1,1,1,1,1,1,0, 44 | 0,0,0,1,1,1,0,0}; //Actually these are bits 45 | 46 | unsigned int metadata_KN[]={0,1,0,1, 47 | 1,1,0,1, 48 | 1,0,0,1, 49 | 1,0,1,0, 50 | 1,1,1,1, 51 | 1,0,0,0, 52 | 0,0,1,1, 53 | 1,0,0,0}; 54 | float output_matrix[output_size]; //Used to store the CPU computed values to compare with the simulator version 55 | unsigned int metadata_output[output_size]; 56 | 57 | //computing CPU version 58 | //sequential_layer(R, S, C, K, G, N, X, Y, strides, ifmap, filter, ofmap_cpu); 59 | 60 | //Computing the CNN Layer with the simulator 61 | Stonne* stonne_instance = new Stonne(stonne_cfg); //Creating instance of the simulator 62 | stonne_instance->loadGEMM("GEMM_test", N, K, M, MK_matrix, KN_matrix, metadata_MK, metadata_KN, output_matrix, metadata_output, MK_STA_KN_STR ); //Loading GEMM 63 | stonne_instance->run(); //Running the simulator 64 | //Printing the results 65 | std::cout << "MK bitmap:" << std::endl; 66 | for(int i=0; i 11 | #include 12 | #include 13 | #include "types.h" 14 | #include "Config.h" 15 | #include "CompilerFEN.h" 16 | #include "ReduceNetwork.h" 17 | 18 | #define CONNECTIONS_PER_SWITCH 2 19 | #define LEFT 0 20 | #define RIGHT 1 21 | 22 | 23 | class FENetwork : public ReduceNetwork { 24 | private: 25 | unsigned int port_width; //Width in bits of each port 26 | unsigned int ms_size; //Number of multipliers. i.e., the leaves of the network 27 | int nlevels; //Number of levels of the AS without taking into account the MS level 28 | std::map, FEASwitch* > aswitchtable; //Map with the switches of the topology. 
The connection among them will be different depending on the topology used 29 | 30 | //Copy of the pointers of the map aswitchtable used to generate the connections between the ART and the bus in the same way as the implementation in bluespec does. 31 | std::vector single_switches; //List of switches that are single reduction switches (see blueSpec implementation. i.e., the do not have forwarding connections) 32 | std::vector double_switches; //List of double switches that are double reduction switches (i.e., in bluespec implementation these are sw that have fw links). 33 | 34 | std::map, Connection*> inputconnectiontable; // input connections of each level. 35 | std::map, Connection*> forwardingconnectiontable; // Forwarding connections of each level (intermedium links) 36 | std::map, Connection*> foldingconnectiontable; //Forwarding connections between each node and its folding manager 37 | Connection* outputConnection; //Given by external 38 | 39 | 40 | public: 41 | FENetwork(id_t id, std::string name, Config stonne_cfg, Connection* output_connection); 42 | ~FENetwork(); 43 | const int getNLevels() const { return this->nlevels; } 44 | const int getMsSize() const { return this->ms_size; } 45 | std::vector getSingleSwitches() {return this->single_switches;} 46 | std::vector getDoubleSwitches() {return this->double_switches;} 47 | void setMemoryConnections(std::vector> memoryConnections); //Connect all the memory ports (busID, lineID) to its corresponding AS 48 | std::map getLastLevelConnections(); 49 | void setOutputConnection(Connection* outputConnection) { this->outputConnection = outputConnection; } //This function set the outputConnection with the Prefetch buffer 50 | void addersConfiguration(std::map, adderconfig_t> adder_configurations); 51 | void forwardingConfiguration(std::map, fl_t> fl_configurations); //Configure the forwarding links. Enable the required ones. 
52 | void childsLinksConfiguration(std::map, std::pair> childs_configuration); 53 | void forwardingToMemoryConfiguration(std::map, bool> forwarding_to_memory_enabled); 54 | void forwardingToFoldNodeConfiguration(std::map, bool> forwarding_to_fold_node_enabled); 55 | void NPSumsConfiguration(unsigned int n_psums); 56 | 57 | void configureSignals(Tile* current_tile, DNNLayer* dnn_layer, unsigned int ms_size, unsigned int n_folding); 58 | 59 | 60 | //Cycle function 61 | void cycle(); 62 | 63 | void printConfiguration(std::ofstream& out, unsigned int indent); //This function prints the configuration of the ASNetwork (i.e., FEASwitches configuration such as ADD_2_1, ADD_3_1, etc) 64 | void printStats(std::ofstream& out, unsigned int indent); //This functions prints the statistics obtained during the execution. 65 | void printEnergy(std::ofstream& out, unsigned int indent); 66 | 67 | 68 | }; 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /omega-code/stonne/include/Accumulator.h: -------------------------------------------------------------------------------- 1 | //Created 19/02/2020 2 | 3 | #ifndef __ACCUMULATOR__h 4 | #define __ACCUMULATOR__h 5 | 6 | #include "types.h" 7 | #include "DataPackage.h" 8 | #include "Connection.h" 9 | #include 10 | #include "Fifo.h" 11 | #include "Unit.h" 12 | #include "Config.h" 13 | #include "Stats.h" 14 | /* 15 | */ 16 | 17 | class Accumulator : public Unit { 18 | private: 19 | unsigned int input_ports; // Input port 20 | unsigned int output_ports; // output port 21 | unsigned int buffers_capacity; //Buffers size in bytes 22 | unsigned int port_width; //Bit width of each port 23 | 24 | unsigned int busID; //CollectionBus connected to this ASwitch 25 | unsigned int inputID; //Number of input of the Collection Bus busID connected to this AS. 
26 | 27 | //Inputs fifos 28 | Fifo* input_fifo; // Array of packages that are received from the adders 29 | 30 | // Output Fifos 31 | Fifo* output_fifo; // Output fifo to the parent 32 | 33 | adderoperation_t operation_mode; //Adder or comp 34 | 35 | unsigned int current_capacity; // the capacity must not exceed the input bw of the connection 36 | Connection* inputConnection; // This is the input left connection of the Adder 37 | Connection* outputConnection; // This is the output connection of the adder 38 | 39 | cycles_t latency; // Number of cycles to compute a sum. This is configurable since can vary depending on the implementation technology and frequency 40 | 41 | //Operation functions. This functions can be changed in order to perform different types of length operations 42 | DataPackage* perform_operation_2_operands(DataPackage* pck_left, DataPackage* pck_right); //Perform 2:1 sum 43 | 44 | cycles_t local_cycle; 45 | ASwitchStats aswitchStats; //To track the behaviour of the FEASwitch 46 | 47 | //Extensions 48 | DataPackage* temporal_register; //Temporal register to accumulate partial sums 49 | unsigned int n_psums; //Number of psums before accumulation 50 | unsigned int current_psum; //Current psum performed 51 | unsigned int n_accumulator; 52 | 53 | AccumulatorStats accumulatorStats; //Object to track the behaviour of the Accumulator 54 | 55 | 56 | //Private functions 57 | void route(); 58 | 59 | 60 | public: 61 | Accumulator(id_t id, std::string name, Config stonne_cfg, unsigned int n_accumulator); 62 | Accumulator(id_t id, std::string name, Config stonne_cfg, unsigned int n_accumulator, Connection* inputConnection, Connection* outputConnection); 63 | ~Accumulator(); 64 | 65 | //Connection setters 66 | void setInputConnection(Connection* inputLeftConnection); // Set the input left connection of the Adder 67 | void setOutputConnection(Connection* outputConnection); // Set the output connection of the Adder 68 | void setNPSums(unsigned int n_psums); 69 | 
void resetSignals(); 70 | 71 | // Getters 72 | const unsigned int getNAcummulator() const {return this->n_accumulator;} 73 | const unsigned int getOutputPorts() const {return this->output_ports;} // Get the output ports 74 | 75 | // Functionality 76 | void send(); //Packages of data to be sent depending on routing. 77 | void receive(); //take data from connections 78 | 79 | 80 | void cycle(); //Computing a cycle. Based on routing the AS decides where the data goes. 81 | 82 | void printConfiguration(std::ofstream& out, unsigned int indent); //This function prints the configuration of FEASwitch such as the operation mode, augmented link enabled, etc 83 | void printStats(std::ofstream& out, unsigned int indent); 84 | void printEnergy(std::ofstream& out, unsigned int indent); 85 | AccumulatorStats getStats() {return this->accumulatorStats;} 86 | 87 | 88 | }; 89 | 90 | #endif 91 | 92 | -------------------------------------------------------------------------------- /omega-code/stonne/include/ASNetwork.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Francisco Munoz on 19/06/19. 3 | // 4 | 5 | #ifndef __ASNETWORK__H__ 6 | #define __ASNETWORK__H__ 7 | 8 | #include "MSNetwork.h" 9 | #include "ASwitch.h" 10 | #include 11 | #include 12 | #include 13 | #include "types.h" 14 | #include "Config.h" 15 | #include "CompilerART.h" 16 | #include "ReduceNetwork.h" 17 | #include "AccumulationBuffer.h" 18 | 19 | #define CONNECTIONS_PER_SWITCH 2 20 | #define LEFT 0 21 | #define RIGHT 1 22 | 23 | 24 | class ASNetwork : public ReduceNetwork { 25 | private: 26 | unsigned int port_width; //Width in bits of each port 27 | unsigned int ms_size; //Number of multipliers. i.e., the leaves of the network 28 | int nlevels; //Number of levels of the AS without taking into account the MS level 29 | std::map, ASwitch* > aswitchtable; //Map with the switches of the topology. 
The connection among them will be different depending on the topology used 30 | 31 | //Copy of the pointers of the map aswitchtable used to generate the connections between the ART and the bus in the same way as the implementation in bluespec does. 32 | std::vector single_switches; //List of switches that are single reduction switches (see blueSpec implementation. i.e., the do not have forwarding connections) 33 | std::vector double_switches; //List of double switches that are double reduction switches (i.e., in bluespec implementation these are sw that have fw links). 34 | 35 | std::map, Connection*> inputconnectiontable; // input connections of each level. 36 | std::map, Connection*> forwardingconnectiontable; // Forwarding connections of each level (intermedium links) 37 | std::vector accumulationbufferconnectiontable; //Connections to the accumulation buffer if it is used 38 | Connection* outputConnection; //Given by external 39 | AccumulationBuffer* accumulationBuffer; //Array of accumulators to perform the folding accumulation 40 | bool accumulation_buffer_enabled; 41 | Config stonne_cfg; 42 | 43 | 44 | public: 45 | ASNetwork(id_t id, std::string name, Config stonne_cfg, Connection* output_connection); 46 | ~ASNetwork(); 47 | const int getNLevels() const { return this->nlevels; } 48 | const int getMsSize() const { return this->ms_size; } 49 | std::vector getSingleSwitches() {return this->single_switches;} 50 | std::vector getDoubleSwitches() {return this->double_switches;} 51 | void setMemoryConnections(std::vector> memoryConnections); //Connect all the memory ports (busID, lineID) to its corresponding AS 52 | std::map getLastLevelConnections(); 53 | void setOutputConnection(Connection* outputConnection) { this->outputConnection = outputConnection; } //This function set the outputConnection with the Prefetch buffer 54 | void addersConfiguration(std::map, adderconfig_t> adder_configurations); 55 | void forwardingConfiguration(std::map, fl_t> 
fl_configurations); //Configure the forwarding links. Enable the required ones. 56 | void childsLinksConfiguration(std::map, std::pair> childs_configuration); 57 | void forwardingToMemoryConfiguration(std::map, bool> forwarding_to_memory_enabled); 58 | void configureSignals(Tile* current_tile, DNNLayer* dnn_layer, unsigned int ms_size, unsigned int n_folding); 59 | void configureSparseSignals(std::vector sparseVNs, DNNLayer* dnn_layer, unsigned int ms_size); 60 | void resetSignals(); 61 | 62 | 63 | //Cycle function 64 | void cycle(); 65 | 66 | void printConfiguration(std::ofstream& out, unsigned int indent); //This function prints the configuration of the ASNetwork (i.e., ASwitches configuration such as ADD_2_1, ADD_3_1, etc) 67 | void printStats(std::ofstream& out, unsigned int indent); //This functions prints the statistics obtained during the execution. 68 | void printEnergy(std::ofstream& out, unsigned int indent); 69 | ASNetworkStats getStats(); 70 | 71 | 72 | }; 73 | 74 | #endif 75 | -------------------------------------------------------------------------------- /omega-code/stonne/correccion_bugs.txt: -------------------------------------------------------------------------------- 1 | Dia sabado 16/11/2019- Corregido el problema de sustitucion de nuevos pesos por antiguos sin haber terminado la anterior iteracion. Solucion: en el fichero SDMemory a la hora de enviar los datos de entrada (iacts y weights) distinguir entre iteraciones. Si el paquete a enviar pertenece a una iteracion superior (nuevo canal), el paquete no sera enviado. Esto lo que hace basicamente es secuenciar los output channels. Dos mismos output channels no estaran a partir de ahora en el pipeline de la aruitectura. Para enviar los datos de entrada de un ochannel subsecuente, todas las output psums del canal anterior deben haber sido calculados. 2 | 3 | Dia sabado 16/11/2019- Corregido fallo al usar T_K>1. 
El fallo era que a la hora de enviar los pesos se usaba current_K para acceder a memoria en lugar de unsigned index_K=current_K*this->current_tile->get_T_K(); para saltarnos los T_K anteriores y no solo current_K. 4 | 5 | Dia domingo 17/11/2019 - Corregido fallo al usar T_C>1. El fallo era el direccionamiento de T_C y ya de paso se ha solucionado un posible bug con T_R y T_S cambiando tambien la forma de direccionar al acceder a memoria. Ahora, accedemos a todas las variables al igual que con el bug anterior: index_C=current_C*this->current_tile->get_T_C(); index_R=current_R*this->current_tile->get_T_R() y index_S=current_S*this->current_tile->get_T_S(). Se han hecho estas modificaciones en las 3 veces que se accede a memoria. En los pesos (opcion multicast y unicast) y a la hora de acceder a los inputs. 6 | 7 | Como añadir un nuevo parametro de salida: 8 | 1. Modificar Stats.h para añadir el parametro en la clase correspondiente 9 | 2. Modificar Stats.cpp para modificar la funcion reset que resetea el parametro a cero 10 | 3. Modificar el codigo correspondiente para que se use ese parametro en el fichero adecuado. 11 | 4. Modificar Stats.cpp en la clase correspondiente en la funcion printStats para que lo muestre 12 | 13 | Dia 11/11/2019 - Corregido el fallo al usar numero de multiplicadores 64 con T_R=3, T_S=3, T_C=3, T_K=2. El problema era que el algoritmo de generacion de señales no funcionaba correctamente. Esto es asi, porque en el paper de MAERI original, el algoritmo de generacion de señales es explicado utilizando un unico vector para cada nodo activo del ART. Sin embargo, si utilizamos solo un vector, y guardamos los resultados del nivel siguiente en el mismo vector, estamos machacando informacion del nivel actual, y no funciona correctamente. Lo mejor, y creo que mas correcto, es utilizar dos vectores de bits. Uno almacena los nodos activos en ese nivel, y el otro guarda los nodos que se van activando para el siguiente nivel.
14 | Esta solucion tambien proporciona una gran ayuda a la hora de decidir que nodos del ART van a escribir en memoria, y se ha implementado tambien. Basicamente, cuando en el vector de bits, solo queda un unico 1, quiere decir que toda la suma de esa VN se ha reducido completamente, y por tanto que ese nodo es el que debe enviar a memoria. 15 | 16 | Dia 16/12/2019 - Por alguna extrañisima razon que desconozco, y dudo mucho que incluso el mismisimo Dios pueda conocer jamas en toda su infinita existencia, el simulador no funcionaba en los casos con C PRIMO (WHAAAAAAAAAAAAAAT THE FUUUUUUCK!!!). Esto es debido a que por alguna razon mas extraña todavia, que jamas conocere, los paquetes de la cola de pesos de los MSwitches no se eliminaban correctamente en los flushes. En la clase MSwitch se usaba el siguiente bucle para eliminar los paquetes: 17 | for(int i=0; i<weight_fifo->size(); i++) { 18 | DataPackage* pck_in_fifo = weight_fifo->pop(); //this operation is done i times 19 | delete pck_in_fifo; //Deleting pck 20 | } 21 | La solucion ha sido cambiarlo a un while. Con esto ya los paquetes se eliminan correctamente. Esto tiene sentido: en el bucle for, size() decrece con cada pop mientras i crece, de modo que el bucle termina cuando solo se ha eliminado aproximadamente la mitad de los paquetes. Sin embargo, por que ocurria esto unicamente con valores de C numeros primos? Quien sabe. La vida es un misterio, y este simulador lo es aun mas. 22 | 23 | 24 | Dia 17/12/2019: Se han implementado grupos y por el momento no presenta ningun fallo. Se ha implementado la rutina smart_tests para testearla. Ademas, se ha solucionado el fallo con groups y k a la hora de controlar el flujo (no enviar iteraciones de k superiores mientras las g o k anteriores están en proceso).
Para ello, se ha llegado a la conclusion sobre que poner en las funciones setIterationK 25 | 26 | Dia 18/12/2019: Stride se ha implementado correctamente, y parece que funciona 27 | -------------------------------------------------------------------------------- /omega-code/stonne/include/DataPackage.h: -------------------------------------------------------------------------------- 1 | //Created 13/06/2019 2 | 3 | #ifndef __data_package_h__ 4 | #define __data_package_h__ 5 | 6 | #include "types.h" 7 | #include 8 | 9 | /* 10 | 11 | This class represents the wrapper of a certain data. It is used in both networks ART and DS but there are some fields that are used in just one particular class. For example, 12 | since the DS package does not need the VN, it is not used during that network. 13 | 14 | */ 15 | 16 | class DataPackage { 17 | 18 | private: 19 | //General field 20 | size_t size_package; //Actual size of the package. This just accounts for the truly data that is sent in a real implementation 21 | data_t data; //Data in the package 22 | operand_t data_type; //Type of data (i.e., WEIGHT, IACTIVATION, OACTIVATION, PSUM) 23 | id_t source; //Source that sent the package 24 | 25 | // Fields only used for the DS 26 | bool* dests; // Used in multicast traffic to indicate the receivers 27 | unsigned int n_dests; //Number of receivers in multicast operation 28 | unsigned int unicast_dest; //Indicates the destination in case of unicast package 29 | traffic_t traffic_type; // IF UNICAST dest is unicast_dest. If multicast, dest is indicate using dests and n_dests. 30 | 31 | unsigned int VN; //Virtual network where the psum is found 32 | adderoperation_t operation_mode; //operation that got this psum (Comparation or SUM) 33 | 34 | unsigned int output_port; //Used in the psum package to get the output port that was used in the bus to send the data 35 | unsigned int iteration_g; //Indicates the g value of this package (i.e., the number of g iteration). 
This is used to avoid sending packages of some iteration g and k without having performing the previous ones. 36 | unsigned int iteration_k; //Indicates the k value of this package (i.e, the number of k iteration). This is used to avoid sending packages of some iteration k (output channel k) without having performed the previous iterations yet 37 | 38 | 39 | 40 | public: 41 | //General constructor to be reused in both types of packages 42 | DataPackage(size_t size_package, data_t data, operand_t data_type, id_t source); 43 | 44 | //DS Package constructors for creating unicasts, multicasts and broadcasts packages 45 | //General constructor for DS 46 | DataPackage(size_t size_package, data_t data, operand_t data_type, id_t source,traffic_t traffic_type); 47 | // Unicast package constructor. 48 | DataPackage(size_t size_package, data_t data, operand_t data_type, id_t source,traffic_t traffic_type, unsigned int unicast_dest); 49 | //Multicast package. dests must be dynamic memory since the array is not copied. 50 | DataPackage(size_t size_package, data_t data, operand_t data_type, id_t source,traffic_t traffic_type, bool* dests, unsigned int n_dests); //Constructor 51 | //Broadcast package 52 | //Needs nothing. 
Just indicates is the type broadcast 53 | 54 | //ART Package constructor (only one package for this type) 55 | DataPackage(size_t size_package, data_t data, operand_t data_type, id_t source, unsigned int VN, adderoperation_t operation_mode); 56 | ~DataPackage(); 57 | DataPackage(DataPackage* pck); //Constructor copy used to repeat a package 58 | //Getters 59 | const size_t get_size_package() const {return this->size_package;} 60 | const data_t get_data() const {return this->data;} 61 | const operand_t get_data_type() const {return this->data_type;} 62 | const id_t get_source() const {return this->source;} 63 | const traffic_t get_traffic_type() const {return this->traffic_type;} 64 | bool isBroadcast() const {return this->traffic_type==BROADCAST;} 65 | bool isUnicast() const {return this->traffic_type==UNICAST;} 66 | bool isMulticast() const {return this->traffic_type==MULTICAST;} 67 | const bool* get_dests() const {return this->dests;} 68 | unsigned int get_unicast_dest() const {return this->unicast_dest;} 69 | unsigned int get_n_dests() const {return this->n_dests;} 70 | unsigned int getOutputPort() const {return this->output_port;} 71 | unsigned int getIterationK() const {return this->iteration_k;} 72 | void setOutputPort(unsigned int output_port); 73 | void setIterationK(unsigned int iteration_k); //Used to control avoid a package from the next iteration without having calculated the previous ones. 74 | 75 | unsigned int get_vn() const {return this->VN;} 76 | adderoperation_t get_operation_mode() const {return this->operation_mode;} 77 | }; 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /omega-code/stonne/src/TemporalRN.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Francisco Munoz on 19/06/19. 
3 | // 4 | #include "TemporalRN.h" 5 | #include 6 | #include "utility.h" 7 | #include 8 | 9 | //TODO Conectar los enlaces intermedios de forwarding 10 | //This Constructor creates the reduction tree similar to the one shown in the paper 11 | TemporalRN::TemporalRN(id_t id, std::string name, Config stonne_cfg, Connection* outputConnection) : ReduceNetwork(id, name) { 12 | // Collecting the parameters from configuration file 13 | this->stonne_cfg = stonne_cfg; 14 | this->port_width=stonne_cfg.m_ASwitchCfg.port_width; 15 | //End collecting the parameters from the configuration file 16 | // 17 | //Calculating the number of accumulators based on the shape of the multiplier network 18 | if(stonne_cfg.m_MSNetworkCfg.multiplier_network_type==LINEAR) { 19 | this->accumulation_buffer_size = stonne_cfg.m_MSNetworkCfg.ms_size; 20 | } 21 | 22 | else { 23 | this->accumulation_buffer_size = stonne_cfg.m_MSNetworkCfg.ms_rows*stonne_cfg.m_MSNetworkCfg.ms_cols; 24 | } 25 | this->accumulationBuffer = new AccumulationBuffer(0, "AccumulationBuffer", this->stonne_cfg, this->accumulation_buffer_size); 26 | //Creating the input connections 27 | for(int i=0; iaccumulation_buffer_size; i++) { 28 | Connection* input_connection = new Connection(this->port_width); 29 | inputconnectiontable.push_back(input_connection); 30 | } 31 | this->accumulationBuffer->setInputConnections(inputconnectiontable); 32 | 33 | 34 | } 35 | 36 | TemporalRN::~TemporalRN() { 37 | delete this->accumulationBuffer; 38 | for(int i=0; i < inputconnectiontable.size(); i++) { 39 | delete this->inputconnectiontable[i]; 40 | } 41 | 42 | 43 | 44 | 45 | } 46 | 47 | 48 | void TemporalRN::setMemoryConnections(std::vector> memoryConnections) { 49 | unsigned int n_bus_lines = memoryConnections.size(); 50 | std::cout << "N_bus_lines: " << n_bus_lines << std::endl; 51 | 52 | for(int i=0; iaccumulation_buffer_size; i++) { 53 | unsigned int inputID = (i / n_bus_lines); 54 | unsigned int busID = i % n_bus_lines; 55 | Connection* 
mem_conn = memoryConnections[busID][inputID]; 56 | outputconnectiontable.push_back(mem_conn); 57 | //std::cout << "ACCUMUlATOR " << i << " connected to BUS " << busID << " INPUT " << inputID << std::endl; 58 | } 59 | 60 | //Finally we connect the output links with the memory 61 | this->accumulationBuffer->setMemoryConnections(outputconnectiontable); 62 | 63 | } 64 | 65 | std::map TemporalRN::getLastLevelConnections() { 66 | //Converting from vector to map for questions of compatibility with the rest of the code 67 | std::map last_level_connections; 68 | for(int i=0; iaccumulationBuffer->resetSignals(); 76 | } 77 | 78 | void TemporalRN::configureSignals(Tile* current_tile, DNNLayer* dnn_layer, unsigned int ms_size, unsigned int n_folding) { 79 | this->accumulationBuffer->configureSignals(current_tile, dnn_layer, ms_size, n_folding); 80 | 81 | } 82 | 83 | void TemporalRN::configureSparseSignals(std::vector sparseVNs, DNNLayer* dnn_layer, unsigned int ms_size) { 84 | assert(false); 85 | } 86 | 87 | 88 | //TODO Implementar esto bien 89 | void TemporalRN::cycle() { 90 | this->accumulationBuffer->cycle(); 91 | } 92 | 93 | //Print configuration of the TemporalRN 94 | void TemporalRN::printConfiguration(std::ofstream& out, unsigned int indent) { 95 | 96 | out << ind(indent) << "\"ASNetworkConfiguration\" : {" << std::endl; 97 | out << ind(indent) << "}"; 98 | 99 | } 100 | 101 | //Printing stats 102 | void TemporalRN::printStats(std::ofstream& out, unsigned int indent) { 103 | out << ind(indent) << "\"ASNetworkStats\" : {" << std::endl; 104 | this->accumulationBuffer->printStats(out, indent+IND_SIZE); 105 | out << ind(indent) << "}"; 106 | } 107 | 108 | void TemporalRN::printEnergy(std::ofstream& out, unsigned int indent) { 109 | /* 110 | The TemporalRN component prints the counters for the next subcomponents: 111 | - Accumulators 112 | - Input wires are not printed as we consider this accumulator as inside the multiplier (it is an entire PE) 113 | 114 | Note that the wires 
that connect with memory are not taken into account in this component. This is done in the CollectionBus. 115 | 116 | */ 117 | //Printing the accumulator stats 118 | this->accumulationBuffer->printEnergy(out, indent); 119 | 120 | 121 | 122 | } 123 | -------------------------------------------------------------------------------- /omega-code/stonne/src/DSNetworkTop.cpp: -------------------------------------------------------------------------------- 1 | // Created on 06/11/2019 by Francisco Munoz Martinez 2 | 3 | #include "DSNetworkTop.h" 4 | #include 5 | #include "utility.h" 6 | 7 | DSNetworkTop::DSNetworkTop(id_t id, std::string name, Config stonne_cfg) : DistributionNetwork(id,name) { 8 | this->n_input_ports = stonne_cfg.m_SDMemoryCfg.n_read_ports; 9 | if(stonne_cfg.m_MSNetworkCfg.multiplier_network_type==LINEAR) { 10 | this->ms_size_per_port = stonne_cfg.m_MSNetworkCfg.ms_size / n_input_ports; 11 | } 12 | else if(stonne_cfg.m_MSNetworkCfg.multiplier_network_type==OS_MESH) { 13 | this->ms_size_per_port = (stonne_cfg.m_MSNetworkCfg.ms_rows + stonne_cfg.m_MSNetworkCfg.ms_cols) / n_input_ports; 14 | } 15 | this->port_width = stonne_cfg.m_DSwitchCfg.port_width; 16 | for(int i=0; in_input_ports; i++) { 17 | //Creating the top connection first 18 | Connection* conn = new Connection(this->port_width); 19 | //Creating the tree 20 | std::string name = "ASNetworkTree "+i; 21 | DSNetwork* dsnet = new DSNetwork(i,name, stonne_cfg, this->ms_size_per_port, conn); //Creating the dsnetwork with the connection 22 | connections.push_back(conn); 23 | dsnetworks.push_back(dsnet); 24 | 25 | } 26 | 27 | } 28 | 29 | std::map DSNetworkTop::getLastLevelConnections() { 30 | std::map connectionsLastLevel; 31 | for(int i=0; in_input_ports; i++) { //For each tree we add its lastlevelconnections 32 | std::map connectionsPort = this->dsnetworks[i]->getLastLevelConnections(); //Getting the last level conns of the tree i 33 | unsigned int index_base = i*ms_size_per_port; //Current connection 
respect to the first connection in the first tree 34 | for(int j=0; jms_size_per_port; j++) { //We are sure connectionsPort size is ms_size_per_per_port 35 | Connection* current_connection = connectionsPort[j]; //Local index 36 | connectionsLastLevel[index_base+j]=current_connection; //Adding to the global list 37 | } 38 | } 39 | return connectionsLastLevel; 40 | } 41 | 42 | //Return the top connections (i.e., the input connections that connects the DSMemory ports with the subtrees) 43 | std::vector DSNetworkTop::getTopConnections() { 44 | return this->connections; 45 | } 46 | 47 | void DSNetworkTop::cycle() { 48 | for(int i=0; in_input_ports; i++) { 49 | dsnetworks[i]->cycle(); 50 | } 51 | } 52 | 53 | DSNetworkTop::~DSNetworkTop() { 54 | for(int i=0; in_input_ports; i++) { 55 | delete dsnetworks[i]; 56 | delete connections[i]; 57 | } 58 | } 59 | 60 | void DSNetworkTop::printStats(std::ofstream& out, unsigned int indent) { 61 | out << ind(indent) << "\"DSNetworkStats\" : {" << std::endl; 62 | //General statistics if there are 63 | 64 | //For each subtree 65 | out << ind(indent+IND_SIZE) << "\"DSTreeStats\" : [" << std::endl; 66 | for(int i=0; in_input_ports; i++) { 67 | dsnetworks[i]->printStats(out, indent+IND_SIZE+IND_SIZE); 68 | if(i==(this->n_input_ports-1)) { //If I am in the last tree I do not have to separate the objects with a comma 69 | out << std::endl; 70 | } 71 | else { //Put a comma between two DStree objects 72 | out << "," << std::endl; 73 | } 74 | } 75 | out << ind(indent+IND_SIZE) << "]" << std::endl; 76 | out << ind(indent) << "}"; 77 | } 78 | 79 | void DSNetworkTop::printEnergy(std::ofstream& out, unsigned int indent) { 80 | /* 81 | This component prints: 82 | - Connections between memory and DSNetworks 83 | - DSNetworks 84 | */ 85 | 86 | //Printing wires between memory and DSNetwork 87 | for(int i=0; in_input_ports; i++) { 88 | this->connections[i]->printEnergy(out, indent, "DN_WIRE"); 89 | } 90 | 91 | //Printing ASNetworks 92 | for(int i=0; 
in_input_ports; i++) { 93 | this->dsnetworks[i]->printEnergy(out, indent); 94 | } 95 | 96 | } 97 | 98 | DSNetworkStats DSNetworkTop::getStats() { 99 | DSNetworkStats dsnetworkStats; 100 | //Wires between memory and DSNetwork 101 | for(int i=0; in_input_ports; i++) { 102 | ConnectionStats conn_stats = this->connections[i]->getStats(); 103 | dsnetworkStats.n_total_traversals+=conn_stats.n_sends; 104 | } 105 | 106 | //Adding the ASNetworks 107 | for(int i=0; in_input_ports; i++) { 108 | DSNetworkStats current_stats = this->dsnetworks[i]->getStats(); 109 | dsnetworkStats.n_broadcasts+=current_stats.n_broadcasts; 110 | dsnetworkStats.n_unicasts+=current_stats.n_unicasts; 111 | dsnetworkStats.n_right_sends+=current_stats.n_right_sends; 112 | dsnetworkStats.n_left_sends+=current_stats.n_left_sends; 113 | dsnetworkStats.n_total_traversals+=current_stats.n_total_traversals; 114 | } 115 | 116 | return dsnetworkStats; 117 | 118 | } 119 | 120 | -------------------------------------------------------------------------------- /omega-code/sample_graphs/vertex_mutag_batch64.txt: -------------------------------------------------------------------------------- 1 | 
0,2,4,6,9,12,14,16,18,21,24,26,28,31,33,36,37,38,40,42,45,47,49,51,53,56,59,61,64,65,66,68,70,73,76,78,80,82,84,87,89,92,93,94,96,98,100,103,106,108,111,113,115,117,120,123,126,129,131,133,136,137,138,140,142,145,148,150,153,154,157,158,159,160,162,164,167,170,172,175,178,181,184,187,190,192,195,198,200,202,205,206,207,210,211,212,215,216,217,220,221,222,223,226,228,231,233,235,238,241,242,243,246,248,250,252,254,256,257,260,261,263,266,268,270,273,276,278,280,283,286,288,290,293,295,298,299,300,302,304,307,309,311,313,316,318,321,324,325,326,327,330,332,334,337,340,342,344,347,350,352,354,357,359,362,363,364,366,368,370,373,376,378,380,383,386,389,391,393,395,397,400,401,402,404,406,409,411,413,416,419,422,425,428,430,432,435,438,440,442,443,446,447,448,450,452,454,457,460,462,465,467,469,472,475,478,481,483,486,488,491,492,493,496,497,498,499,502,505,507,510,512,514,517,519,521,524,525,526,528,530,533,535,537,540,542,544,547,550,553,556,559,561,563,565,568,569,570,572,574,576,579,582,584,587,589,591,594,597,600,603,605,607,610,613,614,615,618,619,620,622,624,627,630,633,635,636,639,640,641,642,644,646,648,651,654,656,659,661,664,666,668,671,674,675,678,679,680,682,684,687,689,692,694,696,699,701,703,706,707,708,710,712,715,718,720,723,725,726,728,731,734,736,738,741,743,746,747,748,750,752,755,757,760,762,765,766,767,770,772,774,777,779,781,784,785,786,788,790,793,796,798,801,804,805,806,808,811,814,816,818,821,823,824,826,828,830,833,836,838,840,842,845,848,850,853,855,857,859,862,865,868,870,873,876,877,878,880,882,885,887,889,892,894,896,899,902,904,907,909,911,914,916,918,921,924,927,930,933,936,939,942,943,944,946,948,951,954,956,959,962,963,964,966,969,972,974,976,979,981,982,984,986,989,992,994,997,1000,1001,1002,1005,1006,1007,1008,1010,1012,1014,1017,1020,1022,1024,1027,1029,1031,1034,1036,1038,1041,1044,1047,1050,1053,1055,1057,1060,1061,1062,1064,1066,1069,1072,1074,1077,1080,1081,1082,1084,1087,1090,1092,1094,1097,1099,1100,1102,1104,1107,1110,1112,111
5,1118,1119,1120,1123,1124,1127,1130,1133,1135,1138,1140,1143,1144,1145,1148,1149,1150,1152,1154,1156,1159,1161,1164,1167,1169,1171,1173,1176,1179,1182,1184,1187,1189,1192,1194,1196,1199,1202,1203,1204,1206,1208,1210,1213,1216,1218,1221,1223,1225,1227,1230,1233,1236,1239,1242,1244,1247,1248,1249,1252,1253,1254,1256,1258,1261,1264,1266,1269,1272,1274,1276,1279,1281,1284,1287,1288,1289,1292,1293,1294,1297,1298,1299,1302,1303,1304,1306,1308,1311,1314,1316,1319,1321,1324,1325,1329,1330,1331,1332,1334,1337,1340,1342,1344,1347,1349,1352,1353,1354,1356,1358,1360,1363,1365,1368,1370,1372,1375,1377,1380,1381,1382,1384,1386,1389,1392,1394,1397,1398,1400,1403,1406,1408,1410,1413,1415,1418,1419,1420,1421,1424,1426,1429,1431,1433,1435,1437,1440,1442,1445,1448,1449,1450,1452,1454,1456,1459,1461,1463,1465,1467,1470,1472,1474,1477,1479,1481,1484,1485,1486,1488,1490,1492,1495,1497,1499,1502,1504,1506,1509,1511,1513,1516,1517,1518,1519,1521,1524,1526,1529,1531,1533,1535,1537,1540,1543,1545,1548,1549,1550,1552,1554,1556,1559,1562,1564,1566,1568,1570,1573,1576,1577,1578,1580,1582,1585,1588,1590,1593,1596,1597,1598,1600,1603,1606,1608,1610,1613,1615,1616,1617,1620,1622,1625,1627,1630,1632,1634,1637,1639,1642,1643,1644,1646,1648,1650,1653,1656,1658,1661,1663,1665,1667,1670,1673,1676,1678,1681,1683,1686,1687,1688,1690,1692,1694,1697,1700,1702,1704,1706,1709,1712,1714,1716,1719,1721,1724,1725,1726,1728,1730,1733,1735,1737,1740,1743,1744,1745,1748,1749,1750,1752,1754,1757,1759,1762,1765,1768,1771,1774,1777,1779,1781,1784,1787,1789,1791,1794,1795,1796,1798,1800,1802,1804,1806,1809,1811,1814,1817,1820,1822,1824,1827,1830,1833,1835,1838,1840,1843,1844,1845,1847,1849,1852,1853,1854,1856,1858,1860,1863,1865,1868,1870,1873,1875,1877,1880,1882,1885,1887,1890,1891,1892,1894,1896,1899,1902,1904,1907,1910,1911,1912,1914,1917,1919,1922,1924,1926,1929,1931,1934,1935,1936,1938,1940,1943,1946,1948,1950,1952,1955,1958,1959,1960,1963,1965,1968,1969,1970,1972,1974,1977,1980,1982,1984,1987,1990,1993,1996,199
8,2000,2003,2006,2009,2012,2014,2016,2018,2020,2023,2024,2025,2028,2029,2030,2032,2034,2037,2039,2041,2044,2047,2049,2052,2055,2058,2061,2064,2066,2069,2072,2074,2076,2078,2080,2083,2084,2085,2088,2089,2090,2092,2094,2097,2100,2102,2105,2108,2109,2110,2112,2115,2118,2120,2122,2125,2127,2130,2131,2132,2134,2136,2139,2142,2144,2147,2150,2151,2152,2154,2157,2159,2162,2165,2167,2170,2172,2173,2174,2175,2178,2181,2183,2185,2187,2189,2192,2194,2196,2199,2202,2203,2204,2206,2208,2210,2213,2216,2218,2220,2223,2225,2227,2230,2232,2235,2237,2240,2241,2242,2244,2246,2249,2251,2253,2256,2259,2261,2263,2266,2268,2271,2274,2275,2276,2279,2280,2281,2284,2285,2286,2288,2290,2293,2296,2299,2301,2303,2306,2309,2312,2314,2316,2319,2322,2323,2324,2326,2329,2332,2335,2338,2340,2342,2343,2344,2346,2348,2350,2353,2356,2358,2360,2362,2365,2368,2371,2374,2377,2379,2381,2383,2385,2388,2391,2393,2396,2397,2398,2400,2402,2405,2407,2410,2412,2415,2418,2421,2423,2426,2427,2428,2431,2432,2433,2436,2437,2438,2440,2442,2445,2448,2450,2453,2456,2457,2458,2460,2463,2466,2468,2470,2473,2475,2476,2478,2480,2483,2486,2488,2491,2494,2495,2496,2497,2498,2500,2502,2505,2507,2510,2512,2514,2517,2518,2520,2522,2524,2527,2529,2531,2534,2536,2539,2540,2541,2544,2545,2546,2548,2550,2553,2556,2558,2561,2564,2565,2566,2569,2570,2573,2576,2578,2580,2583,2585,2588,2589,2590 2 | -------------------------------------------------------------------------------- /omega-code/stonne/include/OSMeshSDMemory.h: -------------------------------------------------------------------------------- 1 | #ifndef __OSMESHSDMEMORY__H__ 2 | #define __OSMESHSDMEMORY__H__ 3 | 4 | #include 5 | #include "Tile.h" 6 | #include "Connection.h" 7 | #include "Fifo.h" 8 | #include "types.h" 9 | #include "DNNLayer.h" 10 | #include "Unit.h" 11 | #include "Config.h" 12 | #include "DataPackage.h" 13 | #include "Stats.h" 14 | #include "MemoryController.h" 15 | #include "MultiplierNetwork.h" 16 | #include "ReduceNetwork.h" 17 | 18 | 19 | class 
OSMeshSDMemory : public MemoryController { 20 | private: 21 | DNNLayer* dnn_layer; // Layer loaded in the accelerator 22 | ReduceNetwork* reduce_network; //Reduce network to be reconfigured (stored by setReduceNetwork) 23 | MultiplierNetwork* multiplier_network; //Multiplier network to be reconfigured (stored by setMultiplierNetwork) 24 | 25 | unsigned int M; 26 | unsigned int N; 27 | 28 | unsigned int K; //Number of columns of the MK matrix and rows of the KN matrix. Extracted from dnn_layer->get_C(); 29 | 30 | unsigned int OUT_DIST_VN; //To calculate the output memory address 31 | unsigned int OUT_DIST_VN_ITERATION; //To calculate the memory address 32 | Connection* write_connection; 33 | OSMeshControllerState current_state; //State used to control what to do at each stage 34 | std::vector configurationVNs; //A set of each VN size mapped onto the architecture. 35 | std::vector vnat_table; //Every element is a VN, indicating the column that it is calculating 36 | //Connection* read_connection; 37 | std::vector read_connections; //Input port connections. There are as many connections as n_read_ports are specified. 
38 | 39 | //Input parameters 40 | unsigned int ms_rows; 41 | unsigned int ms_cols; 42 | unsigned int n_read_ports; 43 | unsigned int n_write_ports; 44 | unsigned int write_buffer_capacity; 45 | unsigned int port_width; 46 | 47 | unsigned int rows_used; 48 | unsigned int cols_used; 49 | 50 | unsigned int ms_size_per_input_port; 51 | //Fifos 52 | Fifo* write_fifo; //Fifo used to store the writes before going to the memory 53 | 54 | std::vector input_fifos; //Fifos used to store the inputs before being fetched 55 | std::vector psum_fifos; //Fifos used to store partial sums (psums) before being fetched 56 | //Fifo* read_fifo; //Fifo used to store the inputs before being fetched 57 | //Fifo* psums_fifo; //Fifo used to store partial psums before being fetched 58 | 59 | //Addresses 60 | address_t MK_address; 61 | address_t KN_address; 62 | address_t output_address; 63 | 64 | 65 | 66 | //Tile parameters 67 | unsigned int T_N; //Actual value of T_N if adaptive tiling is used 68 | unsigned int T_K; //This is the actual value of the tile of K. This is just 1 in this case 69 | unsigned int T_M; 70 | unsigned int iter_N; 71 | unsigned int iter_K; 72 | unsigned int iter_M; 73 | 74 | //Current parameters 75 | unsigned int current_M; 76 | unsigned int current_N; 77 | unsigned int current_K; 78 | 79 | 80 | //Signals 81 | bool configuration_done; //Indicates whether the architecture has been configured to perform the delivering 82 | bool execution_finished; //Flag that indicates when the execution is over. This happens when all the output values have been calculated. 83 | bool iteration_completed; 84 | 85 | bool metadata_loaded; //Flag that indicates whether the metadata has been loaded 86 | bool layer_loaded; //Flag that indicates whether the layer has been loaded. 
87 | 88 | 89 | unsigned int current_output; 90 | unsigned int output_size; 91 | 92 | unsigned int current_output_iteration; 93 | unsigned int n_iterations_completed; 94 | unsigned int output_size_iteration; 95 | 96 | //For stats 97 | std::vector write_port_connections; 98 | cycles_t local_cycle; 99 | SDMemoryStats sdmemoryStats; //To track information 100 | 101 | //Aux functions 102 | void receive(); 103 | void send(); 104 | void sendPackageToInputFifos(DataPackage* pck); 105 | std::vector getWritePortConnections() const {return this->write_port_connections;} 106 | 107 | 108 | public: 109 | OSMeshSDMemory(id_t id, std::string name, Config stonne_cfg, Connection* write_connection); 110 | ~OSMeshSDMemory(); 111 | void setLayer(DNNLayer* dnn_layer, address_t KN_address, address_t MK_address, address_t output_address, Dataflow dataflow); 112 | void setTile(Tile* current_tile); 113 | void setReadConnections(std::vector read_connections); 114 | void setWriteConnections(std::vector write_port_connections); //All the write connections must be set at the same time 115 | void cycle(); 116 | bool isExecutionFinished(); 117 | 118 | void setSparseMatrixMetadata(metadata_address_t MK_metadata_id, metadata_address_t MK_metadata_pointer) {assert(false);} // Not supported by this controller: the body is just assert(false) 119 | void setDenseSpatialData(unsigned int T_N, unsigned int T_K) {assert(false);} //Not supported by this controller either: the body is just assert(false) 120 | void setReduceNetwork(ReduceNetwork* reduce_network) {this->reduce_network=reduce_network;} 121 | //Used to configure the MultiplierNetwork according to the controller 122 | void setMultiplierNetwork(MultiplierNetwork* multiplier_network) {this->multiplier_network = multiplier_network;} 123 | void printStats(std::ofstream& out, unsigned int indent); 124 | void printEnergy(std::ofstream& out, unsigned int indent); 125 | SDMemoryStats getStats() {return this->sdmemoryStats;} 126 | 127 | }; 128 | 129 | 130 | 
#endif 131 | -------------------------------------------------------------------------------- 
/omega-code/stonne/src/CollectionBusLine.cpp: -------------------------------------------------------------------------------- 1 | // Created on the 4th of November 2019 by Francisco Munoz Martinez 2 | 3 | #include "CollectionBusLine.h" 4 | #include "utility.h" 5 | 6 | CollectionBusLine::CollectionBusLine(id_t id, std::string name, unsigned int busID, unsigned int input_ports_bus_line, unsigned int connection_width, unsigned int fifo_size) : Unit(id,name) { 7 | this->input_ports=input_ports_bus_line; 8 | this->busID = busID; 9 | //Creating the connections for this bus line 10 | for(int i=0; iinput_ports; i++) { 11 | //Adding the input connection 12 | Connection* input_connection = new Connection(connection_width); 13 | input_connections.push_back(input_connection); 14 | 15 | //Adding the input fifo 16 | Fifo* fifo = new Fifo(fifo_size); 17 | input_fifos.push_back(fifo); 18 | 19 | //Creating the output connection. NOTE(review): this assignment is inside the per-input loop, so output_port is re-allocated on every iteration and only the last Connection stays reachable; it looks like it should be created once, outside the loop 20 | output_port = new Connection(connection_width); 21 | this->collectionbuslineStats.n_inputs_receive.push_back(0); //To track information 22 | 23 | } 24 | next_input_selected=0; 25 | } 26 | 27 | CollectionBusLine::~CollectionBusLine() { 28 | //First removing the input_connections 29 | for(int i=0; iinput_connections[inputID]; 45 | } 46 | 47 | void CollectionBusLine::receive() { 48 | for(int i=0; iinput_connections.size(); i++) { 49 | if(input_connections[i]->existPendingData()) { 50 | std::vector pck = input_connections[i]->receive(); 51 | for(int j=0; jcollectionbuslineStats.n_inputs_receive[i]+=1; //To track information. 
Number of packages received by each input line for this output port 53 | input_fifos[i]->push(pck[j]); //Inserting the package into the fifo 54 | } 55 | } 56 | } 57 | } 58 | 59 | 60 | void CollectionBusLine::cycle() { 61 | this->collectionbuslineStats.total_cycles++; 62 | this->receive(); //Receiving packages from the connections 63 | bool selected=false; 64 | unsigned int n_iters = 0; 65 | //To track information 66 | unsigned int n_inputs_trying=0; 67 | for(int i=0; iisEmpty()) { 69 | n_inputs_trying+=1; 70 | } 71 | } 72 | 73 | this->collectionbuslineStats.n_conflicts_average+=n_inputs_trying; //Later this will be divided by the number of total cycles to calculate the average 74 | if(n_inputs_trying>1) { 75 | this->collectionbuslineStats.n_times_conflicts+=1; //To track information 76 | } 77 | 78 | //End of the tracking code; the actual logic of the cycle follows 79 | 80 | std::vector data_to_send; 81 | while(!selected && (n_iters < input_fifos.size())) { //Loop while no input has been selected and there are still inputs left to check 82 | if(!input_fifos[next_input_selected]->isEmpty()) { //If there is data in this input then 83 | selected=true; 84 | DataPackage* pck = input_fifos[next_input_selected]->pop(); //Popping from the fifo 85 | pck->setOutputPort(this->busID); //Setting tracking information to the package 86 | data_to_send.push_back(pck); //Queueing the package to be sent through the output port 87 | this->collectionbuslineStats.n_sends++; //To track information 88 | } 89 | next_input_selected = (next_input_selected + 1) % input_fifos.size(); //Move on to the next input (wrapping around) whether or not this one had data 90 | n_iters++; 91 | } 92 | 93 | //Sending the data to the output connection 94 | if(selected) { 95 | this->output_port->send(data_to_send); 96 | 97 | } 98 | 99 | } 100 | 101 | void CollectionBusLine::printStats(std::ofstream& out, unsigned int indent) { 102 | out << ind(indent) << "{" << std::endl; //TODO put ID 103 | 
this->collectionbuslineStats.print(out, indent+IND_SIZE); 104 | out << ind(indent+IND_SIZE) << ",\"input_fifos_stats\" : [" << 
std::endl; 105 | for(int i=0; iprintStats(out, indent+IND_SIZE+IND_SIZE+IND_SIZE); 108 | out << ind(indent+IND_SIZE+IND_SIZE) << "}"; 109 | if(i<(input_fifos.size()-1)) { 110 | out << ","; 111 | } 112 | 113 | out << std::endl; 114 | } 115 | out << ind(indent+IND_SIZE) << "]" << std::endl; 116 | out << ind(indent) << "}"; //Take care. Do not print endl here. This is the parent's responsibility 117 | } 118 | 119 | void CollectionBusLine::printEnergy(std::ofstream& out, unsigned int indent) { 120 | /* 121 | This component prints: 122 | - The input wires connected to this output wire 123 | - The input FIFOs to connect every input wire 124 | - The output wire 125 | */ 126 | 127 | //Printing input wires 128 | for(int i=0; iprintEnergy(out, indent, "CB_WIRE"); 131 | } 132 | 133 | //Printing input fifos 134 | for(int i=0; iprintEnergy(out, indent); 137 | } 138 | 139 | //Printing output wire 140 | output_port->printEnergy(out, indent, "CB_WIRE"); 141 | } 142 | -------------------------------------------------------------------------------- /omega-code/stonne/include/MSwitch.h: -------------------------------------------------------------------------------- 1 | //Created 13/06/2019 2 | 3 | #ifndef __MSwitch__h 4 | #define __MSwitch__h 5 | 6 | #include "types.h" 7 | #include "DataPackage.h" 8 | #include "Connection.h" 9 | #include "Fifo.h" 10 | #include "Unit.h" 11 | #include 12 | #include "Config.h" 13 | #include "Stats.h" 14 | /* 15 | */ 16 | 17 | class MSwitch : public Unit { 18 | private: 19 | bool pending_to_compute; // Indicates there is data pending to compute 20 | bool pending_output; 21 | Fifo* activation_fifo; // Package received by the DN 22 | Fifo* forwarding_input_fifo; //Package received from the neighbour 23 | Fifo* forwarding_output_fifo; //Packages accumulated to be sent to the fw links when required 24 | Fifo* weight_fifo; //Weights stored in the MS 25 | Fifo* psum_fifo; //Psum ready to be sent to the parent 26 | 27 | std::vector psums_created; // All the psums 
created by this multiplier used to delete the package after the execution is finished. 28 | Connection* outputConnection; // Towards the Reduce Network 29 | Connection* inputConnection; //From the DistributionNetwork 30 | Connection* outputForwardingConnection; //To the neighbour MS 31 | Connection* inputForwardingConnection; //Input from the neighbour 32 | cycles_t latency; //latency in number of cycles 33 | int num; 34 | 35 | //These values are in essence the size of a single element in the architecture (by default) 36 | unsigned int input_ports; 37 | unsigned int output_ports; 38 | unsigned int forwarding_ports; 39 | unsigned int buffers_capacity; 40 | unsigned int port_width; 41 | 42 | //Signals 43 | int VN; 44 | bool inputForwardingEnabled; //Control signal that specifies if the input fw link is enabled to receive data 45 | bool outputForwardingEnabled; //Control signal that specifies if the output fw link is enabled to send data 46 | unsigned int n_windows; //Control number that specifies the number of slides (shifts) a MS takes to calculate one row. It is useful to know if the MS has to send/receive data from the fw link in a specific cycle (the first cycle of a row cannot receive data from neighbours since there is no data of that row). 47 | unsigned int n_folding; //Controls the number of partial sums that must be generated to accumulate a whole ofmap value. If n_folding is 1, then partial sums are not required. 48 | 49 | //Counters to perform the control 50 | unsigned int current_n_windows; //Measures the number of windows that have been performed in the current row. 51 | unsigned int current_n_folding; // Measures the number of foldings that have been performed in the current window. This helps to know whether to read from the input or from the fw link 52 | bool forward_psum; //Indicates if the behaviour of this MS is to forward a psum. This is useful to implement folding 53 | bool direct_forward_psum; //Always forward the psum. 
It is different than the variable forward_psum as the last one has some control regarding the number of iterations. 54 | 55 | cycles_t local_cycle; 56 | MSwitchStats mswitchStats; //Object to track the behaviour of the MSwitch 57 | 58 | public: 59 | MSwitch(id_t id, std::string name, int num, Config stonne_cfg); 60 | MSwitch(id_t id, std::string name, int num, Config stonne_cfg, Connection* outputConnection, 61 | Connection* inputConnection); 62 | ~MSwitch(); 63 | void setOutputConnection(Connection* outputConnection); //Set the output connection of the switch (TO THE ADDER) 64 | void setInputForwardingConnection(Connection* inputForwardingConnection); //Set the right connection of the switch 65 | void setOutputForwardingConnection(Connection* outputForwardingConnection); //Set the output forwarding connection (the member of the same name is documented as going to the neighbour MS) 66 | void setInputConnection(Connection* inputConnection); //Set the input connection of the switch 67 | void send(); //Send the result through the outputConnection 68 | void receive(Connection* connection); //Receive a package from the inputConnection or the forwarding connection and store it in this->data. NOTE(review): no member called data is declared in this class; presumably the package ends up in one of the FIFOs - confirm against the implementation 69 | void forward(DataPackage* activation); 70 | void setVirtualNeuron(unsigned int VN); //Indicates the VN ID assigned 71 | void setInputForwardingEnabled(bool inputForwardingEnabled); //Indicates if the MSwitch receives data from the fw link (from the RIGHT MS) 72 | void setOutputForwardingEnabled(bool outputForwardingEnabled); //Indicates if the MSwitch sends data to the fw link (to the LEFT MS) 73 | void setNWindows(unsigned int n_windows); //The number of windows per row which is T_Y_ 74 | void setNFolding(unsigned int n_folding); //The number of partial sums used to accumulate a whole sum 75 | void setForwardPsum(bool forward_psum); // Disable the multiplier function and enable the psum forwarding with the control managed by the multiplier 76 | void setDirectForwardPsum(bool direct_forward_psum); //Disable the multiplier function and enable the psum forwarding. 
In this case, the psum is always forwarded (SIGMA). 77 | Fifo* getActivationFifo() {return this->activation_fifo;} 78 | Fifo* getWeightFifo() {return this->weight_fifo;} 79 | Fifo* getPsumFifo() {return this->psum_fifo;} 80 | DataPackage* perform_operation_2_operands(DataPackage* pck_left, DataPackage* pck_right); //Performs the multiplication and returns the result. 81 | 82 | void cycle(); //Computes one cycle 83 | void resetSignals(); 84 | 85 | void printConfiguration(std::ofstream& out, unsigned int indent); //This function prints the configuration of the MSwitch, such as the VN ID 86 | void printStats(std::ofstream& out, unsigned int indent); 87 | void printEnergy(std::ofstream& out, unsigned int indent); 88 | MSwitchStats getStats() {return this->mswitchStats;} 89 | 90 | 91 | 92 | 93 | }; 94 | 95 | #endif 96 | 97 | -------------------------------------------------------------------------------- /omega-code/stonne/include/STONNEModel.h: -------------------------------------------------------------------------------- 1 | #ifndef STONNEMODEL_H_ 2 | #define STONNEMODEL_H_ 3 | 4 | #include 5 | //#include "RSNetwork.h" 6 | #include "MSNetwork.h" 7 | #include "DSNetworkTop.h" 8 | #include "ASNetwork.h" 9 | #include "SDMemory.h" 10 | #include "Connection.h" 11 | #include "LookupTable.h" 12 | #include "CollectionBus.h" 13 | #include "Config.h" 14 | #include "CompilerART.h" 15 | #include "CompilerMSN.h" 16 | #include "ReduceNetwork.h" 17 | #include "DistributionNetwork.h" 18 | #include "FENetwork.h" 19 | #include "MemoryController.h" 20 | #include "SparseSDMemory.h" 21 | #include "SparseDenseSDMemory.h" 22 | #include "TemporalRN.h" 23 | #include "OSMeshSDMemory.h" 24 | #include "OSMeshMN.h" 25 | 26 | class Stonne { 27 | private: 28 | //Hardware parameters 29 | Config stonne_cfg; 30 | unsigned int ms_size; //Number of multipliers 31 | unsigned int n_adders; //Number of adders obtained from ms_size 32 | DistributionNetwork* dsnet; //Distribution Network 33 | MultiplierNetwork* msnet; 
//Multiplier Network 34 | ReduceNetwork* asnet; //Reduce network (ART Network) 35 | LookupTable* lt; //Lookuptable 36 | MemoryController* mem; //MemoryController abstraction (e.g., SDMemory from MAERI) 37 | Bus* collectionBus; //CollectionBus 38 | Connection* outputASConnection; //The last connection of the AS and input to the lookuptable 39 | Connection* outputLTConnection; //Output of the lookup table connection and write port to the SDMemory 40 | Connection** addersBusConnections; //Array of output connections between the adders and the bus 41 | Connection** BusMemoryConnections; //Array of output Connections between the bus and the memory. (Write output ports) 42 | 43 | //Software parameters 44 | DNNLayer* dnn_layer; 45 | Tile* current_tile; 46 | bool layer_loaded; //Indicates whether a layer has been loaded (presumably set by the load* layer functions declared below - confirm) 47 | bool tile_loaded; 48 | 49 | //Connection and cycle functions 50 | void connectMemoryandDSN(); 51 | void connectMSNandDSN(); //Function to connect the multipliers of the MSN to the last level switches in the DSN. 52 | void connectMSNandASN(); 53 | void connectASNandBus(); //Connect the adders to the Collection bus 54 | void connectBusandMemory(); //Connect the bus and the memory write ports. 
55 | void cycle(); 56 | void printStats(); 57 | void printEnergy(); 58 | void printGlobalStats(std::ofstream& out, unsigned int indent); 59 | 60 | // DEBUG PARAMETERS 61 | unsigned long time_ds; 62 | unsigned long time_ms; 63 | unsigned long time_as; 64 | unsigned long time_lt; 65 | unsigned long time_mem; 66 | //DEBUG functions 67 | void testDSNetwork(unsigned int num_ms); 68 | void testTile(unsigned int num_ms); 69 | void testMemory(unsigned int num_ms); 70 | 71 | //Statistics 72 | unsigned int n_cycles; 73 | 74 | 75 | public: 76 | Stonne (Config stonne_cfg); 77 | ~Stonne(); 78 | 79 | void loadDNNLayer(Layer_t layer_type, std::string layer_name, unsigned int R, unsigned int S, unsigned int C, unsigned int K, unsigned int G, unsigned int N, unsigned int X, unsigned int Y, unsigned int strides, address_t input_address, address_t filter_address, address_t output_address, Dataflow dataflow); //General loader that takes every layer parameter 80 | 81 | //Load CONV Layer. At the end this calls the general loader (presumably loadDNNLayer) with all the parameters 82 | void loadCONVLayer(std::string layer_name, unsigned int R, unsigned int S, unsigned int C, unsigned int K, unsigned int G, unsigned int N, unsigned int X, unsigned int Y, unsigned int strides, address_t input_address, address_t filter_address, address_t output_address); 83 | 84 | //Load FC layer just with the appropriate parameters 85 | //N = batch size (i.e., number of rows in input matrix); S=number of inputs per batch (i.e., column size in input matrix and weight matrix); K=number of output neurons (i.e., number of rows of the weight matrix) 86 | void loadFCLayer(std::string layer_name, unsigned int N, unsigned int S, unsigned int K, address_t input_address, address_t filter_address, address_t output_address); 87 | 88 | //Load Sparse GEMM onto STONNE according to SIGMA parameter taxonomy. 
89 | void loadGEMM(std::string layer_name, unsigned int N, unsigned int K, unsigned int M, address_t MK_matrix, address_t KN_matrix, metadata_address_t MK_metadata, metadata_address_t KN_metadata, address_t output_matrix, metadata_address_t output_metadata, Dataflow dataflow); 90 | 91 | //Load Dense GEMM onto STONNE according to SIGMA parameter taxonomy. This function takes no tile arguments; the T_N, T_K and T_M tiling is given separately through loadGEMMTile 92 | void loadDenseGEMM(std::string layer_name, unsigned int N, unsigned int K, unsigned int M, address_t MK_matrix, address_t KN_matrix, address_t output_matrix, Dataflow dataflow); 93 | 94 | //Load sparse-dense GEMM onto STONNE 95 | void loadSparseDense(std::string layer_name, unsigned int N, unsigned int K, unsigned int M, address_t MK_matrix, address_t KN_matrix, metadata_address_t MK_metadata_id, metadata_address_t MK_metadata_pointer, address_t output_matrix, unsigned int T_N, unsigned int T_K); 96 | 97 | //Load a Dense GEMM tile to run it using the loadDenseGEMM function 98 | void loadGEMMTile(unsigned int T_N, unsigned int T_K, unsigned int T_M); 99 | 100 | void loadTile(unsigned int T_R, unsigned int T_S, unsigned int T_C, unsigned int T_K, unsigned int T_G, unsigned int T_N, unsigned int T_X_, unsigned int T_Y_); //Load general and CONV tile 101 | void loadFCTile(unsigned int T_S, unsigned int T_N, unsigned int T_K); //VNSize = T_S, NumVNs= T_N*T_K 102 | void run(); 103 | 104 | 105 | unsigned int getCycles(){return this->n_cycles;} //Returns the n_cycles counter 106 | void loadClocking(unsigned int* clocked_op); 107 | //Getting stats from code 108 | SDMemoryStats getMemoryStats(); 109 | MSNetworkStats getMultiplierNetworkStats(); 110 | ASNetworkStats getASNetworkStats(); 111 | DSNetworkStats getDSNetworkStats(); 112 | 113 | }; 114 | 115 | #endif 116 | //TO DO add enumerate. 
117 | -------------------------------------------------------------------------------- /omega-code/stonne/src/AccumulationBuffer.cpp: -------------------------------------------------------------------------------- 1 | // 2 | // Created by Francisco Munoz on 19/06/19. 3 | // 4 | #include "AccumulationBuffer.h" 5 | #include 6 | #include "utility.h" 7 | #include 8 | 9 | //TODO Connect the intermediate forwarding links 10 | //This constructor creates n_accumulators Accumulator objects and stores them in accumulatortable, keyed by their index 11 | AccumulationBuffer::AccumulationBuffer(id_t id, std::string name, Config stonne_cfg, unsigned int n_accumulators) : Unit(id, name) { 12 | // Collecting the parameters from configuration file 13 | this->port_width=stonne_cfg.m_ASwitchCfg.port_width; 14 | this->n_accumulators = n_accumulators; //Number of accumulators 15 | std::string name_str = "accumulator "; 16 | for(int i=0; in_accumulators; i++) { 17 | std::string name_acc = name_str+=i; //NOTE(review): += with an integer appends i as a single character and also modifies name_str, so the name grows on every iteration; name_str + std::to_string(i) was probably intended 18 | Accumulator* acc = new Accumulator(i, name_acc, stonne_cfg, i); 19 | accumulatortable[i]=acc; 20 | } 21 | 22 | 23 | } 24 | 25 | AccumulationBuffer::~AccumulationBuffer() { 26 | for(int i=0; in_accumulators; i++) { 27 | delete accumulatortable[i]; 28 | } 29 | } 30 | 31 | 32 | void AccumulationBuffer::setMemoryConnections(std::vector memoryConnections) { 33 | for(int i=0; in_accumulators; i++) { 34 | outputconnectiontable[i]=memoryConnections[i]; 35 | accumulatortable[i]->setOutputConnection(memoryConnections[i]); 36 | } 37 | } 38 | 39 | void AccumulationBuffer::setInputConnections(std::vector inputConnections) { 40 | for(int i=0; in_accumulators; i++) { 41 | inputconnectiontable[i]=inputConnections[i]; 42 | accumulatortable[i]->setInputConnection(inputConnections[i]); 43 | } 44 | } 45 | 46 | void AccumulationBuffer::configureSignals(Tile* current_tile, DNNLayer* dnn_layer, unsigned int ms_size, unsigned int n_folding) { 47 | 48 | this->NPSumsConfiguration(n_folding); //All the accumulation buffers have the same 
folding iteration numbers which means that in this case all the VNs are similar 49 | 50 | } 51 | 52 | void AccumulationBuffer::resetSignals() { 53 | for(std::map::iterator it=accumulatortable.begin(); it != accumulatortable.end(); ++it) { 54 | it->second->resetSignals(); 55 | } 56 | 57 | } 58 | 59 | void AccumulationBuffer::NPSumsConfiguration(unsigned int n_psums) { 60 | for(std::map::iterator it=accumulatortable.begin(); it != accumulatortable.end(); ++it) { 61 | it->second->setNPSums(n_psums); 62 | } 63 | 64 | 65 | } 66 | 67 | void AccumulationBuffer::cycle() { 68 | for(int i=0; in_accumulators; i++) { 69 | accumulatortable[i]->cycle(); 70 | } 71 | } 72 | 73 | //Print configuration of the AccumulationBuffer (the body is currently empty, so nothing is printed) 74 | void AccumulationBuffer::printConfiguration(std::ofstream& out, unsigned int indent) { 75 | 76 | } 77 | 78 | //Printing stats 79 | void AccumulationBuffer::printStats(std::ofstream& out, unsigned int indent) { 80 | out << ind(indent) << "\"AccumulationBufferStats\" : {" << std::endl; 81 | out << ind(indent+IND_SIZE) << "\"AccumulatorStats\" : [" << std::endl; 82 | for(int i=0; i < this->n_accumulators; i++) { 83 | Accumulator* ac = accumulatortable[i]; 84 | ac->printStats(out, indent+IND_SIZE+IND_SIZE); 85 | if(i==(this->n_accumulators-1)) { //If I am in the last accumulator, the comma to separate the accumulators is not added 86 | out << std::endl; //This is added because the call to acc print does not print it (to be able to put the comma, if necessary) 87 | } 88 | else { 89 | out << "," << std::endl; //Comma and line break are added to separate with the next accumulator in the array 90 | } 91 | 92 | 93 | 94 | } 95 | out << ind(indent+IND_SIZE) << "]" << std::endl; 96 | out << ind(indent) << "}"; 97 | 98 | } 99 | 100 | void AccumulationBuffer::printEnergy(std::ofstream& out, unsigned int indent) { 101 | /* 102 | The AccumulationBuffer component prints the counters for the following subcomponents: 103 | - Accumulators 104 | - wires that connect each accumulator with 
its AdderSwitch 105 | 106 | Note that the wires that connect with memory are not taken into account in this component. This is done in the CollectionBus. 107 | 108 | */ 109 | 110 | //Printing the input wires 111 | 112 | for(std::map::iterator it=inputconnectiontable.begin(); it != inputconnectiontable.end(); ++it) { 113 | Connection* conn = inputconnectiontable[it->first]; 114 | conn->printEnergy(out, indent, "RN_WIRE"); 115 | } 116 | 117 | 118 | //Printing the Accumulator energy stats and their fifos stats 119 | for(std::map::iterator it=accumulatortable.begin(); it != accumulatortable.end(); ++it) { 120 | Accumulator* acc = accumulatortable[it->first]; //Look the accumulator up again by its key (same element as it->second) 121 | acc->printEnergy(out, indent); //Printing the energy counters of this accumulator 122 | } 123 | 124 | 125 | } 126 | 127 | AccumulationBufferStats AccumulationBuffer::getStats() { 128 | AccumulationBufferStats accumulationbufferStats; //NOTE(review): the sums below assume this object starts with zeroed counters - confirm in its definition 129 | //Collecting accumulator stats 130 | for(std::map::iterator it=accumulatortable.begin(); it != accumulatortable.end(); ++it) { 131 | Accumulator* acc = accumulatortable[it->first]; //Look the accumulator up again by its key (same element as it->second) 132 | AccumulatorStats accumulatorStats = acc->getStats(); 133 | accumulationbufferStats.n_adds+=accumulatorStats.n_adds; 134 | accumulationbufferStats.n_memory_send+=accumulatorStats.n_memory_send; 135 | accumulationbufferStats.n_receives+=accumulatorStats.n_receives; 136 | accumulationbufferStats.n_register_reads+=accumulatorStats.n_register_reads; 137 | accumulationbufferStats.n_register_writes+=accumulatorStats.n_register_writes; 138 | accumulationbufferStats.n_configurations+=accumulatorStats.n_configurations; 139 | } 140 | 141 | return accumulationbufferStats; 142 | } 143 | -------------------------------------------------------------------------------- /omega-code/stonne/src/Tile.cpp: -------------------------------------------------------------------------------- 1 | //Created by Francisco Munoz Martinez on 26/06/2019 2 | 3 | #include "Tile.h" 4 | #include "utility.h" 5 | #include 6 | #include 
"types.h" 7 | #include 8 | #include "cpptoml.h" 9 | 10 | 11 | 12 | 13 | //Used to create a convolutional tile 14 | Tile::Tile(unsigned int T_R, unsigned int T_S, unsigned int T_C, unsigned int T_K, unsigned int T_G, unsigned int T_N, unsigned int T_X_, unsigned int T_Y_, bool folding) { 15 | this->T_R = T_R; 16 | this->T_S = T_S; 17 | this->T_C = T_C; 18 | this->T_K = T_K; 19 | this->T_G = T_G; 20 | this->T_N = T_N; 21 | this->T_X_ = T_X_; 22 | this->T_Y_ = T_Y_; 23 | 24 | this->VN_Size = T_R*T_S*T_C; 25 | this->Num_VNs = T_K*T_G*T_N*T_X_*T_Y_; 26 | this->folding = folding; 27 | if(this->folding) { 28 | this->VN_Size+=1; //One extra MS for psum accumulation 29 | } 30 | } 31 | 32 | 33 | 34 | 35 | Tile::Tile(std::string tile_file) { //Builds a tile from a file parsed with cpptoml. NOTE(review): this constructor never assigns VN_Size, Num_VNs or folding, although printConfiguration prints them - confirm they are initialised elsewhere 36 | auto config = cpptoml::parse_file(tile_file); //Creating object to parse 37 | auto tile_type=config->get_as("tile_type"); 38 | auto T_R=config->get_as("T_R"); 39 | auto T_S=config->get_as("T_S"); 40 | auto T_C=config->get_as("T_C"); 41 | auto T_K=config->get_as("T_K"); 42 | auto T_G=config->get_as("T_G"); 43 | auto T_N=config->get_as("T_N"); 44 | auto T_X_=config->get_as("T_X'"); 45 | auto T_Y_=config->get_as("T_Y'"); 46 | 47 | if(!tile_type) { 48 | std::cout << "Error to parse tile_type. Parameter not found" << std::endl; 49 | exit(1); 50 | } 51 | 52 | if(*tile_type=="CONV") { //Actually the architecture does not know about the layer type. This is just to make sure that the user introduces the 53 | //appropriate parameters. 54 | std::cout << "Reading a tile of type CONV" << std::endl; 55 | } 56 | 57 | else if(*tile_type=="FC") { 58 | std::cout << "Reading a tile of type FC" << std::endl; 59 | } 60 | 61 | else { //NOTE(review): the message below lists POOL, but only CONV and FC are accepted by the checks above 62 | std::cout << "Error to parse tile_type. Specify a correct type: [CONV, FC, POOL]" << std::endl; 63 | exit(1); 64 | } 65 | 66 | if(*tile_type=="CONV") { 67 | if(!T_R) { 68 | std::cout << "Error to parse T_R. Value not found." << std::endl; 69 | exit(1); 70 | } 71 | 72 | if(!T_S) { 73 | std::cout << "Error to parse T_S. 
Value not found." << std::endl; 74 | exit(1); 75 | } 76 | 77 | if(!T_C) { 78 | std::cout << "Error to parse T_C. Value not found." << std::endl; 79 | exit(1); 80 | } 81 | 82 | if(!T_K) { 83 | std::cout << "Error to parse T_K. Value not found." << std::endl; 84 | exit(1); 85 | } 86 | 87 | if(!T_G) { 88 | std::cout << "Error to parse T_G. Value not found." << std::endl; 89 | exit(1); 90 | } 91 | 92 | if(!T_N) { 93 | std::cout << "Error to parse T_N. Value not found." << std::endl; 94 | exit(1); 95 | } 96 | 97 | if(!T_X_) { 98 | std::cout << "Error to parse T_X'. Value not found." << std::endl; 99 | exit(1); 100 | } 101 | 102 | if(!T_Y_) { 103 | std::cout << "Error to parse T_Y'. Value not found." << std::endl; 104 | exit(1); 105 | } 106 | 107 | 108 | 109 | //Filling the parameters 110 | this->T_R = *T_R; 111 | this->T_S = *T_S; 112 | this->T_C = *T_C; 113 | this->T_K = *T_K; 114 | this->T_G = *T_G; 115 | this->T_N = *T_N; 116 | this->T_X_ = *T_X_; 117 | this->T_Y_ = *T_Y_; 118 | 119 | } 120 | 121 | else if(*tile_type=="FC") { 122 | 123 | if(!T_N) { 124 | std::cout << "Error to parse T_N. Value not found." << std::endl; 125 | exit(1); 126 | } 127 | 128 | if(!T_S) { 129 | std::cout << "Error to parse T_S. Value not found." << std::endl; 130 | exit(1); 131 | } 132 | 133 | if(!T_K) { 134 | std::cout << "Error to parse T_K. Value not found." << std::endl; 135 | exit(1); 136 | } 137 | 138 | //Filling the parameters (an FC tile fixes T_R, T_C, T_G, T_X_ and T_Y_ to 1) 139 | this->T_R = 1; 140 | this->T_S=*T_S; 141 | this->T_C=1; 142 | this->T_K=*T_K; 143 | this->T_G=1; 144 | this->T_N=*T_N; 145 | this->T_X_=1; 146 | this->T_Y_=1; 147 | 148 | 149 | 150 | } 151 | 152 | //Folding is not specified in this case since this use case is not to load the tile into the architecture. Rather, it is to load the tile from the file and later specify all the parameters 153 | // to the architecture by means of some abstractions like an instruction. 
154 | 155 | 156 | 157 | } //End constructor 158 | 159 | 160 | 161 | void Tile::printConfiguration(std::ofstream& out, unsigned int indent) { //Writes the tile parameters to out as a JSON-style object; no line break is written after the closing brace 162 | out << ind(indent) << "\"TileConfiguration\" : {" << std::endl; 163 | out << ind(indent+IND_SIZE) << "\"T_R\" : " << this->T_R << "," << std::endl; 164 | out << ind(indent+IND_SIZE) << "\"T_S\" : " << this->T_S << "," << std::endl; 165 | out << ind(indent+IND_SIZE) << "\"T_C\" : " << this->T_C << "," << std::endl; 166 | out << ind(indent+IND_SIZE) << "\"T_K\" : " << this->T_K << "," << std::endl; 167 | out << ind(indent+IND_SIZE) << "\"T_G\" : " << this->T_G << "," << std::endl; 168 | out << ind(indent+IND_SIZE) << "\"T_N\" : " << this->T_N << "," << std::endl; 169 | out << ind(indent+IND_SIZE) << "\"T_X_\" : " << this->T_X_ << "," << std::endl; 170 | out << ind(indent+IND_SIZE) << "\"T_Y_\" : " << this->T_Y_ << "," << std::endl; 171 | out << ind(indent+IND_SIZE) << "\"VN_Size\" : " << this->VN_Size << "," << std::endl; 172 | out << ind(indent+IND_SIZE) << "\"Num_VNs\" : " << this->Num_VNs << "," << std::endl; 173 | out << ind(indent+IND_SIZE) << "\"folding_enabled\" : " << this->folding << std::endl; 174 | //out << ind(indent+IND_SIZE) << "\"n_folding\" : " << this->n_folding << std::endl; 175 | 176 | out << ind(indent) << "}"; 177 | } 178 | 179 | 180 | 181 | 182 | 183 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # OMEGA GNN Cost Modeling Framework 4 | 5 | OMEGA (***O***bserving ***M***apping ***E***fficiency over ***G***NN ***A***ccelerator) framework is the cost model for inter-phase Graph Neural Network (GNN) dataflows. OMEGA can be used to model other SpMM and GEMM multiphase dataflows as well. 
6 | 7 | # GNN Dataflow Analysis Using OMEGA Framework 8 | 9 | 10 | 11 | GNNs are becoming increasingly popular because of their ability to accurately learn representations from graph structured data. GNN inference runtime is dominated by two phases: (1) *Aggregation* which is an SpMM computation with irregular, workload dependent data accesses, and (2) *Combination* computations that can be cast as GEMMs, similar to dense DNNs as shown in the figure above. Prior works on DNN dataflow studies have described the data orchestration and data movement in DNN accelerators. However, these works only model dense computations and model one GEMM or convolution operation at a time. GNNs offer an additional knob of pipelining between the two phases which also leads to interdependence of the two dataflows. 12 | 13 | ## Taxonomy for GNN Dataflows 14 | 15 | We aim to provide analysis of the design-space of GNN dataflows over a flexible accelerator (for example - MAERI) which captures both individual phase dataflows (Intra-phase dataflows) and dataflows between the two phases (Inter-phase dataflows). 16 | To enable this, we propose a taxonomy that expresses: (1) *Aggregation intra-phase dataflow* (2) *Combination intra-phase dataflow* (3) *Inter-phase strategy*, and (4) *phase ordering* targeting a flexible accelerator like [MAERI](https://dl.acm.org/doi/pdf/10.1145/3173162.3173176) which can support execution of all possible dataflows. OMEGA is a cost model that models performance and energy for the GNN dataflows. 17 | 18 | ## OMEGA Framework 19 | 20 | OMEGA is built around [STONNE simulator](https://stonne-simulator.github.io), [STONNE codebase](https://github.com/stonne-simulator/stonne) 21 | 22 | It instantiates SpMM and GEMM on STONNE's flexible accelerator model [MAERI](https://dl.acm.org/doi/pdf/10.1145/3173162.3173176) and feeds the statistics to an inter-phase cost model that returns the metrics of a pipelined inter-phase dataflow as shown in the figure below.
23 | 24 | 25 | 26 | Regarding energy, OMEGA reports the global buffer and register file accesses. For the energy model, please refer to the paper. 27 | 28 | # Resources 29 | 30 | ## Publication 31 | 32 | For more details, please refer to our [IPDPS paper](https://ieeexplore.ieee.org/abstract/document/9820725). ArXiv Link - [here](https://arxiv.org/abs/2103.07977) 33 | 34 | Update: The paper is published in IPDPS 2022 and was nominated for the best paper award (Top 5 from 474 submissions). 35 | 36 | ## Bibtex 37 | If you use OMEGA and/or our GNN dataflow taxonomy in your research, please cite- 38 | ``` 39 | @INPROCEEDINGS{garg2022understanding, 40 | author={Garg, Raveesh and Qin, Eric and Muñoz-Martínez, Francisco and Guirado, Robert and Jain, Akshay and Abadal, Sergi and Abellán, José L. and Acacio, Manuel E. and Alarcón, Eduard and Rajamanickam, Sivasankaran and Krishna, Tushar}, 41 | booktitle={2022 IEEE International Parallel and Distributed Processing Symposium (IPDPS)}, 42 | title={Understanding the Design-Space of Sparse/Dense Multiphase GNN dataflows on Spatial Accelerators}, 43 | year={2022}, 44 | volume={}, 45 | number={}, 46 | pages={571-582}, 47 | doi={10.1109/IPDPS53621.2022.00062}} 48 | ``` 49 | 50 | 51 | # Tutorials 52 | 53 | ## Docker Image 54 | 55 | We have created a docker image for OMEGA for the purpose of the ASPLOS tutorial! This is the most stable version. Everything is installed in the image so using the simulator is much easier. Just type the next docker command to download and run the image: 56 | 57 | ``` 58 | docker run -it stonnesimulator/stonne-simulators 59 | cd OMEGA/omega-code 60 | ``` 61 | 62 | ## ASPLOS 2023 63 | 64 | OMEGA was presented at ASPLOS 2023 [STONNE+OMEGA Tutorial](https://stonne-simulator.github.io/ASPLOSTUT.html). It was also presented earlier at the ASPLOS 2022 tutorial.
65 | 66 | For the OMEGA demo and a video presentation of GNN Dataflows, please refer to [this video](https://www.youtube.com/watch?v=GbbdnrTdnEo&t=9742s) 67 | 68 | # Codebase - Master Branch 69 | 70 | For the most stable and most convenient to install codebase, please refer to the docker image used in the tutorial. 71 | 72 | The most stable version of this GitHub repo will always be the "master" branch. 73 | 74 | # Dependencies 75 | 76 | Please refer to the C++ and python requirements in the [STONNE codebase](https://github.com/stonne-simulator/stonne) 77 | 78 | # Documentation 79 | 80 | ``` 81 | omega-code (Directory which has the Makefile and the example scripts) 82 | |-->sample_graphs (Contains example input graphs in csr format) 83 | |-->stonne (Directory with stonne codebase and modifications for OMEGA) 84 | |--> src (Contains stonne cpp files and modifications for OMEGA) 85 | |-->omega.cpp (Wrapper that calls the stonne instances and has the analytical model) 86 | ``` 87 | Please refer to the [STONNE simulator](https://github.com/stonne-simulator/stonne) for details on simulation of an individual kernel. OMEGA is a wrapper around the STONNE simulator that instantiates SpMM and GEMM simulations, takes the individual kernel statistics and applies analytical equations on these statistics to return the statistics for Inter-phase dataflows. 88 | 89 | OMEGA takes the following inputs 90 | 91 |
    92 |
  • Dimensions 93 | 94 | * -V, -F, -G, -E (Edges, required for parsing) 95 |
  • Tile sizes for both phases. VF matrix is shared across phases so we use 'a' and 'c' to refer to the phase for which tile size is being specified 96 | 97 | * -T_Va, -T_N, -T_Fa, -T_Vc, -T_G, -T_Fc 98 |
  • Hardware Parameters 99 | 100 | * -pe_agg, -pe_cmb, -dn_bw_agg, -dn_bw_cmb, -rn_bw_agg, -rn_bw_cmb 101 |
  • Path to Input files for the adjacency matrix (CSR representation). Refer to the sample_graphs directory 102 | 103 | * -vertex_path, -edge_path 104 |
105 | 106 | An example command is as follows: 107 | 108 | ``` 109 | ./omega -V=1168 -F=28 -G=2 -E=2590 -T_Va=18 -T_N=1 -T_Fa=28 -T_Vc=18 -T_G=1 -T_Fc=28 -pe_agg=512 -pe_cmb=512 -dn_bw_agg=512 -rn_bw_agg=512 -dn_bw_cmb=512 -rn_bw_cmb=512 -vertex_path="sample_graphs/vertex_mutag_batch64.txt" -edge_path="sample_graphs/edge_mutag_batch64.txt" 110 | ``` 111 | 112 | For running an example simulation, go to the omega_code directory and first compile. 113 | 114 | ``` 115 | cd omega-code 116 | 117 | make all 118 | 119 | source example_simulation.sh 120 | ``` 121 | -------------------------------------------------------------------------------- /omega-code/stonne/include/SparseSDMemory.h: -------------------------------------------------------------------------------- 1 | #ifndef __SPARSESDMEMORY__H__ 2 | #define __SPARSESDMEMORY__H__ 3 | 4 | #include 5 | #include "Tile.h" 6 | #include "Connection.h" 7 | #include "Fifo.h" 8 | #include "types.h" 9 | #include "DNNLayer.h" 10 | #include "Unit.h" 11 | #include "Config.h" 12 | #include "DataPackage.h" 13 | #include "Stats.h" 14 | #include "MemoryController.h" 15 | #include "MultiplierNetwork.h" 16 | #include "ReduceNetwork.h" 17 | 18 | 19 | class SparseSDMemory : public MemoryController { 20 | private: 21 | DNNLayer* dnn_layer; // Layer loaded in the accelerator 22 | ReduceNetwork* reduce_network; //Reduce network used to be reconfigured 23 | MultiplierNetwork* multiplier_network; //Multiplier network used to be reconfigured 24 | 25 | unsigned int M; 26 | unsigned int N; 27 | Dataflow dataflow; 28 | unsigned int dim_sta; //Number of vectors sta matrix. Extracted from dnn_layer->get_K(); (See equivalence with CNN) 29 | unsigned int K; //Number of columns MK matrix and rows KN matrix. Extracted from dnn_layer->get_S(); 30 | unsigned int dim_str; //Number of vectors str matrix. 
Extracted from dnn_layer->get_N() 31 | unsigned int STA_DIST_ELEM; //Distance in bitmap memory between two elements of the same vector 32 | unsigned int STA_DIST_VECTOR; //Disctance in bitmap memory between two elements of differ vectors. 33 | 34 | unsigned int STR_DIST_ELEM; //Idem than before but with the STR matrix 35 | unsigned int STR_DIST_VECTOR; 36 | 37 | unsigned int OUT_DIST_VN; //To calculate the output memory address 38 | unsigned int OUT_DIST_VN_ITERATION; //To calculate the memory address 39 | Connection* write_connection; 40 | SparsityControllerState current_state; //Stage to control what to do according to the state 41 | std::vector configurationVNs; //A set of each VN size mapped onto the architecture. 42 | std::vector vnat_table; //Every element is a VN, indicating the column that is calculating 43 | //Connection* read_connection; 44 | std::vector read_connections; //Input port connections. There are as many connections as n_read_ports are specified. 45 | 46 | //Input parameters 47 | unsigned int num_ms; 48 | unsigned int n_read_ports; 49 | unsigned int n_write_ports; 50 | unsigned int write_buffer_capacity; 51 | unsigned int port_width; 52 | 53 | unsigned int ms_size_per_input_port; 54 | //Fifos 55 | Fifo* write_fifo; //Fifo uses to store the writes before going to the memory 56 | 57 | std::vector input_fifos; //Fifos used to store the inputs before being fetched 58 | std::vector psum_fifos; //Fifos used to store partial psums before being fetched 59 | //Fifo* read_fifo; //Fifo used to store the inputs before being fetched 60 | //Fifo* psums_fifo; //Fifo used to store partial psums before being fetched 61 | 62 | //Addresses 63 | address_t STA_address; 64 | address_t STR_address; 65 | address_t output_address; 66 | 67 | 68 | 69 | //Metadata addresses 70 | metadata_address_t STA_metadata; 71 | metadata_address_t STR_metadata; 72 | metadata_address_t output_metadata; 73 | 74 | //Counters to calculate SRC and DST 75 | unsigned int* sta_counters_table; 
//Matrix of size rows*columns to figure out the dst of each sta value 76 | unsigned int* str_counters_table; //Matrix of size rows*columns of the str matrix to calculate the source of each bit enabled. 77 | 78 | //Pointers 79 | unsigned int str_current_index; //Streaming current index to calculate the next values to stream 80 | unsigned int sta_current_index_metadata; //Stationary matrix current index (e.g., row in MK) 81 | unsigned int sta_current_index_matrix; //Index to next element in the sparse matrix 82 | unsigned int sta_current_j_metadata; //Index to current element in the same cluster. Used to manage folding 83 | unsigned int sta_last_j_metadata; //Indext to last element in the same cluster. Used to manage folding 84 | //the boundaries of a certain fold is sta_current_j_metadata and sta_last_j_metadata 85 | 86 | //Signals 87 | bool configuration_done; //Indicates whether the architecture has been configured to perform the delivering 88 | bool stationary_distributed; //Indicates if the stationary values has been distributed for a certain iteration 89 | bool stationary_finished; //Flag that indicates that all the stationary values have been delivered 90 | bool stream_finished; //Flag that indicates that all the streaming values have been delivered 91 | bool execution_finished; //Flag that indicates when the execution is over. This happens when all the output values have been calculated. 92 | bool sta_iter_completed; //Indicates if the pending psums have been writen back 93 | 94 | 95 | bool metadata_loaded; //Flag that indicates whether the metadata has been loaded 96 | bool layer_loaded; //Flag that indicates whether the layer has been loaded. 
97 | 98 | 99 | unsigned int current_output; 100 | unsigned int output_size; 101 | 102 | unsigned int current_output_iteration; 103 | unsigned int output_size_iteration; 104 | 105 | //For stats 106 | unsigned int n_ones_sta_matrix; 107 | unsigned int n_ones_str_matrix; 108 | std::vector write_port_connections; 109 | cycles_t local_cycle; 110 | SDMemoryStats sdmemoryStats; //To track information 111 | 112 | //Aux functions 113 | void receive(); 114 | void send(); 115 | void sendPackageToInputFifos(DataPackage* pck); 116 | std::vector getWritePortConnections() const {return this->write_port_connections;} 117 | 118 | 119 | public: 120 | SparseSDMemory(id_t id, std::string name, Config stonne_cfg, Connection* write_connection); 121 | ~SparseSDMemory(); 122 | void setLayer(DNNLayer* dnn_layer, address_t KN_address, address_t MK_address, address_t output_address, Dataflow dataflow); 123 | void setTile(Tile* current_tile) {assert(false);} 124 | void setReadConnections(std::vector read_connections); 125 | void setWriteConnections(std::vector write_port_connections); //All the write connections must be set at a time 126 | void cycle(); 127 | bool isExecutionFinished(); 128 | 129 | void setSparseMetadata(metadata_address_t MK_metadata, metadata_address_t KN_metadata, metadata_address_t output_metadata); // Supported by this controller 130 | void setReduceNetwork(ReduceNetwork* reduce_network) {this->reduce_network=reduce_network;} 131 | //Used to configure the MultiplierNetwork according to the controller 132 | void setMultiplierNetwork(MultiplierNetwork* multiplier_network) {this->multiplier_network = multiplier_network;} 133 | void printStats(std::ofstream& out, unsigned int indent); 134 | void printEnergy(std::ofstream& out, unsigned int indent); 135 | SDMemoryStats getStats() {return this->sdmemoryStats;} 136 | 137 | }; 138 | 139 | 140 | #endif //SPARSESDMEMORY_H_ 141 | -------------------------------------------------------------------------------- 
/omega-code/stonne/src/CompilerMSN.cpp: -------------------------------------------------------------------------------- 1 | #include "CompilerMSN.h" 2 | #include "Tile.h" 3 | #include "utility.h" 4 | #include 5 | #include "types.h" 6 | #include 7 | #include "cpptoml.h" 8 | 9 | void CompilerMSN::configureSignals(Tile* current_tile, DNNLayer* dnn_layer, unsigned int num_ms, unsigned int n_folding) { 10 | assert(ispowerof2(num_ms)); 11 | assert(current_tile->get_VN_Size()*current_tile->get_Num_VNs() <= num_ms); 12 | this->current_tile = current_tile; 13 | this->dnn_layer = dnn_layer; 14 | this->num_ms = num_ms; 15 | this->n_folding=n_folding; 16 | this->signals_configured = true; 17 | //Configuring Multiplier switches 18 | this->generate_ms_signals(num_ms); 19 | 20 | } 21 | 22 | void CompilerMSN::configureSparseSignals(std::vector sparseVNs, DNNLayer* dnn_layer, unsigned int num_ms) { 23 | assert(ispowerof2(num_ms)); 24 | //Checking if there are enough multipliers 25 | int num_ms_used = 0; 26 | for(int i=0; isparseVNs = sparseVNs; 32 | this->dnn_layer = dnn_layer; 33 | this->num_ms = num_ms; 34 | this->signals_configured = true; 35 | //Configuring Multiplier switches 36 | this->generate_ms_sparse_signals(num_ms); 37 | 38 | } 39 | 40 | 41 | void CompilerMSN::generate_ms_signals(unsigned int num_ms) { 42 | //1. Indicating to each MS its corresponding VN ID 43 | //Saving the number of iterations needed. Used in the cases where an extra MS is needed 44 | //unsigned int n_folding=(this->dnn_layer->get_R() / this->current_tile->get_T_R())*(this->dnn_layer->get_S() / this->current_tile->get_T_S()) * (this->dnn_layer->get_C() / this->current_tile->get_T_C()); 45 | for(int i=0; i < this->current_tile->get_Num_VNs(); i++) { 46 | for(int j=0; j < this->current_tile->get_VN_Size(); j++) { 47 | unsigned int ms_index = i*this->current_tile->get_VN_Size() + j; 48 | ms_vn_configuration[ms_index]=i; //Allocating the corresponding VN. 
49 | n_folding_configuration[ms_index]=n_folding; //Allocating the number of folds. In dense all the same 50 | direct_forwarding_psum_enabled[ms_index]=false; 51 | 52 | if(this->current_tile->get_folding_enabled() && (j==0)) { //The first MS of each VN is the aux MS to accumulate psums 53 | forwarding_psum_enabled[ms_index]=true; 54 | } 55 | 56 | else { 57 | forwarding_psum_enabled[ms_index]=false; 58 | } 59 | 60 | 61 | } 62 | } 63 | 64 | //2. Indicating to each MS wether it has to receive or send data 65 | 66 | if(this->current_tile->get_T_Y_() > 1) { //Conditions to enable the fw links. T_Y_ must be greater than 1 and stride must be 1. If not, all the signals must be false 67 | for(int i=0; i < this->current_tile->get_Num_VNs(); i++) { 68 | for(int j=0; j < this->current_tile->get_VN_Size(); j++) { 69 | unsigned int ms_index = i*this->current_tile->get_VN_Size() + j; 70 | ms_fwsend_enabled[ms_index]=false; 71 | ms_fwreceive_enabled[ms_index]=false; 72 | 73 | } 74 | } 75 | 76 | } 77 | 78 | else { 79 | unsigned int shift_ms = 0; 80 | if(this->current_tile->get_folding_enabled()) { 81 | shift_ms =1; //If there is folding we leave a ms to accumulate 82 | } 83 | for(int i=0; i < this->current_tile->get_Num_VNs(); i++) { 84 | for(int c=0; c < this->current_tile->get_T_C(); c++) { //For each channel 85 | for(int r=0; r < this->current_tile->get_T_R() ; r++) { //For each row 86 | for(int s=0; s < this->current_tile->get_T_S(); s++) { 87 | int ms_index = i*this->current_tile->get_VN_Size() + c*this->current_tile->get_T_R()*this->current_tile->get_T_S() + r*this->current_tile->get_T_S() + s + shift_ms; //Note that we sum shift_ms 88 | // Indicating to each MS whether it has to send data to the fw link (MS LEFT) or not . 
89 | if(s > 0) { //If the ms does not contain one first column, it has to send to the left 90 | ms_fwsend_enabled[ms_index]=true; 91 | } 92 | else { 93 | ms_fwsend_enabled[ms_index]=false; 94 | } 95 | 96 | // Indicating to each MS whether it has to receive data from the fw link (MS RIGHT) or not. 97 | if(s < (this->current_tile->get_T_S()-1)) { //If the ms does not map one last column, it has to receive data from the right 98 | ms_fwreceive_enabled[ms_index]=true; 99 | } 100 | 101 | else { 102 | ms_fwreceive_enabled[ms_index]=false; 103 | } 104 | 105 | 106 | } 107 | } 108 | } 109 | 110 | } //End for i 111 | 112 | } //End else 113 | 114 | //Disabling if padding 115 | //If stride > 1 then all the signals of ms_fwreceive_enabled and ms_fwsend_enabled must be disabled since no reuse between MSwitches can be done. In order to not to incorporate stride 116 | //as a tile parameter, we leave the class Tile not aware of the stride. Then, if stride exists, here the possible enabled signals (since tile does not know about tile) are disabled. 117 | //Zero-remainder constraint. Something similar happens if T_S % S != 0 118 | if((this->dnn_layer->get_strides() > 1) || ((this->dnn_layer->get_S() % this->current_tile->get_T_S())!=0)) { 119 | for(unsigned int i=0; isparseVNs.size(); i++) { 136 | for(int j=0; j < this->sparseVNs[i].get_VN_Size(); j++) { 137 | ms_vn_configuration[ms_index]=i; //Allocating the corresponding VN. 138 | n_folding_configuration[ms_index]=1; //TODO change this later 139 | forwarding_psum_enabled[ms_index]=false; //In sparse this type of forwrding is always false 140 | if(this->sparseVNs[i].getFolding() && (j==0)) { //The first MS of each VN is the aux MS to accumulate psums 141 | direct_forwarding_psum_enabled[ms_index]=true; 142 | } 143 | 144 | else { 145 | direct_forwarding_psum_enabled[ms_index]=false; 146 | } 147 | 148 | //2. Indicating to each MS wether it has to receive or send data. 
In this case, since this is to run GEMM, 149 | //the fw links must be disabled 150 | 151 | ms_fwsend_enabled[ms_index]=false; 152 | ms_fwreceive_enabled[ms_index]=false; 153 | 154 | 155 | ms_index++; 156 | 157 | 158 | } 159 | 160 | 161 | } 162 | 163 | 164 | } 165 | 166 | 167 | 168 | 169 | -------------------------------------------------------------------------------- /omega-code/stonne/include/SDMemory.h: -------------------------------------------------------------------------------- 1 | //Created by Francisco Munoz Martinez on 02/07/2019 2 | #ifndef __SDMEMORY__H__ 3 | #define __SDMEMORY__H__ 4 | 5 | #include 6 | #include "Tile.h" 7 | #include "Connection.h" 8 | #include "Fifo.h" 9 | #include "types.h" 10 | #include "DNNLayer.h" 11 | #include "Unit.h" 12 | #include "Config.h" 13 | #include "DataPackage.h" 14 | #include "Stats.h" 15 | #include "MemoryController.h" 16 | 17 | //This class contains for each VN the next address to write 18 | class VNAT_Register { 19 | public: 20 | unsigned int VN; //VN Saved 21 | unsigned int base_addr; //Base addr of this VN (i.e., the first element to compute). 
22 | unsigned int addr; //Offset 23 | unsigned int current_N; 24 | unsigned int current_G; 25 | unsigned int current_K; 26 | unsigned int current_X; 27 | unsigned int current_Y; 28 | unsigned int current_R; 29 | unsigned int current_S; 30 | unsigned int current_C; 31 | //To calculate next output_address 32 | unsigned int iter_N; 33 | unsigned int iter_G; 34 | unsigned int iter_K; 35 | unsigned int iter_X; 36 | unsigned int iter_Y; 37 | unsigned int iter_R; 38 | unsigned int iter_S; 39 | unsigned int iter_C; 40 | //Bases to figure out each vn associated dimensions 41 | unsigned int base_N; 42 | unsigned int base_G; 43 | unsigned int base_K; 44 | unsigned int base_X; 45 | unsigned int base_Y; 46 | unsigned int n_psums; //psums per window 47 | unsigned int current_psum; 48 | DNNLayer* dnn_layer; 49 | Tile* current_tile; 50 | bool finished; 51 | bool valid_value; //Zero-remainder constraint 52 | 53 | 54 | VNAT_Register(unsigned int VN, unsigned int addr, unsigned int N, unsigned int G, unsigned int K, unsigned int X, unsigned int Y, 55 | unsigned int iter_N, unsigned int iter_G, unsigned int iter_K, unsigned int iter_X, unsigned int iter_Y, unsigned int iter_R, unsigned int iter_S, unsigned int iter_C, DNNLayer* dnn_layer, Tile* current_tile); 56 | void update(); //Update variables to the next cycle 57 | unsigned int get_address(); 58 | 59 | }; 60 | 61 | class SDMemory : public MemoryController { 62 | private: 63 | DNNLayer* dnn_layer; // Layer loaded in the accelerator 64 | Tile* current_tile; // Layer loaded in the tile 65 | ReduceNetwork* reduce_network; //This is not used in this controller as the configuration is performed in STONNEModel when the tile is loaded, and this is needed just once 66 | MultiplierNetwork* multiplier_network; //Idem as reduce_network 67 | Connection* write_connection; 68 | //Connection* read_connection; 69 | std::vector read_connections; //Input port connections. There are as many connections as n_read_ports are specified. 
70 | 71 | //Input parameters 72 | unsigned int num_ms; 73 | unsigned int n_read_ports; 74 | unsigned int n_write_ports; 75 | unsigned int write_buffer_capacity; 76 | unsigned int port_width; 77 | 78 | unsigned int ms_size_per_input_port; 79 | //Fifos 80 | Fifo* write_fifo; //Fifo uses to store the writes before going to the memory 81 | 82 | std::vector input_fifos; //Fifos used to store the inputs before being fetched 83 | std::vector psum_fifos; //Fifos used to store partial psums before being fetched 84 | //Fifo* read_fifo; //Fifo used to store the inputs before being fetched 85 | //Fifo* psums_fifo; //Fifo used to store partial psums before being fetched 86 | 87 | //Addresses 88 | address_t filter_address; 89 | address_t input_address; 90 | address_t output_address; 91 | 92 | //Signals 93 | bool weights_distributed; //Indicates if the weights have been distributed for a certain iteration 94 | bool fw_link_enabled; //Indicates if the fw link is enabled in this cycle and therefore the number of bw used per cycle is less 95 | bool weights_finished; //Flag that indicates that all the weights have been delivered 96 | bool input_finished; //Flag that indicates that all the inputs have been delivered 97 | bool tile_loaded; //SPecify if the tile is loaded 98 | bool execution_finished; //Flag that indicates when the execution is over. This happens when all the opixels have been calculated. 
99 | 100 | //Variables to track the progress of the execution 101 | unsigned int iter_R; 102 | unsigned int iter_S; 103 | unsigned int iter_C; 104 | unsigned int iter_G; 105 | unsigned int iter_N; 106 | unsigned int iter_K; 107 | unsigned int iter_X; 108 | unsigned int iter_Y; 109 | 110 | unsigned int current_R; 111 | unsigned int current_S; 112 | unsigned int current_C; 113 | unsigned int current_G; 114 | unsigned int current_N; 115 | unsigned int current_K; 116 | unsigned int current_X; 117 | unsigned int current_Y; 118 | 119 | //Variable to track the number of opixels calculated 120 | unsigned int current_output_pixel; //This variable has the count for the current number of output pixels calculated 121 | unsigned int output_pixels_to_compute; //This variable has the number of output pixels that the simulator must calculate before finishing the execution 122 | unsigned int output_psums_per_channel; 123 | 124 | //Variables to make the calculation easier 125 | unsigned int channel_filter_size; 126 | unsigned int row_filter_size; 127 | unsigned int filter_size; 128 | unsigned int channel_input_size; 129 | unsigned int row_input_size; 130 | unsigned int input_size; 131 | unsigned int channel_output_size; 132 | unsigned int row_output_size; 133 | unsigned int output_size; 134 | unsigned int group_size; 135 | 136 | unsigned int* clocked_op; 137 | 138 | 139 | std::list packages_created; // Vector used to track the packages and delete them at the end of the execution 140 | std::vector write_port_connections; 141 | VNAT_Register** VNAT; //VNAT with as many registers as VN configured in the accelerator 142 | cycles_t local_cycle; 143 | SDMemoryStats sdmemoryStats; //To track information 144 | 145 | //Aux functions 146 | void receive(); 147 | void sendPackageToInputFifos(DataPackage* pck); 148 | void send(); 149 | std::vector getWritePortConnections() const {return this->write_port_connections;} 150 | 151 | 152 | public: 153 | SDMemory(id_t id, std::string name, Config 
stonne_cfg, Connection* write_connection); 154 | ~SDMemory(); 155 | void setLayer(DNNLayer* dnn_layer, address_t input_address, address_t filter_address, address_t output_address, Dataflow dataflow); 156 | void setTile(Tile* current_tile); 157 | void setReadConnections(std::vector read_connections); 158 | void setWriteConnections(std::vector write_port_connections); //All the write connections must be set at a time 159 | void setSparseMetadata(metadata_address_t MK_metadata, metadata_address_t KN_metadata, metadata_address_t output_metadata) {assert(false);} //Not supported by this controller 160 | void setReduceNetwork(ReduceNetwork* reduce_network) {this->reduce_network=reduce_network;} 161 | //Used to configure the MultiplierNetwork according to the controller if needed 162 | void setMultiplierNetwork(MultiplierNetwork* multiplier_network) {this->multiplier_network = multiplier_network;} 163 | 164 | 165 | void cycle(); 166 | bool isExecutionFinished(); 167 | 168 | void printStats(std::ofstream& out, unsigned int indent); 169 | void printEnergy(std::ofstream& out, unsigned int indent); 170 | void setClocking(unsigned int* clocked_op){this->clocked_op=clocked_op;} 171 | SDMemoryStats getStats() {return this->sdmemoryStats;} 172 | }; 173 | 174 | 175 | #endif //SDMEMORY_H_ 176 | --------------------------------------------------------------------------------