├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── msg.mk ├── setup.sh ├── softbrain-config ├── Makefile ├── configs │ └── diannao_simd64.sbmodel ├── make.config ├── make.rules └── src │ ├── Makefile │ ├── direction.cpp │ ├── direction.h │ ├── fixed_point.h │ ├── fu_model.cpp │ ├── fu_model.h │ ├── full.sbinst │ ├── inst_model.cpp │ ├── inst_model.h │ ├── insts │ ├── Abs16x4.h │ ├── Acc16x4.h │ ├── Acc64.h │ ├── Add16x4.h │ ├── Add32x2.h │ ├── Add64.h │ ├── And.h │ ├── Copy.h │ ├── Div16x4.h │ ├── FAdd32x2.h │ ├── FAdd64.h │ ├── FMul32x2.h │ ├── FMul64.h │ ├── FRed32x2.h │ ├── FxAdd16x4.h │ ├── FxAdd32x2.h │ ├── FxExp16x4.h │ ├── FxMul16x4.h │ ├── FxMul32x2.h │ ├── FxRed16x4.h │ ├── FxRed32x2.h │ ├── FxRelu16x4.h │ ├── FxSig16x4.h │ ├── FxTanh16x4.h │ ├── HAdd16x4.h │ ├── ICmpEQ.h │ ├── LShf64.h │ ├── Max16x4.h │ ├── Min16x4.h │ ├── Mul16x4.h │ ├── Mul32x2.h │ ├── Mul64.h │ ├── Or.h │ ├── RShf16x4.h │ ├── RShf2_16x4.h │ ├── RShf32x2.h │ ├── RShf4_16x4.h │ ├── RShf64.h │ ├── Red16x4.h │ ├── Red32x2.h │ ├── RedMax16x4.h │ ├── RedMin16x4.h │ ├── RedSMax16x4.h │ ├── RedSMin16x4.h │ ├── SMax16x4.h │ ├── SMin16x4.h │ ├── Select.h │ ├── Sig16.h │ ├── Sub16x4.h │ ├── Sub64.h │ ├── TAdd16x4.h │ └── Xor.h │ ├── model.cpp │ ├── model.h │ ├── model_parsing.cpp │ ├── model_parsing.h │ ├── sub_model.cpp │ └── sub_model.h ├── softbrain-emu ├── Makefile └── src │ ├── .gitignore │ ├── create_insts.c │ ├── sb.h │ ├── sb_c_insts.h │ ├── sb_emu.h │ ├── sb_init.h │ └── softbrain.C ├── softbrain-scheduler ├── .gitignore ├── Makefile ├── dfgs │ └── 5x4 │ │ ├── bfs.dfg │ │ ├── dot.dfg │ │ ├── long.dfg │ │ ├── medium.dfg │ │ ├── medium_short.dfg │ │ ├── mm_sb.dfg │ │ ├── out.txt │ │ ├── pool2x2l4avg.dfg │ │ ├── pool4x4l2avg.dfg │ │ ├── pool_simple.dfg │ │ ├── red16to1sig.dfg │ │ ├── red16to1sigx2-simple.dfg │ │ ├── red16to1sigx2.dfg │ │ ├── red32to1sig.dfg │ │ ├── red8to1sig.dfg │ │ ├── run-all-sched.sh │ │ ├── run-sched.sh │ │ ├── spmv.dfg │ │ ├── stencil.dfg │ │ ├── sum.txt │ │ ├── vadd.dfg │ │ ├── vadd4.dfg │ │ ├── vadd5.dfg │ │ ├── vadd6.dfg │ │ └── viterbi.dfg ├── drivers │ ├── Makefile │ └── sb_dfg_emu.cpp ├── make.config ├── make.rules └── src │ ├── Makefile │ ├── sbpdg.cpp │ └── sbpdg.h └── workloads └── diannao ├── Makefile ├── classifier.cpp ├── convolution.cpp ├── convolution_old.cpp ├── dnn.hpp ├── pool2x2avg.dfg ├── pool2x2l4avg.dfg ├── pool4x4l2avg.dfg ├── pooling.cpp ├── red16to1sig.dfg ├── red16to1sigx2.dfg ├── red32to1sig.dfg ├── red8to1sig.dfg ├── run-all.sh ├── sim_timing.h └── softbrain.hpp /.gitignore: -------------------------------------------------------------------------------- 1 | ss-tools/ 2 | include/ 3 | 4 | # Prerequisites 5 | *.d 6 | 7 | # Compiled Object files 8 | *.slo 9 | *.lo 10 | *.o 11 | *.obj 12 | 13 | # Precompiled Headers 14 | *.gch 15 | *.pch 16 | 17 | # Compiled Dynamic libraries 18 | *.so 19 | *.dylib 20 | *.dll 21 | 22 | # Fortran module files 23 | *.mod 24 | *.smod 25 | 26 | # Compiled Static libraries 27 | *.lai 28 | *.la 29 | *.a 30 | *.lib 31 | 32 | # Executables 33 | *.exe 34 | *.out 35 | *.app 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, PolyArch 4 | All rights reserved. 
5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: default 2 | default: build-all 3 | 4 | include msg.mk 5 | 6 | SBLIBS = $(addprefix softbrain-, config scheduler emu) 7 | 8 | MODULES = $(SBLIBS) 9 | CLEAN_MODULES = $(addprefix clean-,$(MODULES)) 10 | 11 | .PHONY: $(MODULES) $(CLEAN_MODULES) 12 | 13 | .PHONY: build-all 14 | build-all: $(MODULES) 15 | 16 | .PHONY: clean-all 17 | clean-all: $(CLEAN_MODULES) 18 | 19 | SIMPLE = $(SBLIBS) 20 | 21 | $(SIMPLE): 22 | $(MAKE) -C $@ install 23 | 24 | $(addprefix clean-,$(SIMPLE)): 25 | $(MAKE) -C $(patsubst clean-%,%,$@) clean 26 | 27 | $(addprefix clean-,$(AUTOTOOLS)): 28 | rm -rf $(patsubst clean-%,%,$@)/build 29 | 30 | # Dependencies 31 | softbrain-scheduler: softbrain-config 32 | softbrain-emu: softbrain-scheduler softbrain-config 33 | 34 | 35 | full-rebuild: 36 | @echo "Wipe \$$SS_TOOLS ($$SS_TOOLS) and rebuild everything?" 37 | @read -p "[Y/n]: " yn && { [ -z $$yn ] || [ $$yn = Y ] || [ $$yn = y ]; } 38 | rm -rf "$$SS_TOOLS" 39 | $(MAKE) clean-all 40 | $(MAKE) build-all 41 | 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Repo Deprecated 2 | 3 | The content of this repo, including the stream-dataflow ISA implementation, has been moved to: 4 | 5 | github.com/PolyArch/stream-specialization-stack 6 | 7 | The new repository contains example workloads, a compiler, and a gem5-based simulator. 8 | 9 | 10 | # stream-dataflow 11 | Stream-Dataflow Infrastructure 12 | 13 | This is the location of the public release of the infrastructure for the stream dataflow architecture. 14 | Please keep in mind this is an early-stage release, and more advanced features will follow. 
15 | 16 | Description of Folders 17 | * softbrain-config: Library for Defining Accelerator Substrate Topology, Features, and Instructions 18 | * softbrain-emu: Library for software emulation of softbrain 19 | * softbrain-scheduler: Library for parsing and scheduling dataflow graphs to a particular topology. (right now only includes emulator code) 20 | * gem5: (To be included) 21 | * workloads: Example workloads including kernels based on the diannao parallelization strategy. 22 | 23 | 24 | # Try it: 25 | ```bash 26 | source setup.sh 27 | make -j8 28 | cd workloads/diannao 29 | make -j8 30 | bash run-all.sh 31 | ``` 32 | -------------------------------------------------------------------------------- /msg.mk: -------------------------------------------------------------------------------- 1 | define env-msg 2 | 3 | The following environment variables must be defined 4 | SS_STACK (suggested: $(PWD)) 5 | SS_TOOLS (suggested: $$SS_STACK/ss-tools) 6 | 7 | Additionally, $$SS_TOOLS/bin must be in your $$PATH. 8 | 9 | endef 10 | 11 | ifeq ($(SS_TOOLS),) 12 | $(error $(env-msg)) 13 | endif 14 | 15 | ifeq ($(SS_STACK),) 16 | $(error $(env-msg)) 17 | endif 18 | 19 | ifeq ($(findstring $(SS_TOOLS)/bin,$(PATH)),) 20 | $(error $(env-msg)) 21 | endif 22 | -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- 1 | export SS_STACK=`pwd` 2 | export SS_TOOLS=$SS_STACK/ss-tools 3 | export PATH=$SS_TOOLS/bin:$PATH 4 | export LD_LIBRARY_PATH=$SS_TOOLS/lib:$LD_LIBRARY_PATH 5 | 6 | 7 | -------------------------------------------------------------------------------- /softbrain-config/Makefile: -------------------------------------------------------------------------------- 1 | include $(SS_STACK)/msg.mk 2 | prefix:=$(SS_TOOLS) 3 | 4 | 5 | level=./ 6 | include make.config 7 | 8 | 9 | 10 | .PHONY: program 11 | 12 | all: directories program 13 | 14 | program: 15 | +make -C src 16 | 17 | install: program 18 | ${MKDIR_P} ${prefix}/lib 19 | cp ${build}/lib/* ${prefix}/lib 20 | ${MKDIR_P} ${prefix}/include/softbrain-config 21 | cp src/*.h ${prefix}/include/softbrain-config/ 22 | cp -rf configs ${prefix}/ 23 | 24 | clean: 25 | make -C src clean 26 | 27 | include make.rules 28 | -------------------------------------------------------------------------------- /softbrain-config/configs/diannao_simd64.sbmodel: -------------------------------------------------------------------------------- 1 | [fu-model] 2 | # fu_types and capabilities 3 | # Number after colon specifies encoding 4 | FU_TYPE FU_MUL: Mul16x4:2, Mul32x2:3, Mul64:4, RShf64:5, LShf64:6, FMul32x2: 7, Div16x4: 8, FxMul16x4: 9 5 | FU_TYPE FU_ADD: Add16x4:3, Red16x4:4, HAdd16x4:5, RShf4_16x4:6, RShf2_16x4:7, Add32x2:8, Red32x2:9, Add64:10, RShf64:11, Sub16x4:12, Abs16x4:13, Sub64:14, Max16x4:15, Min16x4:16, SMax16x4:17, SMin16x4:18, RedMax16x4:19, RedMin16x4:20, RedSMax16x4:21, RedSMin16x4:22, Select:23, And:24, Or:25, Xor:26, LShf64:27, Acc64:28, ICmpEQ:29, Acc16x4:30, FAdd32x2:31, RShf16x4:32, FRed32x2: 33, FxAdd16x4: 34, FxRed16x4: 35, FxExp16x4: 36 6 | 7 | FU_TYPE FU_SPC: RShf4_16x4:6, RShf2_16x4:7, Sig16:8, RShf16x4:32, FxRelu16x4: 53, FxSig16x4: 54, FxTanh16x4: 55 8 | 9 | 10 | OUT_DIRECTIONS: NE:0 SE:1 SW:2 NW:3 11 | 12 | [switch-model] 13 | #OUT_DIRECTIONS: N:0 NE:1 E:2 SE:3 S:4 SW:5 W:6 NW:7 14 | IN_DIRECTIONS: N:0 NE:1 E:2 S:3 W:4 15 | 16 | 17 | [sub-model] 18 | # DySER 8x8 Hetero Model File 19 | topology: grid 20 | width: 5 21 | height: 4 22 | 23 | 
io_layout: three_sides_in 24 | ins_per_switch: 3 25 | outs_per_switch: 3 26 | 27 | # Fully Specified Layout 28 | SB_LAYOUT: FULL 29 | FU_MUL FU_MUL FU_MUL FU_MUL FU_MUL 30 | FU_MUL FU_ADD FU_ADD FU_ADD FU_ADD 31 | FU_MUL FU_ADD FU_ADD FU_ADD FU_ADD 32 | FU_MUL FU_ADD FU_ADD FU_SPC FU_SPC 33 | 34 | #FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL 35 | #FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD 36 | #FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL 37 | #FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD 38 | #FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL 39 | #FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD 40 | #FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL 41 | #FU_MUL FU_ADD FU_MUL FU_SIG FU_MUL FU_ADD FU_MUL FU_SIG 42 | 43 | [io-model] 44 | #vector ports specify portno:vec_offset1 vec_offset2 45 | VPORT_IN 0: 2:0, 5:1, 8:2, 11:3, 17:4, 20:5, 23:6, 26:7 #standard 8-wide port 46 | VPORT_IN 1: 4:0, 7:1, 10:2, 16:3, 19:4, 22:5, 25:6, 31:7 #standard 8-wide port 47 | VPORT_IN 2: 4:0, 10:1, 19:2, 25:3 #4-wide 48 | VPORT_IN 3: 7:0, 18:1, 22:2, 31:3 #4-wide 49 | VPORT_IN 4: 3:0 #1 2 3 4 5 6 7 #8-deep output Port 50 | VPORT_IN 5: 6:0 #1 2 3 4 5 6 7 #8-deep output Port 51 | VPORT_IN 6: 12:0 #1 2 3 4 5 6 7 #8-deep output Port 52 | VPORT_IN 7: 15:0 #1 2 3 4 5 6 7 #8-deep output Port 53 | VPORT_IN 8: 18:0 #1 2 3 4 5 6 7 #8-deep output Port 54 | VPORT_IN 9: 24:0 #1 2 3 4 5 6 7 #8-deep output Port 55 | VPORT_IN 10: 27:0 #1 2 3 4 5 6 7 #8-deep output Port 56 | VPORT_IN 10: 32:0 #1 2 3 4 5 6 7 #8-deep output Port 57 | VPORT_IN 11: 4:0, 7:1, #2-wide 58 | VPORT_IN 12: 10:0, 16:1, #2-wide 59 | VPORT_IN 13: 19:0, 22:1, #2-wide 60 | VPORT_IN 14: 25:0, 31:1, #2-wide 61 | VPORT_IN 15: 8:0, 20:1, #2-wide 62 | VPORT_IN 16: 2:0, 8:1, 17:2, 23:3 #4-wide 63 | VPORT_IN 17: 5:0, 11:1, 20:2, 26:3 #4-wide 64 | VPORT_IN 18: 3:0, 12:1, 18:2, 27:3 #4-wide 65 | VPORT_IN 19: 3:0, 18:1, #2-wide 66 | VPORT_IN 20: 6:0, 24:1, #2-wide 67 | VPORT_IN 21: 12:0, 15:1, #2-wide 68 | VPORT_IN 22: 27:0, 32:1, #2-wide 69 | 70 | 71 | 72 | 73 | VPORT_OUT 0: 1:0, 3:1, 5:2, 6:3, 8:4, 9:5, 11:6, 12:7 #8-wide output Port 74 | VPORT_OUT 1: 2:0, 7:1, 10:2, 13:3 #4-wide output Port 75 | VPORT_OUT 2: 0:0 #1 2 3 4 5 6 7 #8-deep output Port 76 | VPORT_OUT 3: 2:0 #1 2 3 4 5 6 7 #8-deep output Port 77 | VPORT_OUT 4: 4:0 #1 2 3 4 5 6 7 #8-deep output Port 78 | VPORT_OUT 5: 6:0 #1 2 3 4 5 6 7 #8-deep output Port 79 | VPORT_OUT 6: 8:0 #1 2 3 4 5 6 7 #8-deep output Port 80 | VPORT_OUT 7: 10:0 #1 2 3 4 5 6 7 #8-deep output Port 81 | VPORT_OUT 8: 12:0 #1 2 3 4 5 6 7 #8-deep output Port 82 | VPORT_OUT 8: 1:0, 3:1 83 | VPORT_OUT 9: 5:0, 7:1 84 | VPORT_OUT 10: 9:0, 11:1 85 | VPORT_OUT 11: 13:0, 14:1 86 | 87 | #PORT_IN 0: 17 18 19 #any of these 88 | #PORT_IN 1: 20 21 22 #any of these 89 | #PORT_OUT 0: 17 18 19 20 21 22 #any of these 90 | #PORT_OUT 1: 25 26 27 #any of these 91 | 92 | 93 | -------------------------------------------------------------------------------- /softbrain-config/make.config: -------------------------------------------------------------------------------- 1 | MKDIR_P = mkdir -p 2 | 3 | SYS = $(shell sys) 4 | CXX = g++ 5 | CXXFLAGS := -Wall -g -std=c++11 -O3 -lm 6 | 7 | build ?= $(shell pwd)/${level}/build 8 | prefix ?= $(shell pwd) 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /softbrain-config/make.rules: -------------------------------------------------------------------------------- 1 | .PHONY: directories 2 | 3 | 
directories: 4 | ${MKDIR_P} ${build}/obj 5 | ${MKDIR_P} ${build}/lib 6 | -------------------------------------------------------------------------------- /softbrain-config/src/Makefile: -------------------------------------------------------------------------------- 1 | level=../ 2 | include ${level}/make.config 3 | 4 | SOURCES= model.cpp model_parsing.cpp fu_model.cpp sub_model.cpp direction.cpp sbinst.cpp 5 | PRE_OBJECTS=$(SOURCES:.cpp=.o) 6 | INST_MODEL_FILE=full.sbinst 7 | 8 | LIB_DEST=${build}/lib 9 | OBJ_DEST=${build}/obj 10 | 11 | PRE_OBJECTS=$(SOURCES:.cpp=.o) 12 | OBJECTS = $(patsubst %,$(OBJ_DEST)/%,$(PRE_OBJECTS)) 13 | 14 | CXXFLAGS += -fPIC 15 | 16 | all: directories sbinst.cpp $(LIB_DEST)/libsbconfig.a $(LIB_DEST)/libsbconfig.so 17 | 18 | $(LIB_DEST)/libsbconfig.a: $(OBJECTS) 19 | ar crs $@ $^ 20 | 21 | $(LIB_DEST)/libsbconfig.so: $(OBJECTS) 22 | $(CXX) $(CXXFLAGS) -MD -shared -o $@ $^ 23 | 24 | sbinst.cpp: sbinst.h insts/*.h 25 | @echo "done" 26 | 27 | sbinst.h: inst_model.cpp inst_model.h model_parsing.cpp insts/*.h $(INST_MODEL_FILE) 28 | $(CXX) $(CXXFLAGS) -MD inst_model.cpp model_parsing.cpp -o inst_model 29 | ./inst_model $(INST_MODEL_FILE) sbinst.h sbinst.cpp 30 | 31 | $(OBJ_DEST)/%.o: %.cpp %.h sbinst.h 32 | $(CXX) $(CXXFLAGS) -MD -c -o $@ $< 33 | 34 | $(OBJ_DEST)/sbinst.o: sbinst.cpp sbinst.h 35 | $(CXX) $(CXXFLAGS) -MD -c -o $@ $< 36 | 37 | 38 | .PHONY: clean 39 | 40 | clean: 41 | -rm -Rf libsbconfig.a *.o sbinst.cpp sbinst.h $(LIB_DEST)/*.a $(LIB_DEST)/*.so $(OBJ_DEST)/*.o *.d 42 | 43 | include ${level}/make.rules 44 | 45 | -------------------------------------------------------------------------------- /softbrain-config/src/direction.cpp: -------------------------------------------------------------------------------- 1 | #include "direction.h" 2 | #include "model_parsing.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace SB_CONFIG; 9 | using namespace std; 10 | 11 | SbDIR::SbDIR() { 12 | 13 | //Adding the encoding for each direction 14 | add_encode(SbDIR::N,3); 15 | add_encode(SbDIR::NE,4); 16 | add_encode(SbDIR::E,5); 17 | add_encode(SbDIR::SE,6); 18 | add_encode(SbDIR::S,7); 19 | add_encode(SbDIR::SW,0); 20 | add_encode(SbDIR::W,1); 21 | add_encode(SbDIR::NW,2); 22 | 23 | //The following functions map the input directions and corresponding tuple 24 | //to an index 25 | 26 | //TOP bottom left right 27 | add_encode(SbDIR::IP0, std::make_tuple(true,false,false,false),encode(SbDIR::NW)); 28 | add_encode(SbDIR::IP1, std::make_tuple(true,false,false,false),encode(SbDIR::N)); 29 | add_encode(SbDIR::IP2, std::make_tuple(true,false,false,false),encode(SbDIR::NE)); 30 | 31 | //top bottom LEFT right 32 | add_encode(SbDIR::IP0, std::make_tuple(false,false,true,false),encode(SbDIR::SW)); 33 | add_encode(SbDIR::IP1, std::make_tuple(false,false,true,false),encode(SbDIR::W)); 34 | add_encode(SbDIR::IP2, std::make_tuple(false,false,true,false),encode(SbDIR::NW)); 35 | 36 | //top bottom left RIGHT 37 | add_encode(SbDIR::IP0, std::make_tuple(false,false,false,true),encode(SbDIR::SE)); 38 | add_encode(SbDIR::IP1, std::make_tuple(false,false,false,true),encode(SbDIR::E)); 39 | add_encode(SbDIR::IP2, std::make_tuple(false,false,false,true),encode(SbDIR::NE)); 40 | 41 | //top BOTTOM left right 42 | add_encode(SbDIR::IP0, std::make_tuple(false,true,false,false),encode(SbDIR::SW)); 43 | add_encode(SbDIR::IP1, std::make_tuple(false,true,false,false),encode(SbDIR::S)); 44 | add_encode(SbDIR::IP2, std::make_tuple(false,true,false,false),encode(SbDIR::SE)); 45 | } 46 | 47 
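// Example: a minimal usage sketch (illustrative only, based on the mappings
// registered in the constructor above) for a tile on the top edge of the grid,
// i.e. top=true and the other edge flags false:
//
//   SbDIR dirs;
//   int slot = dirs.encode(SbDIR::IP1, true, false, false, false);
//   // slot == dirs.encode(SbDIR::N) == 3, since IP1 is registered with N's index
//   // for the (top,bottom,left,right) = (1,0,0,0) tuple
//   SbDIR::DIR d = dirs.decode(slot, true, false, false, false);
//   // d == SbDIR::IP1, because the later IP* entries override N for that tuple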
| 48 | int SbDIR::encode(DIR myDir) { 49 | return encode(myDir,false,false,false,false); 50 | } 51 | 52 | //preferred directions 53 | void set_pref_dirs(bool& top,bool& bottom, bool& left, bool& right) { 54 | if(top && right) {right=false;} //top 55 | if(top && left) {left=false;} //top 56 | if(bottom && left) {bottom=false;} //left 57 | if(bottom && right){bottom=false;} //right 58 | } 59 | 60 | //returns index of the direction and tuple using the io_enc map 61 | int SbDIR::encode(DIR myDir, bool top, bool bottom, bool left, bool right) { 62 | set_pref_dirs(top,bottom,left,right); 63 | std::pair pair = make_pair(myDir,epos(top,bottom,left,right)); 64 | assert(io_enc.count(pair)); 65 | return io_enc[pair]; 66 | } 67 | 68 | //decode func with index and tuple returning the direction 69 | SbDIR::DIR SbDIR::decode(int i, bool top, bool bottom, bool left, bool right) { 70 | set_pref_dirs(top,bottom,left,right); 71 | 72 | std::pair pair = make_pair(i,epos(top,bottom,left,right)); 73 | assert(io_dec.count(pair)); 74 | return io_dec[pair]; 75 | } 76 | 77 | //position of output direction 78 | //TODO: generalize for more than one output side 79 | int SbDIR::slot_for_dir(DIR myDir, bool top, bool bottom, bool left, bool right) { 80 | set_pref_dirs(top,bottom,left,right); 81 | 82 | if(isOutputDir(myDir)) { 83 | myDir=reverse(myDir,true); 84 | } 85 | return encode(myDir, top, bottom, left, right); 86 | } 87 | 88 | SbDIR::DIR SbDIR::dir_for_slot(int index, bool top, bool bottom, bool left, bool right) { 89 | set_pref_dirs(top,bottom,left,right); 90 | 91 | SbDIR::DIR myDir = decode(index, top, bottom, left, right); 92 | if(isInputDir(myDir)) { 93 | myDir=reverse(myDir,true); 94 | } 95 | return myDir; 96 | } 97 | 98 | int SbDIR::encode_fu_dir(DIR myDir) { 99 | switch(myDir) { 100 | case NE: return 1; 101 | case SE: return 2; 102 | case SW: return 3; 103 | case NW: return 4; 104 | case IM: return 5; 105 | default: assert(0 && "no encoding"); 106 | } 107 | assert(0 && "not reachable"); 108 | } 109 | 110 | SbDIR::DIR SbDIR::fu_dir_of(int i) { 111 | switch(i) { 112 | case 0: return END_DIR; 113 | case 1: return NE; 114 | case 2: return SE; 115 | case 3: return SW; 116 | case 4: return NW; 117 | case 5: return IM; 118 | default: assert(0); 119 | } 120 | assert(0 && "not reachable"); 121 | } 122 | 123 | // switch(myDir) { 124 | // case N: return 3; 125 | // case NE: return 4; 126 | // case E: return 5; 127 | // case SE: return 6; 128 | // case S: return 7; 129 | // case SW: return 0; 130 | // case W: return 1; 131 | // case NW: return 2; 132 | // case OP0: return pos_of(SW); 133 | // case OP1: return pos_of(S); 134 | // case OP2: return pos_of(SE); 135 | // default: assert(0); 136 | // } 137 | 138 | 139 | 140 | SbDIR::DIR SbDIR::toDir(string qs, bool outgoing) { 141 | if (false) return END_DIR; 142 | else if(ModelParsing::StartsWith(qs,"NW")) return outgoing ? NW : reverse(NW); 143 | else if(ModelParsing::StartsWith(qs,"NE")) return outgoing ? NE : reverse(NE); 144 | else if(ModelParsing::StartsWith(qs,"SE")) return outgoing ? SE : reverse(SE); 145 | else if(ModelParsing::StartsWith(qs,"SW")) return outgoing ? SW : reverse(SW); 146 | else if (ModelParsing::StartsWith(qs,"N" )) return outgoing ? N : reverse(N); 147 | else if(ModelParsing::StartsWith(qs,"E" )) return outgoing ? E : reverse(E); 148 | else if(ModelParsing::StartsWith(qs,"S" )) return outgoing ? S : reverse(S); 149 | else if(ModelParsing::StartsWith(qs,"W" )) return outgoing ? 
W : reverse(W); 150 | else if(ModelParsing::StartsWith(qs,"P0")) return outgoing ? OP0 : IP0; 151 | else if(ModelParsing::StartsWith(qs,"P1")) return outgoing ? OP1 : IP1; 152 | else if(ModelParsing::StartsWith(qs,"P2")) return outgoing ? OP2 : IP2; 153 | else if(ModelParsing::StartsWith(qs,"IM")) return IM; 154 | return END_DIR; 155 | } 156 | 157 | 158 | //returns the reverse direction of DIR 159 | SbDIR::DIR SbDIR::reverse(DIR myDir, bool reverseIO) { 160 | switch(myDir) { 161 | case N: return S; 162 | case NE: return SW; 163 | case E: return W; 164 | case SE: return NW; 165 | case S: return N; 166 | case SW: return NE; 167 | case W: return E; 168 | case NW: return SE; 169 | default: { 170 | if(reverseIO) { 171 | switch(myDir) { 172 | case IP0: return OP0; 173 | case IP1: return OP1; 174 | case IP2: return OP2; 175 | case OP0: return IP0; 176 | case OP1: return IP1; 177 | case OP2: return IP2; 178 | default: assert(0); return myDir; 179 | } 180 | } 181 | assert(isInputDir(myDir) && !isOutputDir(myDir)); 182 | return myDir; //don't reverse 183 | } 184 | } 185 | } 186 | 187 | const char* SbDIR::dirNameDBG(SbDIR::DIR myDir, bool reverse) { 188 | if(isInputDir(myDir) || isOutputDir(myDir)) { 189 | switch(reverse ? SbDIR::reverse(myDir) : myDir) { 190 | case SbDIR::IP0: 191 | return "IP0"; 192 | break; 193 | case SbDIR::IP1: 194 | return "IP1"; 195 | break; 196 | case SbDIR::IP2: 197 | return "IP2"; 198 | break; 199 | case SbDIR::OP0: 200 | return "IP0"; 201 | break; 202 | case SbDIR::OP1: 203 | return "IP1"; 204 | break; 205 | case SbDIR::OP2: 206 | return "IP2"; 207 | break; 208 | case SbDIR::IM: 209 | return "IM"; 210 | break; 211 | default: 212 | assert(0); 213 | break; 214 | } 215 | } else { 216 | return SbDIR::dirName(myDir,reverse); 217 | } 218 | } 219 | 220 | 221 | const char* SbDIR::dirName(SbDIR::DIR myDir, bool reverse) { 222 | 223 | switch(reverse ? 
SbDIR::reverse(myDir) : myDir) { 224 | case SbDIR::N: 225 | return "N"; 226 | break; 227 | case SbDIR::NE: 228 | return "NE"; 229 | break; 230 | case SbDIR::E: 231 | return "E"; 232 | break; 233 | case SbDIR::SE: 234 | return "SE"; 235 | break; 236 | case SbDIR::S: 237 | return "S"; 238 | break; 239 | case SbDIR::SW: 240 | return "SW"; 241 | break; 242 | case SbDIR::W: 243 | return "W"; 244 | break; 245 | case SbDIR::NW: 246 | return "NW"; 247 | break; 248 | case SbDIR::IP0: 249 | return "P0"; 250 | break; 251 | case SbDIR::IP1: 252 | return "P1"; 253 | break; 254 | case SbDIR::IP2: 255 | return "P2"; 256 | break; 257 | case SbDIR::OP0: 258 | return "P0"; 259 | break; 260 | case SbDIR::OP1: 261 | return "P1"; 262 | break; 263 | case SbDIR::OP2: 264 | return "P2"; 265 | break; 266 | case SbDIR::IM: 267 | return "IM"; 268 | break; 269 | case SbDIR::END_DIR: 270 | return "xxx"; 271 | break; 272 | } 273 | return "???"; 274 | } 275 | -------------------------------------------------------------------------------- /softbrain-config/src/direction.h: -------------------------------------------------------------------------------- 1 | #ifndef __SB_DIRECTION_H__ 2 | #define __SB_DIRECTION_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace SB_CONFIG { 10 | 11 | typedef std::tuple epos; 12 | 13 | class SbDIR { 14 | public: 15 | enum DIR { IP0, IP1, IP2, OP0, OP1, OP2, N, NE, E, SE, S, SW, W, NW, IM, END_DIR }; 16 | 17 | static bool isInputDir(DIR d) {return d==IP0 || d==IP1 || d==IP2;} 18 | static bool isOutputDir(DIR d) {return d==OP0 || d==OP1 || d==OP2;} 19 | 20 | static DIR reverse(DIR myDir, bool reverseIO=false); 21 | static DIR toDir(std::string qs, bool outgoing); 22 | static const char* dirName(SbDIR::DIR dir, bool reverse=false); 23 | static const char* dirNameDBG(SbDIR::DIR dir, bool reverse=false); 24 | 25 | std::map,int> io_enc; 26 | std::map,DIR> io_dec; 27 | 28 | void add_encode(DIR dir, epos e, int index) { 29 | io_enc[std::make_pair(dir,e)]=index; 30 | io_dec[std::make_pair(index,e)]=dir; 31 | } 32 | 33 | //map func to map each direction and its index with 34 | //all possible tuples 35 | void add_encode(DIR dir, int index) { 36 | for(int i = 0; i <= 1; ++i) { 37 | for(int j = 0; j <= 1; ++j) { 38 | for(int k = 0; k <= 1; ++k) { 39 | for(int l = 0; l <= 1; ++l) { 40 | add_encode(dir,std::make_tuple(i,j,k,l),index); 41 | } 42 | } 43 | } 44 | } 45 | } 46 | 47 | SbDIR(); 48 | 49 | 50 | int encode(DIR i); 51 | DIR decode(int i, bool top, bool bottom, bool left, bool right); 52 | DIR dir_for_slot(int i, bool top, bool bottom, bool left, bool right); 53 | int encode(DIR i, bool top, bool bottom, bool left, bool right); 54 | int slot_for_dir(DIR i, bool top, bool bottom, bool left, bool right); 55 | 56 | int encode_fu_dir(DIR myDir); 57 | DIR fu_dir_of(int i); 58 | 59 | }; 60 | } 61 | 62 | 63 | 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /softbrain-config/src/fixed_point.h: -------------------------------------------------------------------------------- 1 | // NOTE: The macros below were copied from cambricon/include/fix_common.h 2 | // in the softbrain-workloads repository 3 | 4 | // In 16-bit integer representation, 5 | // one bit is reserved for sign, 6 | // the maximum supported number is 32767, 7 | // the minimum supported number is -32768. 8 | // Here FIX_MAX = 32767, and FIX_MIN is chosen to be negative 9 | // of FIX_MAX instead of -32768 to keep the symmetry. 
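// A small worked example of this fixed-point scheme (for illustration only,
// assuming the FRAC_BITS = 11 setting chosen further below, so DELTA = 1/2048):
//   DOUBLE_TO_FIX(1.5)  == (int)(1.5 * 2048)        == 3072
//   FIX_MUL(3072, 3072) == (3072 * 3072) >> 11      == 4608 == DOUBLE_TO_FIX(2.25)
//   FIX_TRUNC(40000)    == 32767   (saturates at FIX_MAX instead of wrapping)
//   FIX_TRUNC(-40000)   == -32767  (saturates at FIX_MIN, keeping the symmetry)
//   FLOAT_MAX           == 32767 * DELTA, i.e. just under 16.0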
10 | 11 | // FIX_TRUNC is to keep the number falling within the range 12 | // between FIX_MIN and FIX_MAX (both inclusively) 13 | #define FIX_MAX ((1 << 15) - 1) 14 | #define FIX_MIN (-FIX_MAX) 15 | #define FIX_TRUNC(x) (x > FIX_MAX ? FIX_MAX : (x < FIX_MIN ? FIX_MIN : x) ) 16 | 17 | // FRAC_BITS is the number of bits reserved for fractional parts. 18 | // So the integer part has 15 - FRAC_BITS bits. 19 | 20 | // DELTA is the minimum positive amount that can be represented in this number system. 21 | 22 | // FLOAT_MAX is the largest real value that can be represented in this number system. 23 | // FLOAT_MIN is the smallest real value that can be represented in this number system. 24 | 25 | // FLOAT_TRUNC is to keep numbers within the range 26 | // between FLOAT_MIN and FLOAT_MAX (both inclusively) 27 | #define FRAC_BITS 11 // 11 or 12 is recommended 28 | #define DELTA (((double)1.0)/(1 << FRAC_BITS)) 29 | #define FLOAT_MAX (FIX_MAX * DELTA) 30 | #define FLOAT_MIN (FIX_MIN * DELTA) 31 | #define FLOAT_TRUNC(x) (x > FLOAT_MAX ? FLOAT_MAX : (x < FLOAT_MIN ? FLOAT_MIN : x) ) 32 | 33 | // DOUBLE_TO_FIX converts a double number to integer in our fixed representation. 34 | // FIX_TO_DOUBLE converts a integer number to double in our fixed representation. 35 | #define DOUBLE_TO_FIX(x) ( (int)(FLOAT_TRUNC(x) / DELTA) ) 36 | #define FIX_TO_DOUBLE(x) (x * DELTA) 37 | 38 | // FIX_ADD fixed addition. 39 | // FIX_MINUS fixed subtraction. 40 | // FIX_MUL fixed multiplication. 41 | // FIX_TAN_H fixed tanh, but is right now using tanh from math.h 42 | #define FIX_ADD(a, b) ( FIX_TRUNC( (int)a + (int)b ) ) 43 | #define FIX_MINUS(a, b) ( FIX_ADD(a, -b) ) 44 | #define FIX_MUL(a, b) ( FIX_TRUNC( ((int)a * (int)b) >> FRAC_BITS ) ) 45 | #define FIX_TAN_H(x) ( DOUBLE_TO_FIX(tanh(FIX_TO_DOUBLE(x))) ) 46 | 47 | -------------------------------------------------------------------------------- /softbrain-config/src/fu_model.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "fu_model.h" 4 | #include "model_parsing.h" 5 | #include "sbinst.h" 6 | #include "assert.h" 7 | 8 | using namespace SB_CONFIG; 9 | using namespace std; 10 | 11 | //FU_type(func_unit_def) capabilities 12 | //FU_ADD: Add16x4:1 13 | 14 | FuModel::FuModel(std::istream& istream) { 15 | //char line[512]; 16 | string param,value; 17 | 18 | while(istream.good()) 19 | { 20 | if(istream.peek()=='[') break; //break out if done 21 | 22 | //string line; 23 | ModelParsing::ReadPair(istream, param, value); 24 | 25 | if(param[0]=='#' || value[0]=='#') continue; //Not a comment 26 | 27 | if(ModelParsing::StartsWith(param, "FU_TYPE")) { 28 | //defining an fu and capabilitty 29 | 30 | string newtype; 31 | 32 | std::stringstream ss(param); 33 | 34 | getline(ss, param, ' '); 35 | getline(ss, newtype); 36 | 37 | func_defs.push_back(func_unit_def(newtype)); 38 | AddCapabilities(func_defs[func_defs.size()-1], value); 39 | 40 | } else if(ModelParsing::StartsWith(param, "SWITCH_TYPE")) { 41 | //AddCapabilities(*GetFU("SWITCH"), value); 42 | assert(0); 43 | } 44 | } 45 | } 46 | 47 | func_unit_def* FuModel::GetFUDef(char* fu_cstr) 48 | { 49 | string s(fu_cstr); 50 | return GetFUDef(s); 51 | } 52 | 53 | 54 | //Get a functional unit based upon the description string (the name) 55 | func_unit_def* FuModel::GetFUDef(string& fu_string) 56 | { 57 | for(unsigned i = 0; i < func_defs.size(); ++i) 58 | { 59 | if(func_defs[i].name().compare(fu_string)==0) 60 | { 61 | return &func_defs[i]; 62 | } 63 | } 64 | return NULL; 
//if no fu, return null 65 | } 66 | 67 | //This function reads line from an ifstream, and gets a param and value, 68 | //seperated by a ":" 69 | void FuModel::AddCapabilities(func_unit_def& fu, string& cap_string) 70 | { 71 | 72 | stringstream ss(cap_string); 73 | string cur_cap; 74 | 75 | while (getline(ss, cur_cap, ',')) 76 | { 77 | stringstream pss(cur_cap); 78 | string cap; 79 | string enc_str; 80 | 81 | getline(pss, cap, ':'); 82 | 83 | ModelParsing::trim(cap); 84 | 85 | if(cap.empty()) { 86 | return; 87 | } 88 | 89 | if(ModelParsing::stricmp(cap,"ALL")) { 90 | for(int i = 0; i < SB_NUM_TYPES; ++i) { 91 | fu.add_cap((sb_inst_t)i); 92 | } 93 | return; 94 | } 95 | 96 | sb_inst_t sb_inst = inst_from_config_name(cap.c_str()); 97 | 98 | if(sb_inst==SB_NONE || sb_inst==SB_ERR) 99 | { 100 | cerr << "ERROR IN PARSING SOFTBRAIN INSTRUCTION: \"" << cap << "\"\n"; 101 | assert(0); 102 | return; 103 | } 104 | 105 | fu.add_cap(sb_inst); 106 | 107 | if(pss.good()) //then there must be an encoding string 108 | { 109 | unsigned encoding; 110 | pss >> encoding; 111 | 112 | fu.set_encoding(sb_inst,encoding); 113 | } 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /softbrain-config/src/fu_model.h: -------------------------------------------------------------------------------- 1 | #ifndef __SB_FU_MODEL_H__ 2 | #define __SB_FU_MODEL_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "sbinst.h" 11 | 12 | namespace SB_CONFIG { 13 | 14 | class func_unit_def { 15 | public: 16 | func_unit_def(std::string name_in) { 17 | _name = name_in; 18 | } 19 | 20 | std::string name() {return _name;} 21 | 22 | void add_cap(sb_inst_t sb_inst) { _cap.insert(sb_inst); } 23 | void set_encoding(sb_inst_t sb_inst, unsigned i) { 24 | if(i==0) { 25 | assert(0 && "Encoding for Instruction cannot be zero. Zero is reserved for Blank"); 26 | } 27 | if(i==1) { 28 | assert(0 && "Encoding for Instruction cannot be 1. 
1 is reserved for Copy"); 29 | } 30 | _cap2encoding[sb_inst]=i; 31 | _encoding2cap[i]=sb_inst; 32 | } 33 | 34 | bool is_cap(sb_inst_t inst) { return _cap.count(inst)>0; } 35 | unsigned encoding_of(sb_inst_t inst) { 36 | if(inst == SB_Copy) { 37 | return 1; 38 | } else { 39 | return _cap2encoding[inst]; 40 | } 41 | } 42 | 43 | sb_inst_t inst_of_encoding(unsigned i) { 44 | if(i==1) { 45 | return SB_Copy; 46 | } 47 | assert(_encoding2cap.count(i)); 48 | return _encoding2cap[i]; 49 | } 50 | 51 | private: 52 | std::string _name; 53 | std::set _cap; 54 | std::map _cap2encoding; 55 | std::map _encoding2cap; 56 | 57 | friend class FuModel; 58 | }; 59 | 60 | class FuModel { 61 | public: 62 | FuModel(std::istream& istream); 63 | func_unit_def* GetFUDef(char*); 64 | func_unit_def* GetFUDef(std::string& fu_string); 65 | 66 | private: 67 | void AddCapabilities(func_unit_def& fu, std::string& cap_string); 68 | 69 | std::vector func_defs; 70 | 71 | }; 72 | 73 | } 74 | 75 | 76 | #endif 77 | -------------------------------------------------------------------------------- /softbrain-config/src/full.sbinst: -------------------------------------------------------------------------------- 1 | #Instruction ConfigName Latency NumOperands 2 | Add16 Add16 1 2 3 | Mul16 Mul16 1 2 4 | Sig16 Sig16 3 2 5 | 6 | Add16x4 Add16x4 1 2 7 | TAdd16x4 TAdd16x4 1 2 8 | HAdd16x4 HAdd16x4 1 2 9 | RShf16x4 RShf16x4 1 2 10 | Sub16x4 Sub16x4 1 2 11 | Abs16x4 Abs16x4 1 2 12 | Acc16x4 Acc16x4 1 2 13 | 14 | RShf2_16x4 RShf2_16x4 1 2 15 | RShf4_16x4 RShf4_16x4 1 2 16 | 17 | Mul16x4 Mul16x4 1 2 18 | Div16x4 Div16x4 3 2 19 | Sig16x4 Sig16x4 3 2 20 | Red16x4 Red16x4 2 2 21 | 22 | Max16x4 Max16x4 1 2 23 | Min16x4 Min16x4 1 2 24 | SMax16x4 SMax16x4 1 2 25 | SMin16x4 SMin16x4 1 2 26 | RedMax16x4 RedMax16x4 2 2 27 | RedMin16x4 RedMin16x4 2 2 28 | RedSMax16x4 RedSMax16x4 2 2 29 | RedSMin16x4 RedSMin16x4 2 2 30 | 31 | DelayFU DelayFU 0 1 32 | 33 | Mul32x2 Mul32x2 1 2 34 | Add32x2 Add32x2 1 2 35 | Red32x2 Red32x2 2 2 36 | RShf32x2 RShf32x2 1 2 37 | 38 | Max32x2 Max32x2 1 2 39 | Min32x2 Min32x2 1 2 40 | RedMax32x2 RedMax32x2 2 2 41 | RedMin32x2 RedMin32x2 2 2 42 | 43 | Mul64 Mul64 1 2 44 | Add64 Add64 1 2 45 | Sub64 Sub64 1 2 46 | RShf64 RShf64 1 2 47 | LShf64 LShf64 1 2 48 | Max64 Max64 1 2 49 | Min64 Min64 1 2 50 | Acc64 Acc64 1 2 51 | 52 | FAdd32x2 FAdd32x2 1 2 53 | FRed32x2 FRed32x2 2 2 54 | FMul32x2 FMul32x2 2 2 55 | 56 | FxRelu16x4 FxRelu16x4 1 2 57 | FxSig16x4 FxSig16x4 3 2 58 | FxTanh16x4 FxTanh16x4 3 2 59 | 60 | FxAdd16x4 FxAdd16x4 1 2 61 | FxRed16x4 FxRed16x4 2 2 62 | FxMul16x4 FxMul16x4 1 2 63 | FxExp16x4 FxExp16x4 3 2 64 | 65 | FxMul32x2 FxMul32x2 1 2 66 | FxAdd32x2 FxAdd32x2 1 2 67 | FxRed32x2 FxRed32x2 2 2 68 | 69 | FAdd64 FAdd64 1 2 70 | FMul64 FMul64 2 2 71 | 72 | Select Select 1 3 73 | And And 1 2 74 | Or Or 1 2 75 | Xor Xor 1 2 76 | 77 | Copy Copy 1 1 78 | 79 | ICmpEQ ICmpEQ 1 2 80 | 81 | # ----- old ones (no corresponding impl) ---- 82 | 83 | Switch Switch 1 8 84 | 85 | 86 | Add Add 1 2 87 | Sub Sub 1 2 88 | Mul Mul 1 2 89 | UDiv UDiv 1 2 90 | SDiv SDiv 1 2 91 | URem URem 1 2 92 | SRem SRem 1 2 93 | IMax IMax 1 2 94 | IMin IMin 1 2 95 | SMax SMax 1 2 96 | SMin SMin 1 2 97 | 98 | FAdd FAdd 3 2 99 | FSub FSub 3 2 100 | FMul FMul 3 2 101 | FDiv FDiv 12 2 102 | FRem FRem 12 2 103 | Sqrt Sqrt 12 1 104 | FSin FSin 24 1 105 | FCos FCos 24 1 106 | FMax FMax 3 2 107 | FMin FMin 3 2 108 | 109 | SExt SExt 1 1 110 | 111 | Shl Shl 1 2 112 | LShr LShr 1 2 113 | AShr AShr 1 2 114 | 115 | PHI PHI 1 2 116 | Ternary Ternary 1 3 117 | 118 | ICmpNE 
ICmp-NE 1 2 119 | ICmpUGT ICmp-UGT 1 2 120 | ICmpUGE ICmp-UGE 1 2 121 | ICmpULT ICmp-ULT 1 2 122 | ICmpULE ICmp-ULE 1 2 123 | ICmpSGT ICmp-SGT 1 2 124 | ICmpSGE ICmp-SGE 1 2 125 | ICmpSLT ICmp-SLT 1 2 126 | ICmpSLE ICmp-SLE 1 2 127 | 128 | FCmpOEQ FCmp-OEQ 3 2 129 | FCmpONE FCmp-ONE 3 2 130 | FCmpOGT FCmp-OGT 3 2 131 | FCmpOGE FCmp-OGE 3 2 132 | FCmpOLT FCmp-OLT 3 2 133 | FCmpOLE FCmp-OLE 3 2 134 | -------------------------------------------------------------------------------- /softbrain-config/src/inst_model.cpp: -------------------------------------------------------------------------------- 1 | #include "inst_model.h" 2 | 3 | #include 4 | #include 5 | #include "model_parsing.h" 6 | 7 | using namespace SB_CONFIG; 8 | using namespace std; 9 | 10 | //constructor based on input stream 11 | InstModel::InstModel(char* filename) { 12 | 13 | ifstream ifs(filename, ios::in); 14 | 15 | if(ifs.fail()) { 16 | cerr << "Could Not Open: " << filename << "\n"; 17 | return; 18 | } 19 | 20 | char line[512]; 21 | while(ifs.good()) 22 | { 23 | //string line; 24 | ifs.getline(line,512); 25 | 26 | string str_line=string(line); 27 | 28 | ModelParsing::trim(str_line); 29 | 30 | //Empty line or the first line 31 | if(str_line[0]=='#' || str_line.empty()) continue; 32 | 33 | SbInst* inst = new SbInst(); 34 | 35 | char* token; 36 | token = strtok (line," "); 37 | string str_name(token); 38 | inst->setName(str_name); 39 | 40 | token = strtok (NULL," "); 41 | string str_config(token); 42 | inst->setConfigName(str_config); 43 | 44 | token = strtok (NULL, " "); 45 | inst->setLatency(atoi(token)); 46 | 47 | token = strtok (NULL, " "); 48 | inst->setNumOperands(atoi(token)); 49 | 50 | _instList.push_back(inst); 51 | } 52 | 53 | 54 | } 55 | 56 | void InstModel::printCFiles(char* header_file, char* cpp_file) { 57 | 58 | // -------------------------print header file ----------------------------- 59 | ofstream ofs(header_file, ios::out); 60 | ofs << 61 | "//This file generated from inst_model.cpp -- Do not edit. Do not commit to repo.\n" 62 | "#ifndef __SB_INST_H__\n" 63 | "#define __SB_INST_H__\n" 64 | "\n" 65 | "#include \n" 66 | "#include \n" 67 | "#include \n" 68 | "#include \n" 69 | "#include \n" 70 | "#include \n" 71 | "#include \n" 72 | "#include \"fixed_point.h\"\n" 73 | "\n" 74 | "namespace SB_CONFIG {\n" 75 | "\n" 76 | 77 | "float as_float(std::uint32_t ui);\n" 78 | "uint32_t as_uint32(float f);\n" 79 | "\n" 80 | "double as_double(std::uint64_t ui);\n" 81 | "uint64_t as_uint64(double f);\n" 82 | "\n" 83 | 84 | 85 | "enum sb_inst_t {\n" 86 | "SB_NONE=0,\n" 87 | "SB_ERR,\n"; 88 | 89 | for(unsigned i = 0; i < _instList.size(); ++i) { 90 | ofs << "SB_" << _instList[i]->name() << ", \n"; 91 | }; 92 | 93 | ofs << "SB_NUM_TYPES\n};\n"; 94 | 95 | ofs << "\n"; 96 | ofs << "extern int num_ops[" << _instList.size()+2 << "];\n"; 97 | 98 | ofs << 99 | "\n" 100 | "sb_inst_t inst_from_string(const char* str);\n" 101 | "sb_inst_t inst_from_config_name(const char* str);\n" 102 | "const char* name_of_inst(sb_inst_t inst);\n" 103 | "const char* config_name_of_inst(sb_inst_t inst);\n" 104 | "int inst_lat(sb_inst_t inst);\n" 105 | "uint64_t execute(sb_inst_t inst, std::vector& ops, uint64_t& accum);\n" 106 | "\n" 107 | "};\n\n" 108 | "#endif\n"; 109 | 110 | ofs.close(); 111 | 112 | // -------------------------print cpp file -------------------------------- 113 | { 114 | 115 | ofstream ofs(cpp_file, ios::out); 116 | 117 | // inst_from_string 118 | ofs << 119 | "//This file generated from inst_model.cpp -- Do not edit. 
Do not commit to repo.\n" 120 | "#include \"" << header_file << "\"\n\n" 121 | 122 | "float SB_CONFIG::as_float(std::uint32_t ui) {\n" 123 | " float f;\n" 124 | " std::memcpy(&f, &ui, sizeof(float));\n" 125 | " return f;\n" 126 | "}\n" 127 | "\n" 128 | 129 | "uint32_t SB_CONFIG::as_uint32(float f) {\n" 130 | " uint32_t ui;\n" 131 | " std::memcpy(&ui, &f, sizeof(float));\n" 132 | " return ui;\n" 133 | "}\n" 134 | "\n" 135 | 136 | "double SB_CONFIG::as_double(std::uint64_t ui) {\n" 137 | " float f;\n" 138 | " std::memcpy(&f, &ui, sizeof(float));\n" 139 | " return f;\n" 140 | "}\n" 141 | "\n" 142 | 143 | "uint64_t SB_CONFIG::as_uint64(double f) {\n" 144 | " uint32_t ui;\n" 145 | " std::memcpy(&ui, &f, sizeof(double));\n" 146 | " return ui;\n" 147 | "}\n" 148 | "\n" 149 | 150 | 151 | "using namespace SB_CONFIG;\n\n" 152 | "sb_inst_t SB_CONFIG::inst_from_string(const char* str) {\n" 153 | " if(strcmp(str,\"NONE\")==0) return SB_NONE;\n"; 154 | 155 | for(unsigned i = 0; i < _instList.size(); ++i) { 156 | ofs << " else if(strcmp(str,\"" << _instList[i]->name() << "\")==0) return SB_" << _instList[i]->name() << ";\n"; 157 | } 158 | ofs << " else return SB_ERR;\n\n"; 159 | 160 | ofs << "}\n\n"; 161 | 162 | 163 | // inst_from_config_name 164 | ofs << 165 | "sb_inst_t SB_CONFIG::inst_from_config_name(const char* str) {\n" 166 | " if(strcmp(str,\"NONE\")==0) return SB_NONE;\n"; 167 | for(unsigned i = 0; i < _instList.size(); ++i) { 168 | ofs << " else if(strcmp(str,\"" << _instList[i]->configName() << "\")==0) return SB_" << _instList[i]->name() << ";\n"; 169 | } 170 | ofs << " else return SB_ERR;\n\n"; 171 | 172 | ofs << "}\n\n"; 173 | 174 | // Properties of Instructions 175 | 176 | // name_of_inst 177 | ofs << 178 | "const char* SB_CONFIG::name_of_inst(sb_inst_t inst) {\n" 179 | " switch(inst) {\n"; 180 | for(unsigned i = 0; i < _instList.size(); ++i) { 181 | ofs << " case " << "SB_" << _instList[i]->name() << ": return \"" << _instList[i]->name() << "\";\n"; 182 | } 183 | ofs << "case SB_NONE: return \"NONE\";\n"; 184 | ofs << "case SB_ERR: assert(0); return \"ERR\";\n"; 185 | ofs << "case SB_NUM_TYPES: assert(0); return \"ERR\";\n"; 186 | ofs << " default: assert(0); return \"DEFAULT\";\n"; 187 | ofs << " }\n\n"; 188 | ofs << "}\n\n"; 189 | 190 | // config_name_of_inst 191 | ofs << 192 | "const char* SB_CONFIG::config_name_of_inst(sb_inst_t inst) {\n" 193 | " switch(inst) {\n"; 194 | for(unsigned i = 0; i < _instList.size(); ++i) { 195 | ofs << " case " << "SB_" << _instList[i]->name() << ": return \"" << _instList[i]->configName() << "\";\n"; 196 | } 197 | 198 | ofs << " case SB_NONE: return \"NONE\";\n"; 199 | ofs << " case SB_ERR: assert(0); return \"ERR\";\n"; 200 | ofs << " case SB_NUM_TYPES: assert(0); return \"ERR\";\n"; 201 | ofs << " default: assert(0); return \"DEFAULT\";\n"; 202 | ofs << " }\n\n"; 203 | ofs << "}\n\n"; 204 | 205 | //FUNCTION: inst_lat (this really should have just used an array...) 
206 | ofs << 207 | "int SB_CONFIG::inst_lat(sb_inst_t inst) {\n" 208 | " switch(inst) {\n"; 209 | for(unsigned i = 0; i < _instList.size(); ++i) { 210 | ofs << " case " << "SB_" << _instList[i]->name() << ": return " << _instList[i]->latency() << ";\n"; 211 | } 212 | ofs << " default: return 1;\n"; 213 | ofs << " }\n\n"; 214 | ofs << "}\n\n"; 215 | 216 | // num_ops_array 217 | ofs << "int SB_CONFIG::num_ops[" << _instList.size()+2 << "]={0, 0\n"; 218 | ofs << "\t\t\t\t\t\t\t\t\t\t\t\t\t\t"; 219 | for(unsigned i = 0; i < _instList.size(); ++i) { 220 | ofs << ", " << _instList[i]->numOps(); 221 | if(i%16==15) { 222 | ofs << "\n"; 223 | ofs << "\t\t\t\t\t\t\t\t\t\t\t\t\t\t"; 224 | } 225 | } 226 | ofs << "};\n\n"; 227 | 228 | 229 | //FUNCTION: execute() 230 | ofs << 231 | "uint64_t SB_CONFIG::execute(sb_inst_t inst, std::vector& ops, uint64_t& accum) {\n"; 232 | 233 | ofs << //this is an implementation of pass through 234 | " assert(ops.size() <= 3); \n" 235 | " assert(ops.size() <= (unsigned)(num_ops[inst]+1)); \n" 236 | " if((ops.size() > (unsigned)num_ops[inst]) && (ops[ops.size()] == 0)) { \n" 237 | " return ops[0];\n" 238 | " }\n" 239 | 240 | " switch(inst) {\n"; 241 | for(unsigned i = 0; i < _instList.size(); ++i) { 242 | ofs << " case " << "SB_" << _instList[i]->name() << ": {"; 243 | string inst_code_name = "insts/" + _instList[i]->name() + ".h"; 244 | ifstream f(inst_code_name.c_str()); 245 | 246 | if (f.good()) { 247 | std::string line; 248 | ofs << "\n"; 249 | while (std::getline(f, line)) { 250 | ofs << " " << line << "\n"; 251 | } 252 | ofs << " };\n"; 253 | } else { 254 | ofs << "assert(0 && \"Instruction Not Implemented\");"; 255 | ofs << "};\n"; 256 | } 257 | } 258 | ofs << " default: assert(0); return 1;\n"; 259 | ofs << " }\n\n"; 260 | ofs << "}\n\n"; 261 | 262 | 263 | ofs.close(); 264 | 265 | } 266 | } 267 | 268 | int main(int argc, char** argv) 269 | { 270 | if(argc!=4) { 271 | std::cout << "Usage:\n inst_model [input file] [header file] [cpp file]\n"; 272 | return 1; 273 | } 274 | 275 | InstModel* instModel = new InstModel(argv[1]); 276 | instModel->printCFiles(argv[2],argv[3]); 277 | return 0; 278 | } 279 | 280 | -------------------------------------------------------------------------------- /softbrain-config/src/inst_model.h: -------------------------------------------------------------------------------- 1 | #ifndef __SB_INST_MODEL_H__ 2 | #define __SB_INST_MODEL_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace SB_CONFIG { 10 | 11 | // SB Instruction Class 12 | // Stores attributes like it's name, latency, etc... 
13 | class SbInst { 14 | public: 15 | std::string name() { return _name; } 16 | void setName(std::string& name) { _name=name; } 17 | 18 | std::string configName() { return _configname; } 19 | void setConfigName(std::string& name) { _configname=name; } 20 | 21 | int latency() { return _latency; } 22 | void setLatency(int lat) { _latency=lat; } 23 | 24 | int numOps() { return _num_ops; } 25 | void setNumOperands(int n_ops) { _num_ops=n_ops; } 26 | 27 | private: 28 | std::string _name; 29 | std::string _configname; 30 | int _latency; 31 | int _num_ops; 32 | }; 33 | 34 | class InstModel { 35 | public: 36 | InstModel(char* filename); //read the file and populate the instructions 37 | //DyInst* GetDyInstByName(std::string& name); 38 | 39 | void printCFiles(char* header, char* cpp); 40 | 41 | private: 42 | std::vector _instList; 43 | }; 44 | 45 | 46 | 47 | 48 | 49 | 50 | } 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Abs16x4.h: -------------------------------------------------------------------------------- 1 | int16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | int16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | int16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | int16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | a0 = a0 >= 0 ? a0 : -a0; 6 | a1 = a1 >= 0 ? a1 : -a1; 7 | a2 = a2 >= 0 ? a2 : -a2; 8 | a3 = a3 >= 0 ? a3 : -a3; 9 | uint64_t c0 = (uint64_t)(a0)<<0; 10 | uint64_t c1 = (uint64_t)(a1)<<16; 11 | uint64_t c2 = (uint64_t)(a2)<<32; 12 | uint64_t c3 = (uint64_t)(a3)<<48; 13 | return c0 | c1 | c2 | c3; 14 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Acc16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | uint16_t b0 = (accum&0x000000000000FFFF)>>0; 6 | uint16_t b1 = (accum&0x00000000FFFF0000)>>16; 7 | uint16_t b2 = (accum&0x0000FFFF00000000)>>32; 8 | uint16_t b3 = (accum&0xFFFF000000000000)>>48; 9 | a0+=b0; 10 | a1+=b1; 11 | a2+=b2; 12 | a3+=b3; 13 | uint64_t c0 = (uint64_t)(a0)<<0; 14 | uint64_t c1 = (uint64_t)(a1)<<16; 15 | uint64_t c2 = (uint64_t)(a2)<<32; 16 | uint64_t c3 = (uint64_t)(a3)<<48; 17 | 18 | accum = c0 | c1 | c2 | c3; 19 | 20 | uint64_t ret = accum; 21 | 22 | if(ops[1]) { 23 | accum=0; 24 | } 25 | 26 | return ret; 27 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Acc64.h: -------------------------------------------------------------------------------- 1 | accum+=ops[0]; 2 | 3 | uint64_t ret = accum; 4 | 5 | if(ops[1]) { 6 | accum=0; 7 | } 8 | 9 | return ret; 10 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Add16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | uint16_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | uint16_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | uint16_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | uint16_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | a0+=b0; 10 | a1+=b1; 11 | a2+=b2; 12 | a3+=b3; 13 | uint64_t c0 = 
(uint64_t)(a0)<<0; 14 | uint64_t c1 = (uint64_t)(a1)<<16; 15 | uint64_t c2 = (uint64_t)(a2)<<32; 16 | uint64_t c3 = (uint64_t)(a3)<<48; 17 | return c0 | c1 | c2 | c3; 18 | //return (uint64_t) _mm_adds_pu16((__m64)ops[0], (__m64)ops[1]); 19 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Add32x2.h: -------------------------------------------------------------------------------- 1 | uint32_t a0 = (ops[0]&0x00000000FFFFFFFF)>>0; 2 | uint32_t a1 = (ops[0]&0xFFFFFFFF00000000)>>32; 3 | uint32_t b0 = (ops[1]&0x00000000FFFFFFFF)>>0; 4 | uint32_t b1 = (ops[1]&0xFFFFFFFF00000000)>>32; 5 | a0+=b0; 6 | a1+=b1; 7 | uint64_t c0 = (uint64_t)(a0)<<0; 8 | uint64_t c1 = (uint64_t)(a1)<<32; 9 | return c0 | c1; 10 | //return (uint64_t) _mm_adds_pu16((__m64)ops[0], (__m64)ops[1]); 11 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Add64.h: -------------------------------------------------------------------------------- 1 | return ops[0] + ops[1]; 2 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/And.h: -------------------------------------------------------------------------------- 1 | return ops[0] & ops[1]; 2 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Copy.h: -------------------------------------------------------------------------------- 1 | return ops[0]; 2 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Div16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | uint16_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | uint16_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | uint16_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | uint16_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | a0/=b0; 10 | a1/=b1; 11 | a2/=b2; 12 | a3/=b3; 13 | uint64_t c0 = (uint64_t)(a0)<<0; 14 | uint64_t c1 = (uint64_t)(a1)<<16; 15 | uint64_t c2 = (uint64_t)(a2)<<32; 16 | uint64_t c3 = (uint64_t)(a3)<<48; 17 | return c0 | c1 | c2 | c3; 18 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FAdd32x2.h: -------------------------------------------------------------------------------- 1 | uint32_t t_a0 = (ops[0]&0x00000000FFFFFFFF)>>0; 2 | uint32_t t_a1 = (ops[0]&0xFFFFFFFF00000000)>>32; 3 | uint32_t t_b0 = (ops[1]&0x00000000FFFFFFFF)>>0; 4 | uint32_t t_b1 = (ops[1]&0xFFFFFFFF00000000)>>32; 5 | 6 | float a0=as_float(t_a0); 7 | float a1=as_float(t_a1); 8 | float b0=as_float(t_b0); 9 | float b1=as_float(t_b1); 10 | 11 | a0+=b0; 12 | a1+=b1; 13 | 14 | uint64_t c0 = (uint64_t)(as_uint32(a0))<<0; 15 | uint64_t c1 = (uint64_t)(as_uint32(a1))<<32; 16 | return c0 | c1; 17 | //return (uint64_t) _mm_adds_pu16((__m64)ops[0], (__m64)ops[1]); 18 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FAdd64.h: -------------------------------------------------------------------------------- 1 | double a = as_double(ops[0]); 2 | double b = as_double(ops[1]); 3 | double c = a+b; 4 | return as_uint64(c); 5 | -------------------------------------------------------------------------------- 
/softbrain-config/src/insts/FMul32x2.h: -------------------------------------------------------------------------------- 1 | uint32_t t_a0 = (ops[0]&0x00000000FFFFFFFF)>>0; 2 | uint32_t t_a1 = (ops[0]&0xFFFFFFFF00000000)>>32; 3 | uint32_t t_b0 = (ops[1]&0x00000000FFFFFFFF)>>0; 4 | uint32_t t_b1 = (ops[1]&0xFFFFFFFF00000000)>>32; 5 | 6 | float a0=as_float(t_a0); 7 | float a1=as_float(t_a1); 8 | float b0=as_float(t_b0); 9 | float b1=as_float(t_b1); 10 | 11 | a0*=b0; 12 | a1*=b1; 13 | 14 | uint64_t c0 = (uint64_t)(as_uint32(a0))<<0; 15 | uint64_t c1 = (uint64_t)(as_uint32(a1))<<32; 16 | return c0 | c1; 17 | 18 | //return (uint64_t) _mm_mullo_pi32((__m64)ops[0], (__m64)ops[1]); -- mullo_pi32 doesnt exisit in mm intrinsics 19 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FMul64.h: -------------------------------------------------------------------------------- 1 | double a = as_double(ops[0]); 2 | double b = as_double(ops[1]); 3 | double c = a*b; 4 | return as_uint64(c); 5 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FRed32x2.h: -------------------------------------------------------------------------------- 1 | uint32_t t_r0 = (ops[0]&0x00000000FFFFFFFF)>>0; 2 | uint32_t t_r1 = (ops[0]&0xFFFFFFFF00000000)>>32; 3 | 4 | float r0=as_float(t_r0); 5 | float r1=as_float(t_r1); 6 | 7 | float result; 8 | if(ops.size() > 1) { //additional op is acc 9 | result = r0 + r1 + as_float((uint32_t)ops[1]); 10 | } else { 11 | result = r0 + r1; 12 | } 13 | return (uint64_t)(as_uint32(result)); 14 | 15 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FxAdd16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | uint16_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | uint16_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | uint16_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | uint16_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | 10 | uint16_t sum = a0 + b0; 11 | 12 | if (!((a0 ^ b0) & 0x8000) && ((a0 ^ sum) & 0x8000) && !(a0 & 0x8000)) 13 | a0 = 0x7FFF; 14 | else if (!((a0 ^ b0) & 0x8000) && ((a0 ^ sum) & 0x8000) && (a0 & 0x8000)) 15 | a0 = 0x8001; 16 | else 17 | a0 = sum; 18 | 19 | sum = a1 + b1; 20 | 21 | if (!((a1 ^ b1) & 0x8000) && ((a1 ^ sum) & 0x8000) && !(a1 & 0x8000)) 22 | a1 = 0x7FFF; 23 | else if (!((a1 ^ b1) & 0x8000) && ((a1 ^ sum) & 0x8000) && (a1 & 0x8000)) 24 | a1 = 0x8001; 25 | else 26 | a1 = sum; 27 | 28 | sum = a2 + b2; 29 | 30 | if (!((a2 ^ b2) & 0x8000) && ((a2 ^ sum) & 0x8000) && !(a2 & 0x8000)) 31 | a2 = 0x7FFF; 32 | else if (!((a2 ^ b2) & 0x8000) && ((a2 ^ sum) & 0x8000) && (a2 & 0x8000)) 33 | a2 = 0x8001; 34 | else 35 | a2 = sum; 36 | 37 | sum = a3 + b3; 38 | 39 | if (!((a3 ^ b3) & 0x8000) && ((a3 ^ sum) & 0x8000) && !(a3 & 0x8000)) 40 | a3 = 0x7FFF; 41 | else if (!((a3 ^ b3) & 0x8000) && ((a3 ^ sum) & 0x8000) && (a3 & 0x8000)) 42 | a3 = 0x8001; 43 | else 44 | a3 = sum; 45 | 46 | uint64_t c0 = (uint64_t)(a0)<<0; 47 | uint64_t c1 = (uint64_t)(a1)<<16; 48 | uint64_t c2 = (uint64_t)(a2)<<32; 49 | uint64_t c3 = (uint64_t)(a3)<<48; 50 | return c0 | c1 | c2 | c3; 51 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FxAdd32x2.h: 
-------------------------------------------------------------------------------- 1 | uint32_t a0 = (ops[0]&0x00000000FFFFFFFF)>>0; 2 | uint32_t a1 = (ops[0]&0xFFFFFFFF00000000)>>32; 3 | uint32_t b0 = (ops[1]&0x00000000FFFFFFFF)>>0; 4 | uint32_t b1 = (ops[1]&0xFFFFFFFF00000000)>>32; 5 | 6 | uint32_t sum; 7 | 8 | sum = a0 + b0; 9 | if (!((a0 ^ b0) & 0x80000000) && ((a0 ^ sum) & 0x80000000) && !(a0 & 0x80000000)) 10 | a0 = 0x7FFFFFFF; 11 | else if (!((a0 ^ b0) & 0x80000000) && ((a0 ^ sum) & 0x80000000) && (a0 & 0x80000000)) 12 | a0 = 0x80000001; 13 | else 14 | a0 = sum; 15 | 16 | sum = a1 + b1; 17 | if (!((a1 ^ b1) & 0x80000000) && ((a1 ^ sum) & 0x80000000) && !(a1 & 0x80000000)) 18 | a1 = 0x7FFFFFFF; 19 | else if (!((a1 ^ b1) & 0x80000000) && ((a1 ^ sum) & 0x80000000) && (a1 & 0x80000000)) 20 | a1 = 0x80000001; 21 | else 22 | a1 = sum; 23 | 24 | uint64_t c0 = ((uint64_t)(a0))<<0; 25 | uint64_t c1 = ((uint64_t)(a1))<<32; 26 | return c0 | c1; 27 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FxExp16x4.h: -------------------------------------------------------------------------------- 1 | int16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | int16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | int16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | int16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | if ((ops.size() > 1) && (ops[1] == 0)) 7 | return ops[0]; 8 | 9 | double d0 = FIX_TO_DOUBLE(a0); 10 | double d1 = FIX_TO_DOUBLE(a1); 11 | double d2 = FIX_TO_DOUBLE(a2); 12 | double d3 = FIX_TO_DOUBLE(a3); 13 | 14 | d0 = exp(d0); 15 | d1 = exp(d1); 16 | d2 = exp(d2); 17 | d3 = exp(d3); 18 | 19 | int16_t b0 = DOUBLE_TO_FIX(d0); 20 | int16_t b1 = DOUBLE_TO_FIX(d1); 21 | int16_t b2 = DOUBLE_TO_FIX(d2); 22 | int16_t b3 = DOUBLE_TO_FIX(d3); 23 | 24 | uint64_t c0 = ((uint64_t)(b0)<<0)&0x000000000000FFFF; 25 | uint64_t c1 = ((uint64_t)(b1)<<16)&0x00000000FFFF0000; 26 | uint64_t c2 = ((uint64_t)(b2)<<32)&0x0000FFFF00000000; 27 | uint64_t c3 = ((uint64_t)(b3)<<48)&0xFFFF000000000000; 28 | 29 | return c0 | c1 | c2 | c3; 30 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FxMul16x4.h: -------------------------------------------------------------------------------- 1 | int16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | int16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | int16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | int16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | int16_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | int16_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | int16_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | int16_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | 10 | int32_t im0 = ((int32_t)a0 * (int32_t)b0) >> 11; 11 | int16_t m0 = im0 > (int16_t)0x7FFF ? (int16_t)0x7FFF : (im0 < (int32_t)0xFFFF8001 ? (int32_t)0xFFFF8001 : im0); 12 | 13 | int32_t im1 = ((int32_t)a1 * (int32_t)b1) >> 11; 14 | int16_t m1 = im1 > 0x7FFF ? 0x7FFF : (im1 < (int32_t)(int16_t)0xFFFF8001 ? (int32_t)(int16_t)0xFFFF8001 : im1); 15 | 16 | int32_t im2 = ((int32_t)a2 * (int32_t)b2) >> 11; 17 | int16_t m2 = im2 > (int16_t)0x7FFF ? (int16_t)0x7FFF : (im2 < (int32_t)0xFFFF8001 ? (int32_t)0xFFFF8001 : im2); 18 | 19 | int32_t im3 = ((int32_t)a3 * (int32_t)b3) >> 11; 20 | int16_t m3 = im3 > (int16_t)0x7FFF ? (int16_t)0x7FFF : (im3 < (int32_t)0xFFFF8001 ? 
(int32_t)0xFFFF8001 : im3); 21 | 22 | uint64_t c0 = ((uint64_t)(m0)<<0)&0x000000000000FFFF; 23 | uint64_t c1 = ((uint64_t)(m1)<<16)&0x00000000FFFF0000; 24 | uint64_t c2 = ((uint64_t)(m2)<<32)&0x0000FFFF00000000; 25 | uint64_t c3 = ((uint64_t)(m3)<<48)&0xFFFF000000000000; 26 | 27 | return c0 | c1 | c2 | c3; 28 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FxMul32x2.h: -------------------------------------------------------------------------------- 1 | int32_t a0 = (ops[0]&0x00000000FFFFFFFF)>>0; 2 | int32_t a1 = (ops[0]&0xFFFFFFFF00000000)>>32; 3 | int32_t b0 = (ops[1]&0x00000000FFFFFFFF)>>0; 4 | int32_t b1 = (ops[1]&0xFFFFFFFF00000000)>>32; 5 | 6 | int64_t im0 = ((int64_t)a0 * (int64_t)b0) >> 14; // 14 fractional bits 7 | int32_t m0 = im0 > (int64_t)0x000000007FFFFFFF ? (int32_t)0x7FFFFFFF : (im0 < (int64_t)0xFFFFFFFF80000001 ? (int32_t)0x80000001 : im0); 8 | 9 | int64_t im1 = ((int64_t)a1 * (int64_t)b1) >> 14; 10 | int32_t m1 = im1 > (int64_t)0x000000007FFFFFFF ? (int32_t)0x7FFFFFFF : (im1 < (int64_t)0xFFFFFFFF80000001 ? (int32_t)0x80000001 : im1); 11 | 12 | uint64_t c0 = ((uint64_t)(m0)<<0)&0x00000000FFFFFFFF; 13 | uint64_t c1 = ((uint64_t)(m1)<<32)&0xFFFFFFFF00000000; 14 | 15 | return c0 | c1; 16 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FxRed16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t r0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t r1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t r2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t r3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | uint16_t sum; 7 | 8 | uint16_t sum0 = r0 + r1; 9 | if (!((r0 ^ r1) & 0x8000) && ((r0 ^ sum0) & 0x8000) && !(r0 & 0x8000)) 10 | sum0 = 0x7FFF; 11 | else if (!((r0 ^ r1) & 0x8000) && ((r0 ^ sum0) & 0x8000) && (r0 & 0x8000)) 12 | sum0 = 0x8001; 13 | 14 | uint16_t sum1 = r2 + r3; 15 | if (!((r2 ^ r3) & 0x8000) && ((r2 ^ sum1) & 0x8000) && !(r2 & 0x8000)) 16 | sum1 = 0x7FFF; 17 | else if (!((r2 ^ r3) & 0x8000) && ((r2 ^ sum1) & 0x8000) && (r2 & 0x8000)) 18 | sum1 = 0x8001; 19 | 20 | uint16_t sum2 = sum0 + sum1; 21 | if (!((sum0 ^ sum1) & 0x8000) && ((sum0 ^ sum2) & 0x8000) && !(sum0 & 0x8000)) 22 | sum2 = 0x7FFF; 23 | else if (!((sum0 ^ sum1) & 0x8000) && ((sum0 ^ sum2) & 0x8000) && (sum0 & 0x8000)) 24 | sum2 = 0x8001; 25 | 26 | if(ops.size() > 1) { //additional op is acc 27 | sum = sum2 + (uint16_t)ops[1]; 28 | if (!((sum2 ^ (uint16_t)ops[1]) & 0x8000) && ((sum2 ^ sum) & 0x8000) && !(sum2 & 0x8000)) 29 | sum = 0x7FFF; 30 | else if (!((sum2 ^ (uint16_t)ops[1]) & 0x8000) && ((sum2 ^ sum) & 0x8000) && (sum2 & 0x8000)) 31 | sum = 0x8001; 32 | } else { 33 | sum = sum2; 34 | } 35 | 36 | return sum; 37 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FxRed32x2.h: -------------------------------------------------------------------------------- 1 | uint32_t r0 = (ops[0]&0x00000000FFFFFFFF)>>0; 2 | uint32_t r1 = (ops[0]&0xFFFFFFFF00000000)>>32; 3 | 4 | uint32_t sum; 5 | 6 | uint32_t sum0 = r0 + r1; 7 | if (!((r0 ^ r1) & 0x80000000) && ((r0 ^ sum0) & 0x80000000) && !(r0 & 0x80000000)) 8 | sum0 = 0x7FFFFFFF; 9 | else if (!((r0 ^ r1) & 0x80000000) && ((r0 ^ sum0) & 0x80000000) && (r0 & 0x80000000)) 10 | sum0 = 0x80000001; 11 | 12 | if(ops.size() > 1) { //additional op is acc 13 | sum = sum0 + (uint32_t)ops[1]; 14 | if (!((sum0 ^ (uint32_t)ops[1]) & 0x80000000) && ((sum0 ^ sum) & 
0x80000000) && !(sum0 & 0x80000000)) 15 | sum = 0x7FFFFFFF; 16 | else if (!((sum0 ^ (uint32_t)ops[1]) & 0x80000000) && ((sum0 ^ sum) & 0x80000000) && (sum0 & 0x80000000)) 17 | sum = 0x80000001; 18 | } else { 19 | sum = sum0; 20 | } 21 | 22 | return sum; 23 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FxRelu16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t i1 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t i2 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t i3 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t i4 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | if ((ops.size() > 1) && (ops[1] == 0)) 7 | return ops[0]; 8 | 9 | if (i1 & 0x8000) 10 | i1 = 0; 11 | 12 | if (i2 & 0x8000) 13 | i2 = 0; 14 | 15 | if (i3 & 0x8000) 16 | i3 = 0; 17 | 18 | if (i4 & 0x8000) 19 | i4 = 0; 20 | 21 | uint64_t o1 = (uint64_t)(i1)<<0; 22 | uint64_t o2 = (uint64_t)(i2)<<16; 23 | uint64_t o3 = (uint64_t)(i3)<<32; 24 | uint64_t o4 = (uint64_t)(i4)<<48; 25 | 26 | return o1 | o2 | o3 | o4; 27 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FxSig16x4.h: -------------------------------------------------------------------------------- 1 | int16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | int16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | int16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | int16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | if ((ops.size() > 1) && (ops[1] == 0)) 7 | return ops[0]; 8 | 9 | double d0 = FIX_TO_DOUBLE(a0); 10 | double d1 = FIX_TO_DOUBLE(a1); 11 | double d2 = FIX_TO_DOUBLE(a2); 12 | double d3 = FIX_TO_DOUBLE(a3); 13 | 14 | d0 = 1 / (1 + exp(-d0)); 15 | d1 = 1 / (1 + exp(-d1)); 16 | d2 = 1 / (1 + exp(-d2)); 17 | d3 = 1 / (1 + exp(-d3)); 18 | 19 | int16_t b0 = DOUBLE_TO_FIX(d0); 20 | int16_t b1 = DOUBLE_TO_FIX(d1); 21 | int16_t b2 = DOUBLE_TO_FIX(d2); 22 | int16_t b3 = DOUBLE_TO_FIX(d3); 23 | 24 | uint64_t c0 = ((uint64_t)(b0)<<0)&0x000000000000FFFF; 25 | uint64_t c1 = ((uint64_t)(b1)<<16)&0x00000000FFFF0000; 26 | uint64_t c2 = ((uint64_t)(b2)<<32)&0x0000FFFF00000000; 27 | uint64_t c3 = ((uint64_t)(b3)<<48)&0xFFFF000000000000; 28 | 29 | return c0 | c1 | c2 | c3; 30 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FxTanh16x4.h: -------------------------------------------------------------------------------- 1 | int16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | int16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | int16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | int16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | if ((ops.size() > 1) && (ops[1] == 0)) 7 | return ops[0]; 8 | 9 | double d0 = FIX_TO_DOUBLE(a0); 10 | double d1 = FIX_TO_DOUBLE(a1); 11 | double d2 = FIX_TO_DOUBLE(a2); 12 | double d3 = FIX_TO_DOUBLE(a3); 13 | 14 | d0 = tanh(d0); 15 | d1 = tanh(d1); 16 | d2 = tanh(d2); 17 | d3 = tanh(d3); 18 | 19 | int16_t b0 = DOUBLE_TO_FIX(d0); 20 | int16_t b1 = DOUBLE_TO_FIX(d1); 21 | int16_t b2 = DOUBLE_TO_FIX(d2); 22 | int16_t b3 = DOUBLE_TO_FIX(d3); 23 | 24 | uint64_t c0 = ((uint64_t)(b0)<<0)&0x000000000000FFFF; 25 | uint64_t c1 = ((uint64_t)(b1)<<16)&0x00000000FFFF0000; 26 | uint64_t c2 = ((uint64_t)(b2)<<32)&0x0000FFFF00000000; 27 | uint64_t c3 = ((uint64_t)(b3)<<48)&0xFFFF000000000000; 28 | 29 | return c0 | c1 | c2 | c3; 30 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/HAdd16x4.h: 
-------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0xFFFF000000000000)>>48; 2 | uint16_t a1 = (ops[0]&0x0000FFFF00000000)>>32; 3 | uint16_t a2 = (ops[0]&0x00000000FFFF0000)>>16; 4 | uint16_t a3 = (ops[0]&0x000000000000FFFF)>>0; 5 | 6 | uint16_t b0 = (ops[1]&0xFFFF000000000000)>>48; 7 | //uint16_t b1 = (ops[1]&0x0000FFFF00000000)>>32; 8 | //uint16_t b2 = (ops[1]&0x00000000FFFF0000)>>16; 9 | //uint16_t b3 = (ops[1]&0x000000000000FFFF)>>0; 10 | 11 | uint64_t c0 = (uint64_t)(a0+a1)<<48; 12 | uint64_t c1 = (uint64_t)(a1+a2)<<32; 13 | uint64_t c2 = (uint64_t)(a2+a3)<<16; 14 | uint64_t c3 = (uint64_t)(a3+b0)<<0; 15 | 16 | return c0 | c1 | c2 | c3; 17 | //return (uint64_t) _mm_adds_pu16((__m64)ops[0], (__m64)ops[1]); 18 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/ICmpEQ.h: -------------------------------------------------------------------------------- 1 | return ops[0] == ops[1]; 2 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/LShf64.h: -------------------------------------------------------------------------------- 1 | if(ops[1]==64) { 2 | return 0; 3 | } 4 | return ops[0] << ops[1]; 5 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Max16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | uint16_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | uint16_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | uint16_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | uint16_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | uint16_t t0 = a0 >= b0 ? a0 : b0; 10 | uint16_t t1 = a1 >= b1 ? a1 : b1; 11 | uint16_t t2 = a2 >= b2 ? a2 : b2; 12 | uint16_t t3 = a3 >= b3 ? a3 : b3; 13 | uint64_t c0 = (uint64_t)(t0)<<0; 14 | uint64_t c1 = (uint64_t)(t1)<<16; 15 | uint64_t c2 = (uint64_t)(t2)<<32; 16 | uint64_t c3 = (uint64_t)(t3)<<48; 17 | return c0 | c1 | c2 | c3; 18 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Min16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | uint16_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | uint16_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | uint16_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | uint16_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | uint16_t t0 = a0 <= b0 ? a0 : b0; 10 | uint16_t t1 = a1 <= b1 ? a1 : b1; 11 | uint16_t t2 = a2 <= b2 ? a2 : b2; 12 | uint16_t t3 = a3 <= b3 ? 
a3 : b3; 13 | uint64_t c0 = (uint64_t)(t0)<<0; 14 | uint64_t c1 = (uint64_t)(t1)<<16; 15 | uint64_t c2 = (uint64_t)(t2)<<32; 16 | uint64_t c3 = (uint64_t)(t3)<<48; 17 | return c0 | c1 | c2 | c3; 18 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Mul16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | uint16_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | uint16_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | uint16_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | uint16_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | a0*=b0; 10 | a1*=b1; 11 | a2*=b2; 12 | a3*=b3; 13 | uint64_t c0 = (uint64_t)(a0)<<0; 14 | uint64_t c1 = (uint64_t)(a1)<<16; 15 | uint64_t c2 = (uint64_t)(a2)<<32; 16 | uint64_t c3 = (uint64_t)(a3)<<48; 17 | return c0 | c1 | c2 | c3; 18 | //return (uint64_t) _mm_mullo_pi16((__m64)ops[0], (__m64)ops[1]); 19 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Mul32x2.h: -------------------------------------------------------------------------------- 1 | uint32_t a0 = (ops[0]&0x00000000FFFFFFFF)>>0; 2 | uint32_t a1 = (ops[0]&0xFFFFFFFF00000000)>>32; 3 | uint32_t b0 = (ops[1]&0x00000000FFFFFFFF)>>0; 4 | uint32_t b1 = (ops[1]&0xFFFFFFFF00000000)>>32; 5 | 6 | a0*=b0; 7 | a1*=b1; 8 | 9 | uint64_t c0 = (uint64_t)(a0)<<0; 10 | uint64_t c1 = (uint64_t)(a1)<<32; 11 | return c0 | c1; 12 | 13 | //return (uint64_t) _mm_mullo_pi32((__m64)ops[0], (__m64)ops[1]); -- mullo_pi32 doesnt exisit in mm intrinsics 14 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Mul64.h: -------------------------------------------------------------------------------- 1 | return ops[0] * ops[1]; 2 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Or.h: -------------------------------------------------------------------------------- 1 | return ops[0] | ops[1]; 2 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/RShf16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x00000000FFFFFFFF)>>0; 2 | uint16_t a1 = (ops[0]&0xFFFFFFFF00000000)>>32; 3 | 4 | uint64_t b = ops[1]; 5 | if(ops.size()==1) { 6 | b = 2; 7 | } 8 | uint64_t c0 = (uint64_t)(a0>>b)<<0; 9 | uint64_t c1 = (uint64_t)(a1>>b)<<32; 10 | return c0 | c1; 11 | //return (uint64_t) _mm_adds_pu16((__m64)ops[0], (__m64)ops[1]); 12 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/RShf2_16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | uint64_t b = ops[1]; 7 | if(ops.size()==1) { 8 | b = 2; 9 | } 10 | uint64_t c0 = (uint64_t)(a0>>b)<<0; 11 | uint64_t c1 = (uint64_t)(a1>>b)<<16; 12 | uint64_t c2 = (uint64_t)(a2>>b)<<32; 13 | uint64_t c3 = (uint64_t)(a3>>b)<<48; 14 | return c0 | c1 | c2 | c3; 15 | //return (uint64_t) _mm_adds_pu16((__m64)ops[0], (__m64)ops[1]); 16 | 
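The FxAdd16x4.h, FxAdd32x2.h, FxRed16x4.h and FxRed32x2.h bodies above implement signed saturating addition by hand: overflow is flagged when both operands share a sign bit but the sum's sign differs, and the result is clamped to 0x7FFF on the positive side or 0x8001 on the negative side (0x8001, not 0x8000, is the negative clamp used throughout these instruction files). A minimal standalone sketch of the same 16-bit idiom follows; the helper name sat_add16 and its packaging as a function are illustrative only, since the repo inlines this logic once per lane:

    #include <cstdint>

    // 16-bit signed saturating add, mirroring the overflow test in FxAdd16x4.h / FxRed16x4.h.
    static inline uint16_t sat_add16(uint16_t a, uint16_t b) {
      uint16_t sum = a + b;
      // Overflow only if a and b share a sign and the sum's sign differs from a's.
      if (!((a ^ b) & 0x8000) && ((a ^ sum) & 0x8000))
        return (a & 0x8000) ? 0x8001 : 0x7FFF;  // clamp, using the repo's 0x8001 negative bound
      return sum;
    }
    // e.g. sat_add16(0x7000, 0x2000) == 0x7FFF, sat_add16(0x9000, 0x9000) == 0x8001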
-------------------------------------------------------------------------------- /softbrain-config/src/insts/RShf32x2.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | uint64_t b = ops[1]; 7 | if(ops.size()==1) { 8 | b = 2; 9 | } 10 | uint64_t c0 = (uint64_t)(a0>>b)<<0; 11 | uint64_t c1 = (uint64_t)(a1>>b)<<16; 12 | uint64_t c2 = (uint64_t)(a2>>b)<<32; 13 | uint64_t c3 = (uint64_t)(a3>>b)<<48; 14 | return c0 | c1 | c2 | c3; 15 | //return (uint64_t) _mm_adds_pu16((__m64)ops[0], (__m64)ops[1]); 16 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/RShf4_16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | uint64_t b = ops[1]; 7 | if(ops.size()==1) { 8 | b = 4; 9 | } 10 | uint64_t c0 = (uint64_t)(a0>>b)<<0; 11 | uint64_t c1 = (uint64_t)(a1>>b)<<16; 12 | uint64_t c2 = (uint64_t)(a2>>b)<<32; 13 | uint64_t c3 = (uint64_t)(a3>>b)<<48; 14 | return c0 | c1 | c2 | c3; 15 | //return (uint64_t) _mm_adds_pu16((__m64)ops[0], (__m64)ops[1]); 16 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/RShf64.h: -------------------------------------------------------------------------------- 1 | if(ops[1]==64) { 2 | return 0; 3 | } 4 | return ops[0] >> ops[1]; 5 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Red16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t r0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t r1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t r2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t r3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | if(ops.size() > 1) { //additional op is acc 7 | return (r0+r1+r2+r3+((uint16_t)ops[1])); 8 | } 9 | return (r0+r1+r2+r3); 10 | 11 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Red32x2.h: -------------------------------------------------------------------------------- 1 | uint32_t r0 = (ops[0]&0x00000000FFFFFFFF)>>0; 2 | uint32_t r1 = (ops[0]&0xFFFFFFFF00000000)>>32; 3 | 4 | if(ops.size() > 1) { //additional op is acc 5 | return (r0+r1+((uint32_t)ops[1])); 6 | } 7 | return (r0+r1); 8 | 9 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/RedMax16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t r0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t r1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t r2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t r3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | uint16_t x = r0; 7 | if(r1 > x) {x=r1;} 8 | if(r2 > x) {x=r2;} 9 | if(r3 > x) {x=r3;} 10 | 11 | if(ops.size() > 1) { //additional op is acc 12 | uint16_t b = (uint16_t)ops[1]; 13 | if(b > x) {x=b;} 14 | } 15 | return x; 16 | 17 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/RedMin16x4.h: 
-------------------------------------------------------------------------------- 1 | uint16_t r0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t r1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t r2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t r3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | uint16_t x = r0; 7 | if(r1 < x) {x=r1;} 8 | if(r2 < x) {x=r2;} 9 | if(r3 < x) {x=r3;} 10 | 11 | if(ops.size() > 1) { //additional op is acc 12 | uint16_t b = (uint16_t)ops[1]; 13 | if(b < x) {x=b;} 14 | } 15 | return x; 16 | 17 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/RedSMax16x4.h: -------------------------------------------------------------------------------- 1 | int16_t r0 = (ops[0]&0x000000000000FFFF)>>0; 2 | int16_t r1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | int16_t r2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | int16_t r3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | int16_t x = r0; 7 | if(r1 > x) {x=r1;} 8 | if(r2 > x) {x=r2;} 9 | if(r3 > x) {x=r3;} 10 | 11 | if(ops.size() > 1) { //additional op is acc 12 | int16_t b = (int16_t)ops[1]; 13 | if(b > x) {x=b;} 14 | } 15 | return (uint64_t)x; 16 | 17 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/RedSMin16x4.h: -------------------------------------------------------------------------------- 1 | int16_t r0 = (ops[0]&0x000000000000FFFF)>>0; 2 | int16_t r1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | int16_t r2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | int16_t r3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | int16_t x = r0; 7 | if(r1 < x) {x=r1;} 8 | if(r2 < x) {x=r2;} 9 | if(r3 < x) {x=r3;} 10 | 11 | if(ops.size() > 1) { //additional op is acc 12 | int16_t b = (int16_t)ops[1]; 13 | if(b < x) {x=b;} 14 | } 15 | return (uint64_t)x; 16 | 17 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/SMax16x4.h: -------------------------------------------------------------------------------- 1 | int16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | int16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | int16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | int16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | int16_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | int16_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | int16_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | int16_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | int16_t t0 = a0 >= b0 ? a0 : b0; 10 | int16_t t1 = a1 >= b1 ? a1 : b1; 11 | int16_t t2 = a2 >= b2 ? a2 : b2; 12 | int16_t t3 = a3 >= b3 ? a3 : b3; 13 | uint64_t c0 = (uint64_t)(t0)<<0; 14 | uint64_t c1 = (uint64_t)(t1)<<16; 15 | uint64_t c2 = (uint64_t)(t2)<<32; 16 | uint64_t c3 = (uint64_t)(t3)<<48; 17 | return c0 | c1 | c2 | c3; 18 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/SMin16x4.h: -------------------------------------------------------------------------------- 1 | int16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | int16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | int16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | int16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | int16_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | int16_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | int16_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | int16_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | int16_t t0 = a0 <= b0 ? a0 : b0; 10 | int16_t t1 = a1 <= b1 ? a1 : b1; 11 | int16_t t2 = a2 <= b2 ? a2 : b2; 12 | int16_t t3 = a3 <= b3 ? 
a3 : b3; 13 | uint64_t c0 = (uint64_t)(t0)<<0; 14 | uint64_t c1 = (uint64_t)(t1)<<16; 15 | uint64_t c2 = (uint64_t)(t2)<<32; 16 | uint64_t c3 = (uint64_t)(t3)<<48; 17 | return c0 | c1 | c2 | c3; 18 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Select.h: -------------------------------------------------------------------------------- 1 | return ops[2]==0 ? ops[0] : ops[1]; 2 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Sig16.h: -------------------------------------------------------------------------------- 1 | #define SIG (op*1024/(1024+op)) 2 | //#define SIG op 3 | 4 | uint16_t op = (uint16_t)ops[0]; 5 | 6 | if(ops.size() > 1) { 7 | if(ops[1]) { 8 | return (uint64_t) SIG; 9 | } else { 10 | return ops[0]; 11 | } 12 | } 13 | return (uint64_t) SIG; 14 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Sub16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | uint16_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | uint16_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | uint16_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | uint16_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | a0-=b0; 10 | a1-=b1; 11 | a2-=b2; 12 | a3-=b3; 13 | uint64_t c0 = (uint64_t)(a0)<<0; 14 | uint64_t c1 = (uint64_t)(a1)<<16; 15 | uint64_t c2 = (uint64_t)(a2)<<32; 16 | uint64_t c3 = (uint64_t)(a3)<<48; 17 | return c0 | c1 | c2 | c3; 18 | //return (uint64_t) _mm_adds_pu16((__m64)ops[0], (__m64)ops[1]); 19 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Sub64.h: -------------------------------------------------------------------------------- 1 | return ops[0] - ops[1]; 2 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/TAdd16x4.h: -------------------------------------------------------------------------------- 1 | uint32_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint32_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint32_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint32_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | uint32_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | uint32_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | uint32_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | uint32_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | a0+=b0; 10 | a1+=b1; 11 | a2+=b2; 12 | a3+=b3; 13 | uint64_t c0 = (uint64_t)(a0&0x0000FFFF)<<0; 14 | uint64_t c1 = (uint64_t)(a1&0x0000FFFF)<<16; 15 | uint64_t c2 = (uint64_t)(a2&0x0000FFFF)<<32; 16 | uint64_t c3 = (uint64_t)(a3&0x0000FFFF)<<48; 17 | return c0 | c1 | c2 | c3; 18 | //return (uint64_t) _mm_adds_pu16((__m64)ops[0], (__m64)ops[1]); 19 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Xor.h: -------------------------------------------------------------------------------- 1 | return ops[0] ^ ops[1]; 2 | -------------------------------------------------------------------------------- /softbrain-config/src/model.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "model.h" 10 | #include 
"model_parsing.h" 11 | 12 | using namespace std; 13 | using namespace SB_CONFIG; 14 | 15 | void SbModel::printGamsKinds(ostream& os) { 16 | os << "set K \"Type of Node\" /Input,Output"; 17 | 18 | for(int i = 2; i < SB_NUM_TYPES; ++i) { 19 | os << "," << name_of_inst((sb_inst_t)i); 20 | } 21 | os << "/"; 22 | } 23 | 24 | SbModel::SbModel(SubModel* subModel, bool multi_config) { 25 | 26 | if (subModel) { 27 | _subModel = subModel; 28 | } else { 29 | _subModel = new SubModel(5, 5, SubModel::PortType::everysw, multi_config); 30 | } 31 | } 32 | 33 | SbModel::SbModel(bool multi_config) { 34 | _subModel = new SubModel(5, 5, SubModel::PortType::everysw, multi_config); 35 | } 36 | 37 | void SbModel::parse_exec(std::istream& istream) { 38 | string param,value; 39 | while(istream.good()) { 40 | if(istream.peek()=='[') break; //break out if done 41 | 42 | ModelParsing::ReadPair(istream,param,value); 43 | 44 | ModelParsing::trim(param); 45 | ModelParsing::trim(value); 46 | 47 | if(param.length()==0) { 48 | continue; 49 | } 50 | 51 | if(param == string("CMD_DISPATCH")) { 52 | if(value == string("INORDER")) { 53 | set_dispatch_inorder(true); 54 | } else if (value == string("OOO")) { 55 | set_dispatch_inorder(false); 56 | } else { 57 | assert(0 && "Dispatch was not INORDER or OOO"); 58 | } 59 | } else if(param == string("CMD_DISPATCH_WIDTH")) { 60 | istringstream(value) >> _dispatch_width; 61 | } 62 | 63 | } 64 | } 65 | 66 | //File constructor 67 | SbModel::SbModel(const char* filename, bool multi_config) 68 | { 69 | ifstream ifs(filename, ios::in); 70 | string param,value; 71 | 72 | if(ifs.fail()) 73 | { 74 | cerr << "Could Not Open: " << filename << "\n"; 75 | return; 76 | } 77 | 78 | char line[512]; 79 | 80 | while(ifs.good()) 81 | { 82 | ifs.getline(line,512); 83 | //string line; 84 | 85 | if(ModelParsing::StartsWith(line,"[exec-model]")) { 86 | parse_exec(ifs); 87 | } 88 | 89 | if(ModelParsing::StartsWith(line,"[fu-model]")){ 90 | _fuModel= new FuModel(ifs); 91 | } 92 | 93 | if(ModelParsing::StartsWith(line,"[sub-model]")){ 94 | if(_fuModel==NULL) { 95 | cerr<< "No Fu Model Specified\n"; 96 | exit(1); 97 | } 98 | _subModel=new SubModel(ifs, _fuModel, multi_config); 99 | } 100 | 101 | if(ModelParsing::StartsWith(line,"[io-model]")) { 102 | if(_subModel==NULL) { 103 | cerr<< "No Sub Model Specified\n"; 104 | exit(1); 105 | } 106 | 107 | _subModel->parse_io(ifs); 108 | } 109 | } 110 | } 111 | 112 | extern "C" void libsbconfig_is_present() {} 113 | 114 | -------------------------------------------------------------------------------- /softbrain-config/src/model.h: -------------------------------------------------------------------------------- 1 | #ifndef __SB_MODEL_H__ 2 | #define __SB_MODEL_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | //#include "inst_model.h" 11 | #include "fu_model.h" 12 | #include "sub_model.h" 13 | 14 | namespace SB_CONFIG { 15 | 16 | class SbModel { 17 | public: 18 | 19 | SbModel(bool multi=false); 20 | SbModel(const char* filename, bool multi=false); 21 | SbModel(SubModel* sub, bool multi=false); 22 | 23 | FuModel* fuModel() {return (_fuModel);} 24 | SubModel* subModel() {return (_subModel);} 25 | 26 | void printGamsKinds(std::ostream& os); 27 | 28 | void set_dispatch_inorder(bool d) { _dispatch_inorder = d; } 29 | bool dispatch_inorder() { return _dispatch_inorder; } 30 | 31 | void set_dispatch_width(int w) { _dispatch_width = w;} 32 | int dispatch_width() { return _dispatch_width; } 33 | 34 | private: 35 | //InstModel *instModel; 36 | 
FuModel *_fuModel; 37 | SubModel *_subModel; 38 | 39 | bool _dispatch_inorder = false; 40 | int _dispatch_width = 2; 41 | void parse_exec(std::istream& istream); 42 | }; 43 | 44 | } 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /softbrain-config/src/model_parsing.cpp: -------------------------------------------------------------------------------- 1 | #include "model_parsing.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace SB_CONFIG; 9 | using namespace std; 10 | 11 | bool ModelParsing::StartsWith(const std::string& text,const std::string& token) { 12 | if(text.length() < token.length()) return false; 13 | return (text.compare(0, token.length(), token) == 0); 14 | } 15 | 16 | bool ModelParsing::StartsWith(const std::string& text,const char* token) { 17 | if(text.length() < strlen(token)) return false; 18 | return (text.compare(0, strlen(token), token) == 0); 19 | } 20 | 21 | void ModelParsing::trim_comments(std::string& s) { 22 | s = s.substr(0, s.find("#")); 23 | } 24 | 25 | 26 | //This function reads line from an ifstream, and gets a param and value, 27 | //seperated by a ":" 28 | bool ModelParsing::ReadPair(istream& is, string& param, string& value) 29 | { 30 | //char line[512]; 31 | //is.getline(line,512); 32 | 33 | string line; 34 | getline(is, line); 35 | 36 | if(is.fail()) { 37 | param=""; 38 | value=""; 39 | return false; 40 | } 41 | 42 | trim_comments(line); 43 | 44 | std::stringstream ss(line); 45 | getline(ss, param, ':'); 46 | getline(ss, value); 47 | return true; 48 | } 49 | 50 | 51 | // trim from start 52 | void ModelParsing::ltrim(std::string &s) { 53 | s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun(std::isspace)))); 54 | } 55 | 56 | // trim from end 57 | void ModelParsing::rtrim(std::string &s) { 58 | s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(std::isspace))).base(), s.end()); 59 | } 60 | 61 | // trim from both ends 62 | void ModelParsing::trim(std::string &s) { 63 | rtrim(s); 64 | ltrim(s); 65 | } 66 | 67 | bool ModelParsing::stricmp(const std::string& str1, const std::string& str2) { 68 | if (str1.size() != str2.size()) { 69 | return false; 70 | } 71 | for (std::string::const_iterator c1 = str1.begin(), c2 = str2.begin(); c1 != str1.end(); ++c1, ++c2) { 72 | if (tolower(*c1) != tolower(*c2)) { 73 | return false; 74 | } 75 | } 76 | return true; 77 | } 78 | 79 | void ModelParsing::split(const std::string &s, const char delim, std::vector &elems) { 80 | std::stringstream ss(s); 81 | std::string item; 82 | while(std::getline(ss, item, delim)) { 83 | elems.push_back(item); 84 | } 85 | } 86 | 87 | -------------------------------------------------------------------------------- /softbrain-config/src/model_parsing.h: -------------------------------------------------------------------------------- 1 | #ifndef __SB_MODEL_PARSING_H__ 2 | #define __SB_MODEL_PARSING_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | namespace SB_CONFIG { 10 | class ModelParsing { 11 | public: 12 | static bool StartsWith(const std::string& text,const std::string& token); 13 | static bool StartsWith(const std::string& text,const char* ); 14 | static bool ReadPair(std::istream& ifs, std::string& param, std::string& value); 15 | static void split(const std::string &s, const char delim, std::vector &elems); 16 | 17 | static void ltrim(std::string &s); 18 | static void rtrim(std::string &s); 19 | static void trim(std::string 
&s); 20 | static bool stricmp(const std::string& str1, const std::string& str2); 21 | static void trim_comments(std::string &s); 22 | }; 23 | 24 | 25 | } 26 | 27 | 28 | 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /softbrain-config/src/sub_model.h: -------------------------------------------------------------------------------- 1 | #ifndef __SB_SUB_MODEL_H__ 2 | #define __SB_SUB_MODEL_H__ 3 | 4 | #include "fu_model.h" 5 | #include "direction.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace SB_CONFIG { 16 | 17 | class sbnode; 18 | class sbinput; 19 | class sboutput; 20 | 21 | class sbio_interface { 22 | public: 23 | //interf_vec_port_num -> [cgra port_num -> vector_offset_elements] 24 | std::map > > > in_vports; 25 | std::map > > > out_vports; 26 | 27 | //intef_port_num -> possible_elements 28 | std::map > in_ports; 29 | std::map > out_ports; 30 | 31 | 32 | void sort_in_vports(std::vector>& portID2size) { 33 | sort(portID2size, in_vports); 34 | } 35 | 36 | void sort_out_vports(std::vector>& portID2size) { 37 | sort(portID2size, out_vports); 38 | } 39 | 40 | std::vector > >& getDesc_I(int id) { 41 | assert(in_vports.count(id) != 0); 42 | return in_vports[id]; 43 | } 44 | std::vector > >& getDesc_O(int id) { 45 | assert(out_vports.count(id) != 0); 46 | return out_vports[id]; 47 | } 48 | private: 49 | void sort(std::vector>& portID2size, std::map > > >& vports) { 50 | int index = 0; 51 | portID2size.resize(vports.size()); 52 | for(auto i : vports) { 53 | int id = i.first; 54 | int size = i.second.size(); 55 | portID2size[index++] = std::make_pair(id,size); 56 | } 57 | std::sort(portID2size.begin(), portID2size.end(), [](std::pair& left, std::pair& right){ 58 | return left.second < right.second; 59 | }); 60 | } 61 | 62 | 63 | }; 64 | 65 | class sblink { 66 | public: 67 | //enum linktype { input, output, inter, cross } 68 | 69 | sblink() : _ID(LINK_ID++) {} 70 | 71 | sbnode* orig() const {return _orig;} 72 | sbnode* dest() const {return _dest;} 73 | SbDIR::DIR dir() const {return _dir;} 74 | void setdir(SbDIR::DIR dir) { _dir=dir;} 75 | 76 | //Constructor 77 | sblink(sbnode* orig, sbnode* dest) { 78 | _orig=orig; 79 | _dest=dest; 80 | } 81 | 82 | std::string name() const; 83 | std::string gams_name(int config) const; 84 | std::string gams_name(int , int) const; 85 | 86 | protected: 87 | int _ID; 88 | sbnode* _orig; 89 | sbnode* _dest; 90 | SbDIR::DIR _dir; 91 | 92 | private: 93 | friend class SubModel; 94 | static int LINK_ID; 95 | }; 96 | 97 | 98 | class sbnode { 99 | public: 100 | sbnode() : _ID(NODE_ID++) {} 101 | 102 | sblink* add_link(sbnode* node) { 103 | sblink* link = new sblink(this, node); 104 | _out_links.push_back(link); 105 | node->add_back_link(link); 106 | return link; 107 | } 108 | 109 | void add_back_link(sblink* link) { 110 | _in_links.push_back(link); 111 | } 112 | 113 | virtual std::string name() const { 114 | return std::string("loadslice"); 115 | } 116 | virtual std::string gams_name(int config=0) const { 117 | return std::string("loadslice"); 118 | } 119 | 120 | typedef std::vector::const_iterator const_iterator; 121 | const_iterator ibegin() const {return _in_links.begin();} 122 | const_iterator iend() const {return _in_links.end();} 123 | const_iterator obegin() const {return _out_links.begin();} 124 | const_iterator oend() const {return _out_links.end();} 125 | 126 | sblink* getFirstOutLink() { 127 | if(_out_links.size()>0) { 128 | return 
_out_links[0]; 129 | } else { 130 | return NULL; 131 | } 132 | } 133 | 134 | sblink* getFirstInLink() { 135 | if(_in_links.size()>0) { 136 | return _in_links[0]; 137 | } else { 138 | return NULL; 139 | } 140 | } 141 | 142 | sblink* getInLink(SbDIR::DIR dir) { 143 | for(const_iterator I=ibegin(), E=iend();I!=E; ++I) { 144 | sblink* dlink= *I; 145 | if(dlink->dir() == dir) return dlink; 146 | } 147 | return NULL; 148 | } 149 | 150 | sblink* getOutLink(SbDIR::DIR dir) { 151 | for(const_iterator I=obegin(), E=oend();I!=E; ++I) { 152 | sblink* dlink= *I; 153 | if(dlink->dir() == dir) return dlink; 154 | } 155 | return NULL; 156 | } 157 | 158 | int id() {return _ID;} 159 | 160 | protected: 161 | int _ID; 162 | std::vector _in_links; 163 | std::vector _out_links; 164 | 165 | private: 166 | friend class SubModel; 167 | static int NODE_ID; 168 | }; 169 | 170 | class sbswitch : public sbnode { 171 | public: 172 | 173 | sbswitch() : sbnode() {} 174 | 175 | void setXY(int x,int y) {_x=x;_y=y;} 176 | int x() const {return _x;} 177 | int y() const {return _y;} 178 | 179 | std::string name() const { 180 | std::stringstream ss; 181 | ss << "SW" << "_" << _x << "_" << _y; 182 | return ss.str(); 183 | } 184 | 185 | std::string gams_name(int config) const { 186 | std::stringstream ss; 187 | if(config!=0) { 188 | ss << "Sw" << _x << _y << "c" << config; 189 | } else { 190 | ss << "Sw" << _x << _y; 191 | } 192 | return ss.str(); 193 | } 194 | 195 | sbinput* getInput(int i); 196 | 197 | sboutput* getOutput(int i); 198 | 199 | protected: 200 | int _x, _y; 201 | }; 202 | 203 | class sbfu : public sbnode { 204 | public: 205 | 206 | sbfu() : sbnode() {} 207 | 208 | void setFUDef(func_unit_def* fu_def) {_fu_def = fu_def;} 209 | void setXY(int x, int y) {_x=x;_y=y;} 210 | 211 | int x() const {return _x;} 212 | int y() const {return _y;} 213 | 214 | std::string name() const { 215 | std::stringstream ss; 216 | ss << "FU" << "_" << _x << "_" << _y; 217 | return ss.str(); 218 | } 219 | 220 | std::string gams_name(int config) const { 221 | std::stringstream ss; 222 | if(config!=0) { 223 | ss << "Fu" << _x << _y << "c" << config; 224 | } else { 225 | ss << "Fu" << _x << _y; 226 | } 227 | return ss.str(); 228 | } 229 | 230 | func_unit_def* fu_def() {return _fu_def;} 231 | 232 | protected: 233 | int _x, _y; 234 | func_unit_def* _fu_def; 235 | 236 | private: 237 | friend class SubModel; 238 | }; 239 | 240 | class sbinput : public sbnode { 241 | public: 242 | 243 | sbinput() : sbnode() {} 244 | 245 | void setPort(int port) {_port=port;} 246 | int port() const {return _port;} 247 | 248 | std::string name() const { 249 | std::stringstream ss; 250 | ss << "IP" << "_" << _port; 251 | return ss.str(); 252 | } 253 | std::string gams_name(int config) const { 254 | std::stringstream ss; 255 | if(config!=0) { 256 | ss << "I" << _port << "c" << config; 257 | } else { 258 | ss << "I" << _port; 259 | } 260 | return ss.str(); 261 | } 262 | 263 | protected: 264 | int _port; 265 | }; 266 | 267 | class sboutput : public sbnode { 268 | public: 269 | sboutput() : sbnode() {} 270 | 271 | void setPort(int port) {_port=port;} 272 | int port() const {return _port;} 273 | 274 | std::string name() const { 275 | std::stringstream ss; 276 | ss << "OP" << "_" << _port; 277 | return ss.str(); 278 | } 279 | 280 | std::string gams_name(int config) const { 281 | std::stringstream ss; 282 | if(config!=0) { 283 | ss << "O" << _port << "i" << config; 284 | } else { 285 | ss << "O" << _port; 286 | } 287 | return ss.str(); 288 | } 289 | 290 | protected: 291 | 
int _port; 292 | }; 293 | 294 | 295 | class SubModel { 296 | public: 297 | 298 | //Port type of the substrate nodes 299 | //opensp -- dyser opensplyser N + N -1 ips 300 | //three ins -- Softbrain 3 x N 301 | //everywitch -- all switches has ops and ips 302 | enum class PortType {opensp, everysw, threein}; 303 | 304 | typedef std::vector::const_iterator const_input_iterator; 305 | typedef std::vector::const_iterator const_output_iterator; 306 | 307 | SubModel(std::istream& istream, FuModel*, bool multi_config=true); 308 | SubModel(int x, int y, PortType pt=PortType::opensp, int ips=2, int ops=2, bool multi_config=true); 309 | 310 | void PrintGraphviz(std::ostream& ofs); 311 | void PrintGamsModel(std::ostream& ofs, 312 | std::unordered_map >&, 313 | std::unordered_map >&, 314 | std::unordered_map >&, 315 | std::unordered_map>&, /*isInput, port*/ 316 | int n_configs=1); 317 | 318 | int sizex() {return _sizex;} 319 | int sizey() {return _sizey;} 320 | 321 | sbfu* fuAt(int x, int y) {return &(_fus[x][y]);} 322 | sbswitch* switchAt(int x, int y) {return &(_switches[x][y]);} 323 | 324 | sbinput* get_input(int i) {return &(_inputs[i]); } 325 | sboutput* get_output(int i) {return &(_outputs[i]);} 326 | 327 | const_input_iterator input_begin() { return _inputs.begin();} 328 | const_input_iterator input_end() { return _inputs.end();} 329 | 330 | const_output_iterator output_begin() { return _outputs.begin();} 331 | const_output_iterator output_end() { return _outputs.end();} 332 | 333 | //const_output_iterator output_begin() { return _outputs.begin();} 334 | //const_output_iterator output_end() { return _outputs.end();} 335 | 336 | std::vector >& fus() {return _fus;} 337 | std::vector >& switches() {return _switches;} 338 | 339 | bool multi_config() { return _multi_config;} 340 | 341 | sbswitch* cross_switch() {return &_cross_switch;} 342 | sbnode* load_slice() {return &_load_slice;} 343 | 344 | int num_fu() {return NUM_FU;} 345 | 346 | void parse_io(std::istream& istream); 347 | sbio_interface& io_interf() {return _sbio_interf;} 348 | 349 | private: 350 | 351 | //void CreateFUArray(int,int); 352 | 353 | //void SetTotalFUByRatio(); 354 | //void RandDistributeFUs(); 355 | void build_substrate(int x, int y); 356 | void connect_substrate(int x, int y, PortType pt, int ips, int ops,bool multi_config); 357 | 358 | int _sizex, _sizey; //size of SB cgra 359 | bool _multi_config; 360 | std::vector _inputs; 361 | std::vector _outputs; 362 | std::vector > _fus; 363 | std::vector > _switches; 364 | 365 | sbswitch _cross_switch; 366 | sbnode _load_slice; 367 | sbio_interface _sbio_interf; 368 | 369 | int NUM_FU; 370 | }; 371 | 372 | } 373 | 374 | #endif 375 | -------------------------------------------------------------------------------- /softbrain-emu/Makefile: -------------------------------------------------------------------------------- 1 | #CPP=riscv32-unknown-elf-g++ 2 | ifndef SS_TOOLS 3 | $(error SS_TOOLS is undefined) 4 | endif 5 | 6 | 7 | prefix= $(SS_TOOLS) 8 | MKDIR_P = mkdir -p 9 | CPP=g++ 10 | 11 | CFLAGS= -c --std=c++11 -fPIC -g -gdwarf-3 #-DSB_DEBUG_MSG 12 | DFG_FLAGS= --std=c++11 -O3 13 | LIBFLAGS= -shared --std=c++11 14 | 15 | SRCDIR = src 16 | OBJDIR = obj 17 | BINDIR = lib 18 | INCLUDEDIR = include 19 | TARGET = libsoftbrain-emu 20 | 21 | SOURCES := $(wildcard $(SRCDIR)/*.C) 22 | INCLUDES := $(wildcard $(SRCDIR)/*.h) 23 | OBJECTS := $(SOURCES:$(SRCDIR)/%.C=$(OBJDIR)/%.o) 24 | 25 | all: mkdirs install install-lib 26 | create-insts: $(BINDIR)/compile-insts 27 | 28 | install-lib: 
$(BINDIR)/libsoftbrain-emu $(BINDIR)/libsoftbrain-emu-perf $(BINDIR)/libsoftbrain-emu-dbg 29 | 30 | mkdirs: 31 | ${MKDIR_P} ${OBJDIR} 32 | ${MKDIR_P} ${BINDIR} 33 | ${MKDIR_P} ${INCLUDEDIR} 34 | 35 | install: mkdirs install-lib 36 | ${MKDIR_P} ${prefix}/lib 37 | cp lib/* ${prefix}/lib 38 | ${MKDIR_P} ${prefix}/include/softbrain-lib 39 | cp include/* ${prefix}/include/softbrain-lib/ 40 | 41 | $(OBJDIR)/softbrain.o: $(SOURCES) $(INCLUDES) 42 | @echo "Building default library" 43 | $(CPP) $(CFLAGS) $(SOURCES) -o $@ 44 | 45 | $(OBJDIR)/softbrain-dbg.o: $(SOURCES) $(INCLUDES) 46 | @echo "Building debug feedback library." 47 | $(CPP) $(CFLAGS) $(SOURCES) -DSB_DEBUG_MSG -o $@ 48 | 49 | $(OBJDIR)/softbrain-perf.o: $(SOURCES) $(INCLUDES) 50 | @echo "Building performance feedback library." 51 | $(CPP) $(CFLAGS) $(SOURCES) -DSB_PERF_MSG -o $@ 52 | 53 | $(BINDIR)/libsoftbrain-emu: $(OBJDIR)/softbrain.o 54 | $(CPP) $(LIBFLAGS) -Wl,-soname,libsoftbrain-emu.so -o $@.so $^ 55 | cp -f $(SRCDIR)/*.h $(INCLUDEDIR)/ 56 | @echo "Build complete." 57 | 58 | $(BINDIR)/libsoftbrain-emu-perf: $(OBJDIR)/softbrain-perf.o 59 | $(CPP) $(LIBFLAGS) -Wl,-soname,libsoftbrain-emu-perf.so -o $@.so $^ 60 | cp -f $(SRCDIR)/*.h $(INCLUDEDIR)/ 61 | @echo "Build complete." 62 | 63 | $(BINDIR)/libsoftbrain-emu-dbg: $(OBJDIR)/softbrain-dbg.o 64 | $(CPP) $(LIBFLAGS) -Wl,-soname,libsoftbrain-emu-dbg.so -o $@.so $^ 65 | cp -f $(SRCDIR)/*.h $(INCLUDEDIR)/ 66 | @echo "Build complete." 67 | 68 | $(BINDIR)/compile-insts: $(SRCDIR)/create_insts.c 69 | $(CPP) $(DFG_FLAGS) $^ -o $@ 70 | @echo "Built compile insts." 71 | 72 | clean: 73 | @rm -rf $(OBJECTS) $(OBJDIR)/*.o $(BINDIR)/*.so $(BINDIR)/compile-insts 74 | @echo "Clean complete." 75 | -------------------------------------------------------------------------------- /softbrain-emu/src/.gitignore: -------------------------------------------------------------------------------- 1 | #ignore temp files 2 | *~ 3 | -------------------------------------------------------------------------------- /softbrain-emu/src/create_insts.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | std::vector open(std::string path = ".") { 8 | 9 | DIR* dir; 10 | dirent* pdir; 11 | std::vector files; 12 | 13 | dir = opendir(path.c_str()); 14 | while(pdir = readdir(dir)) { 15 | files.push_back(pdir->d_name); 16 | } 17 | 18 | return files; 19 | } 20 | 21 | static void usage(const char * program_name) { 22 | printf("Usage: %s [CONFIG_PATH] [INCLUDE_PATH]\n", program_name); 23 | 24 | fputs("\n\ 25 | CONFIG_PATH -- Path to instructions in Softbrain-Config \n\ 26 | INCLUDE_PATH -- Path where the sb_c_insts.h file needs to be generated \ 27 | \n", stdout); 28 | } 29 | 30 | 31 | int main(int argc, char* argv[]) { 32 | 33 | if (argc < 3){ 34 | usage(argv[0]); 35 | exit(0); 36 | } 37 | 38 | 39 | std::vector f; 40 | std::string path = "."; 41 | std::string includePath = "."; 42 | std::string exportName = "sb_c_insts.h"; 43 | if(argc > 2) { 44 | path = argv[1]; 45 | includePath = argv[2]; 46 | } 47 | 48 | f = open(path); 49 | path = path.append("/"); 50 | includePath = includePath.append("/"); 51 | std::string rawPath = path; 52 | std::ofstream instsHeader(includePath.append(exportName)); 53 | path = rawPath; 54 | std::string header = ".h"; 55 | instsHeader << "#ifndef _SB_EMU_INSTS" << std::endl; 56 | instsHeader << "#define _SB_EMU_INSTS" << std::endl; 57 | instsHeader << "#include " << std::endl; 58 | instsHeader << 
"#include " << std::endl; 59 | instsHeader << "#include " << std::endl; 60 | instsHeader << "#include \"sb_init.h\"" << std::endl; 61 | /* instsHeader << "float as_float(std::uint32_t ui);" << std::endl; */ 62 | /* instsHeader << "uint32_t as_uint32(float f);" << std::endl; */ 63 | /* instsHeader << "double as_double(std::uint64_t ui);" << std::endl; */ 64 | /* instsHeader << "uint64_t as_uint64(double f);" << std::endl << std::endl; */ 65 | /* instsHeader << "float as_float(uint32_t ui) {" << std::endl */ 66 | /* << " float f;" << std::endl */ 67 | /* << " std::memcpy(&f, &ui, sizeof(float));" << std::endl */ 68 | /* << " return f;" << std::endl */ 69 | /* << "}" << std::endl << std::endl; */ 70 | 71 | 72 | /* instsHeader << "uint32_t as_uint32(float f) {" << std::endl */ 73 | /* << " uint32_t ui;" << std::endl */ 74 | /* << " std::memcpy(&ui, &f, sizeof(uint32_t));" << std::endl */ 75 | /* << " return ui;" << std::endl */ 76 | /* << "}" << std::endl << std::endl; */ 77 | 78 | /* instsHeader << "double as_double(uint64_t ui) {" << std::endl */ 79 | /* << " double f;" << std::endl */ 80 | /* << " std::memcpy(&f, &ui, sizeof(double));" << std::endl */ 81 | /* << " return f;" << std::endl */ 82 | /* << "}" << std::endl << std::endl; */ 83 | 84 | /* instsHeader << "uint64_t as_uint64(double f) {" << std::endl */ 85 | /* << " uint64_t ui;" << std::endl */ 86 | /* << " std::memcpy(&ui, &f, sizeof(uint64_t));" << std::endl */ 87 | /* << " return ui;" << std::endl */ 88 | /* << "}" << std::endl << std::endl; */ 89 | 90 | for(auto iter = f.begin(); iter != f.end(); iter++) { 91 | if((*iter).length() > header.length()) { 92 | if((*iter).compare((*iter).length() - header.length(), header.length(), header) == 0) { 93 | //A valid file 94 | //Get header name as name of instruction 95 | instsHeader << "inline uint64_t "; 96 | instsHeader << (*iter).substr(0, (*iter).find_last_of(".")) << ("(std::array ops) {") << std::endl; 97 | //Done the header for the file. Now open and iterate through. 98 | std::ifstream newFile(path.append((*iter))); 99 | path = rawPath; 100 | std::string newLine; 101 | if(newFile.is_open()) { 102 | while(std::getline(newFile,newLine)) { 103 | instsHeader << "\t" << newLine << std::endl; 104 | } 105 | } else { 106 | std::cout << "Failed to open " << path.append((*iter)) << std::endl; 107 | path = rawPath; 108 | } 109 | newFile.close(); 110 | //Done iterating. 
Exit definition 111 | instsHeader << "}" << std::endl << std::endl; 112 | } 113 | } 114 | } 115 | instsHeader << "#endif" << std::endl; 116 | instsHeader.close(); 117 | } 118 | -------------------------------------------------------------------------------- /softbrain-emu/src/sb.h: -------------------------------------------------------------------------------- 1 | #ifndef SB_H 2 | #define SB_H 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | using namespace std; 12 | 13 | #if defined(SB_DEBUG_MSG) 14 | #define DEBUG_PRINTF(message, arg) \ 15 | printf(message, arg); 16 | #else 17 | #define DEBUG_PRINTF(message, arg) ; 18 | #endif 19 | 20 | #define SCRATCHPAD_SIZE 8192 21 | //Will need to read in the config file to set proper values for the 22 | //CGRA itself in terms of types, but will write that later 23 | struct sb_config { 24 | void (*dfg_func)(uint64_t**, uint64_t**); 25 | int num_inputs; 26 | int* input_widths; 27 | int num_outputs; 28 | int* output_widths; 29 | int work; 30 | int span; 31 | }; 32 | 33 | enum class InputMode {DATA, RECURRENCE}; 34 | enum class OutputMode {PTR_U, PTR_I, SHF16_U, SHF16_I, RECURRENCE, GARBAGE, SCRATCH}; 35 | enum class OutputType {ULL, ILL, UL, IL, U, I, UC, IC}; 36 | class SoftBrain { 37 | public: 38 | SoftBrain(); 39 | SoftBrain(sb_config mem_addr, long size); 40 | ~SoftBrain(); 41 | void dma_read(void* mem_addr, long stride, long access_size, long num_strides, int port); 42 | void dma_write(int port, long stride, long access_size, long num_strides, void* mem_addr); 43 | void dma_write_shf16(int port, long stride, long access_size, long num_strides, void* mem_addr); 44 | void dma_scratch_load(void* mem_addr, long stride, long access_size, long num_strides, int scratch_addr); 45 | void scr_port_stream(int scr_addr, long stride, long access_size, long num_strides, int port); 46 | void scratch_read(int scr_addr, long num_bytes, int port); 47 | void scratch_write(int port, long num_bytes, int scr_addr); 48 | template 49 | void sb_const(int port, T val, int num); 50 | void wait_all(); 51 | void recurrence(uint64_t output_port, int input_port, int num_strides); 52 | void garbage(int port, int num); 53 | private: 54 | void (*dfg_func)(uint64_t**, uint64_t**); 55 | void verify_lengths(); 56 | int execute_dfg(); 57 | void process_recurrence(uint64_t** outputs); 58 | 59 | uint64_t **inputs; 60 | uint64_t **outputs; 61 | uint64_t *input_temp; 62 | uint64_t *output_temp; 63 | 64 | int num_inputs; 65 | int num_outputs; 66 | int iterations; 67 | int executions; 68 | int aggregate_iterations; 69 | int aggregate_executions; 70 | int pipeline_fill; 71 | bool recurrence_check; 72 | int work; 73 | int span; 74 | long size; 75 | sb_config saved_config; 76 | //Unlike reading from memory, scratchpad MUST be able to be accessed at byte level 77 | uint8_t* scratchpad; 78 | int scratchpad_tail; 79 | //streams 80 | struct input_port_instance { 81 | InputMode mode; //Mode = 0 means second value is the value 82 | uint64_t* data; 83 | }; 84 | 85 | struct input_stream { 86 | int width; 87 | deque fifo; 88 | input_stream() { 89 | } 90 | }; 91 | 92 | struct output_port_instance { 93 | OutputMode mode; //Mode = 0 means second value is the value 94 | OutputType typing; //Which type to use for this output iteration in mem 95 | void** data; 96 | }; 97 | 98 | struct output_stream { 99 | int width; 100 | deque fifo; 101 | output_stream() { 102 | } 103 | }; 104 | 105 | input_stream* input_streams; 106 | output_stream* 
output_streams; 107 | }; 108 | 109 | template 110 | void SoftBrain::sb_const(int port, T val, int num) { 111 | assert(port < num_inputs); 112 | assert((num % input_streams[port].width) == 0); 113 | //Need to copy the contents of val into an iteration 114 | uint64_t* ullval = (uint64_t*) malloc(sizeof(uint64_t)); 115 | *ullval = 0; 116 | std::memcpy(ullval, &val, sizeof(T)); 117 | for(int str = 0; str < (num/input_streams[port].width); str++) { 118 | input_port_instance next_instance; 119 | next_instance.mode = InputMode::DATA; 120 | next_instance.data = (uint64_t*) malloc(sizeof(uint64_t)*input_streams[port].width); 121 | for(int j = 0; j < input_streams[port].width; j++) { 122 | next_instance.data[j] = *ullval; 123 | } 124 | input_streams[port].fifo.push_back(next_instance); 125 | } 126 | free(ullval); 127 | while(execute_dfg()); 128 | } 129 | 130 | #endif 131 | -------------------------------------------------------------------------------- /softbrain-emu/src/sb_emu.h: -------------------------------------------------------------------------------- 1 | #ifndef SB_EMU_H 2 | #define SB_EMU_H 3 | #ifndef _REENTRANT 4 | #include 5 | #include 6 | #include "sb.h" 7 | 8 | using namespace std; 9 | //Will need to read in the config file to set proper values for the 10 | //CGRA itself in terms of types, but will write that later 11 | 12 | //Goal here is to have each define as used by the SB map to a call 13 | //in a class file that does what the softbrain does. Hence, each define 14 | //maps to a matching call on the class SoftBrain. 15 | extern SoftBrain *sb_emu; 16 | 17 | //Stream in the Config 18 | #define SB_CONFIG(mem_addr, size) \ 19 | if(sb_emu == NULL) { \ 20 | sb_emu = new SoftBrain(mem_addr, size); \ 21 | } else { \ 22 | delete sb_emu; \ 23 | sb_emu = new SoftBrain(mem_addr, size); \ 24 | } 25 | 26 | //Fill the scratchpad from DMA (from memory or cache) 27 | //Note that scratch_addr will be written linearly 28 | #define SB_DMA_SCRATCH_LOAD(mem_addr, stride, access_size, num_strides, scratch_addr) \ 29 | sb_emu->dma_scratch_load(static_cast(mem_addr), stride, access_size, num_strides, scratch_addr); 30 | 31 | //Read from scratch into a cgra port 32 | #define SB_SCR_PORT_STREAM(scr_addr, stride, access_size, num_strides, port ) \ 33 | sb_emu->scr_port_stream(scr_addr, stride, access_size, num_strides, port); 34 | 35 | //A convienience CMD if you want to read linearly 36 | #define SB_SCRATCH_READ(scr_addr, num_bytes, port) \ 37 | sb_emu->scratch_read(scr_addr, num_bytes, port); 38 | 39 | //Read from DMA into a port 40 | #define SB_DMA_READ(mem_addr, stride, access_size, num_strides, port ) \ 41 | sb_emu->dma_read(static_cast(mem_addr), stride, access_size, num_strides, port); 42 | 43 | //Throw away some outputs. We will add a proper instruction for this at some point, rather then writing to memory 44 | #define SB_GARBAGE(output_port, num_elem) \ 45 | sb_emu->garbage(output_port, num_elem); 46 | 47 | //Write to DMA. 
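//Arguments mirror SoftBrain::dma_write(port, stride, access_size, num_strides, mem_addr) declared in sb.h;
//presumably num_strides chunks of access_size bytes are drained from the output port, with mem_addr
//advanced by stride between chunks (the same convention as SB_DMA_READ, read in reverse).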
48 | #define SB_DMA_WRITE(output_port, stride, access_size, num_strides, mem_addr) \ 49 | sb_emu->dma_write(output_port, stride, access_size, num_strides, static_cast(mem_addr)); 50 | 51 | //Write to DMA, but throw away all but the last 16-bits from each word 52 | #define SB_DMA_WRITE_SHF16(output_port, stride, access_size, num_strides, mem_addr) \ 53 | sb_emu->dma_write_shf16(output_port, stride, access_size, num_strides, static_cast(mem_addr)); 54 | 55 | //Write to DMA, but throw away all but the last 16-bits from each word 56 | //WARNING -- (NOT IMPLEMENTED IN SIMULTOR YET) 57 | //#define SB_DMA_WRITE_SHF32(output_port, stride, access_size, num_strides, mem_addr) \ 58 | __asm__ __volatile__("sb_stride %0, %1" : : "r"(stride), "r"(access_size)); \ 59 | __asm__ __volatile__("sb_wr_dma %0, %1, %2" : : "r"(mem_addr), "r"(num_stirides), "i"(output_port|0x80)); 60 | 61 | // __asm__ __volatile__("sb_dma_addr %0, %1" : : "r"(access_size), "r"(stride)); \ 62 | // __asm__ __volatile__("sb_wr %0 " : : "i"(output_port)); \ 63 | // __asm__ __volatile__("sb_stride %0, %1" : : "r"(mem_addr), "r"(stride)); \ 64 | // __asm__ __volatile__("sb_dma_addr_p %0, %1, " #output_port : : "r"(mem_addr), "r"(stride_size)); \ 65 | // __asm__ __volatile__("sb_dma_wr %0, " : : "r"(num_strides)); 66 | 67 | //Send a constant value, repetated num_elements times to a port 68 | #define SB_CONST(port, val, num_elements) \ 69 | sb_emu->sb_const(port, val, num_elements); 70 | 71 | //Write to Scratch from a CGRA output port. Note that only linear writes are currently allowed 72 | #define SB_SCRATCH_WRITE(output_port, num_bytes, scratch_addr) \ 73 | sb_emu->scratch_write(output_port, num_bytes, scratch_addr); 74 | 75 | //Write from output to input port 76 | #define SB_RECURRENCE(output_port, input_port, num_strides) \ 77 | sb_emu->recurrence(static_cast(output_port), input_port, num_strides); 78 | 79 | //Wait with custom bit vector -- probably don't need to use 80 | //#define SB_WAIT(bit_vec) \ 81 | __asm__ __volatile__("sb_wait t0, t0, " #bit_vec); \ 82 | 83 | //Wait for all softbrain commands to be done -- This will block the processor indefinately if there is 84 | //unbalanced commands 85 | #define SB_WAIT_ALL() \ 86 | sb_emu->wait_all(); 87 | 88 | //For now, cast wait to wait all 89 | #define SB_WAIT(wait_amt) \ 90 | ; 91 | 92 | //Wait for all prior scratch writes to be complete. 93 | #define SB_WAIT_SCR_WR() ; 94 | //Do nothing for a wait \ 95 | 96 | //wait for everything except outputs to be complete. (useful for debugging) 97 | #define SB_WAIT_COMPUTE() ; 98 | //__asm__ __volatile__("sb_wait t0, t0, 2"); \ 99 | 100 | //wait for all prior scratch reads to be complete (NOT IMPLEMENTED IN SIMULTOR YET) 101 | #define SB_WAIT_SCR_RD() ; 102 | //__asm__ __volatile__("sb_wait t0, t0, 4"); \ 103 | 104 | #endif 105 | 106 | #ifdef _REENTRANT 107 | #include 108 | #include 109 | #include "sb.h" 110 | #include 111 | #include 112 | 113 | 114 | using namespace std; 115 | //Will need to read in the config file to set proper values for the 116 | //CGRA itself in terms of types, but will write that later 117 | 118 | //Goal here is to have each define as used by the SB map to a call 119 | //in a class file that does what the softbrain does. Hence, each define 120 | //maps to a matching call on the class SoftBrain. 
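//In this _REENTRANT variant each pthread gets its own SoftBrain instance: SB_CONFIG creates or replaces
//the entry keyed by pthread_self() in the softbrains map below (guarded by configlock), and every macro
//then routes through softbrains->find(pthread_self())->second instead of the single global sb_emu above.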
121 | extern map* softbrains; 122 | extern pthread_mutex_t configlock; 123 | 124 | //Stream in the Config 125 | #define SB_CONFIG(mem_addr, size) \ 126 | pthread_mutex_lock(&configlock); \ 127 | if(softbrains == NULL) { \ 128 | softbrains = new map(); \ 129 | } \ 130 | auto sb = softbrains->find(pthread_self()); \ 131 | if(sb == softbrains->end()) { \ 132 | softbrains->insert(make_pair(pthread_self(), new SoftBrain(mem_addr, size))); \ 133 | } else { \ 134 | delete sb->second; \ 135 | sb->second = new SoftBrain(mem_addr, size); \ 136 | } \ 137 | pthread_mutex_unlock(&configlock); 138 | 139 | //Fill the scratchpad from DMA (from memory or cache) 140 | //Note that scratch_addr will be written linearly 141 | #define SB_DMA_SCRATCH_LOAD(mem_addr, stride, access_size, num_strides, scratch_addr) \ 142 | softbrains->find(pthread_self())->second->dma_scratch_load(static_cast(mem_addr), stride, access_size, num_strides, scratch_addr); 143 | 144 | //Read from scratch into a cgra port 145 | #define SB_SCR_PORT_STREAM(scr_addr, stride, access_size, num_strides, port ) \ 146 | softbrains->find(pthread_self())->second->scr_port_stream(scr_addr, stride, access_size, num_strides, port); 147 | 148 | //A convienience CMD if you want to read linearly 149 | #define SB_SCRATCH_READ(scr_addr, num_bytes, port) \ 150 | softbrains->find(pthread_self())->second->scratch_read(scr_addr, num_bytes, port); 151 | 152 | //Read from DMA into a port 153 | #define SB_DMA_READ(mem_addr, stride, access_size, num_strides, port ) \ 154 | softbrains->find(pthread_self())->second->dma_read(static_cast(mem_addr), stride, access_size, num_strides, port); 155 | 156 | //Throw away some outputs. We will add a proper instruction for this at some point, rather then writing to memory 157 | #define SB_GARBAGE(output_port, num_elem) \ 158 | softbrains->find(pthread_self())->second->garbage(output_port, num_elem); 159 | 160 | //Write to DMA. 
161 | #define SB_DMA_WRITE(output_port, stride, access_size, num_strides, mem_addr) \ 162 | softbrains->find(pthread_self())->second->dma_write(output_port, stride, access_size, num_strides, static_cast(mem_addr)); 163 | 164 | //Write to DMA, but throw away all but the last 16-bits from each word 165 | #define SB_DMA_WRITE_SHF16(output_port, stride, access_size, num_strides, mem_addr) \ 166 | softbrains->find(pthread_self())->second->dma_write_shf16(output_port, stride, access_size, num_strides, static_cast(mem_addr)); 167 | 168 | //Write to DMA, but throw away all but the last 16-bits from each word 169 | //WARNING -- (NOT IMPLEMENTED IN SIMULTOR YET) 170 | //#define SB_DMA_WRITE_SHF32(output_port, stride, access_size, num_strides, mem_addr) \ 171 | __asm__ __volatile__("sb_stride %0, %1" : : "r"(stride), "r"(access_size)); \ 172 | __asm__ __volatile__("sb_wr_dma %0, %1, %2" : : "r"(mem_addr), "r"(num_stirides), "i"(output_port|0x80)); 173 | 174 | // __asm__ __volatile__("sb_dma_addr %0, %1" : : "r"(access_size), "r"(stride)); \ 175 | // __asm__ __volatile__("sb_wr %0 " : : "i"(output_port)); \ 176 | // __asm__ __volatile__("sb_stride %0, %1" : : "r"(mem_addr), "r"(stride)); \ 177 | // __asm__ __volatile__("sb_dma_addr_p %0, %1, " #output_port : : "r"(mem_addr), "r"(stride_size)); \ 178 | // __asm__ __volatile__("sb_dma_wr %0, " : : "r"(num_strides)); 179 | 180 | //Send a constant value, repetated num_elements times to a port 181 | #define SB_CONST(port, val, num_elements) \ 182 | softbrains->find(pthread_self())->second->sb_const(port, val, num_elements); 183 | 184 | //Write to Scratch from a CGRA output port. Note that only linear writes are currently allowed 185 | #define SB_SCRATCH_WRITE(output_port, num_bytes, scratch_addr) \ 186 | softbrains->find(pthread_self())->second->scratch_write(output_port, num_bytes, scratch_addr); 187 | 188 | //Write from output to input port 189 | #define SB_RECURRENCE(output_port, input_port, num_strides) \ 190 | softbrains->find(pthread_self())->second->recurrence(static_cast(output_port), input_port, num_strides); 191 | 192 | //Wait with custom bit vector -- probably don't need to use 193 | //#define SB_WAIT(bit_vec) \ 194 | __asm__ __volatile__("sb_wait t0, t0, " #bit_vec); \ 195 | 196 | //Wait for all softbrain commands to be done -- This will block the processor indefinately if there is 197 | //unbalanced commands 198 | #define SB_WAIT_ALL() \ 199 | softbrains->find(pthread_self())->second->wait_all(); 200 | 201 | //For now, cast wait to wait all 202 | #define SB_WAIT(wait_amt) \ 203 | ; 204 | 205 | //Wait for all prior scratch writes to be complete. 206 | #define SB_WAIT_SCR_WR() ; 207 | //Do nothing for a wait \ 208 | 209 | //wait for everything except outputs to be complete. 
(useful for debugging) 210 | #define SB_WAIT_COMPUTE() ; 211 | //__asm__ __volatile__("sb_wait t0, t0, 2"); \ 212 | 213 | //wait for all prior scratch reads to be complete (NOT IMPLEMENTED IN SIMULTOR YET) 214 | #define SB_WAIT_SCR_RD() ; 215 | //__asm__ __volatile__("sb_wait t0, t0, 4"); \ 216 | 217 | #endif 218 | 219 | #endif 220 | -------------------------------------------------------------------------------- /softbrain-emu/src/sb_init.h: -------------------------------------------------------------------------------- 1 | #ifndef __SB_INIT__ 2 | #define __SB_INIT__ 3 | #include 4 | #include 5 | 6 | // NOTE: The macros below were copied from cambricon/include/fix_common.h 7 | // in the softbrain-workloads repository 8 | 9 | // In 16-bit integer representation, 10 | // one bit is reserved for sign, 11 | // the maximum supported number is 32767, 12 | // the minimum supported number is -32768. 13 | // Here FIX_MAX = 32767, and FIX_MIN is chosen to be negative 14 | // of FIX_MAX instead of -32768 to keep the symmetry. 15 | // 16 | // FIX_TRUNC is to keep the number falling within the range 17 | // between FIX_MIN and FIX_MAX (both inclusively) 18 | #define FIX_MAX ((1 << 15) - 1) 19 | #define FIX_MIN (-FIX_MAX) 20 | #define FIX_TRUNC(x) (x > FIX_MAX ? FIX_MAX : (x < FIX_MIN ? FIX_MIN : x) ) 21 | 22 | // FRAC_BITS is the number of bits reserved for fractional parts. 23 | // So the integer part has 15 - FRAC_BITS bits. 24 | // 25 | // DELTA is the minimum positive amount that can be represented in this number system. 26 | // 27 | // FLOAT_MAX is the largest real value that can be represented in this number system. 28 | // FLOAT_MIN is the smallest real value that can be represented in this number system. 29 | // 30 | // FLOAT_TRUNC is to keep numbers within the range 31 | // between FLOAT_MIN and FLOAT_MAX (both inclusively) 32 | #define FRAC_BITS 11 // 11 or 12 is recommended 33 | #define DELTA (((double)1.0)/(1 << FRAC_BITS)) 34 | #define FLOAT_MAX (FIX_MAX * DELTA) 35 | #define FLOAT_MIN (FIX_MIN * DELTA) 36 | #define FLOAT_TRUNC(x) (x > FLOAT_MAX ? FLOAT_MAX : (x < FLOAT_MIN ? FLOAT_MIN : x) ) 37 | 38 | // DOUBLE_TO_FIX converts a double number to integer in our fixed representation. 39 | // FIX_TO_DOUBLE converts a integer number to double in our fixed representation. 40 | #define DOUBLE_TO_FIX(x) ( (int)(FLOAT_TRUNC(x) / DELTA) ) 41 | #define FIX_TO_DOUBLE(x) (x * DELTA) 42 | 43 | // FIX_ADD fixed addition. 44 | // FIX_MINUS fixed subtraction. 45 | // FIX_MUL fixed multiplication. 
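//
// For example, with the default FRAC_BITS = 11 above (DELTA = 1/2048):
//   DOUBLE_TO_FIX(1.5)  = 3072,  DOUBLE_TO_FIX(0.75) = 1536,
//   FIX_MUL(3072, 1536) = (3072 * 1536) >> 11 = 2304,
//   FIX_TO_DOUBLE(2304) = 1.125  (= 1.5 * 0.75).
//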
46 | // FIX_TAN_H fixed tanh, but is right now using tanh from math.h 47 | #define FIX_ADD(a, b) ( FIX_TRUNC( (int)a + (int)b ) ) 48 | #define FIX_MINUS(a, b) ( FIX_ADD(a, -b) ) 49 | #define FIX_MUL(a, b) ( FIX_TRUNC( ((int)a * (int)b) >> FRAC_BITS ) ) 50 | #define FIX_TAN_H(x) ( DOUBLE_TO_FIX(tanh(FIX_TO_DOUBLE(x))) ) 51 | 52 | extern uint64_t accum; 53 | 54 | inline float as_float(std::uint32_t ui) { 55 | float f; 56 | std::memcpy(&f, &ui, sizeof(float)); 57 | return f; 58 | } 59 | 60 | inline uint32_t as_uint32(float f) { 61 | uint32_t ui; 62 | std::memcpy(&ui, &f, sizeof(uint32_t)); 63 | return ui; 64 | } 65 | 66 | inline double as_double(std::uint64_t ui) { 67 | double f; 68 | std::memcpy(&f, &ui, sizeof(double)); 69 | return f; 70 | } 71 | 72 | inline uint64_t as_uint64(double f) { 73 | uint64_t ui; 74 | std::memcpy(&ui, &f, sizeof(uint64_t)); 75 | return ui; 76 | } 77 | #endif 78 | -------------------------------------------------------------------------------- /softbrain-scheduler/.gitignore: -------------------------------------------------------------------------------- 1 | *.d 2 | src/gams_models/*.h 3 | build/ 4 | *.swo 5 | *.swp 6 | *.swn 7 | drivers/sb_dfg_emu 8 | drivers/sb_sched 9 | drivers/stat-config 10 | dfgs/*/*.h 11 | gams/ 12 | verif/ 13 | viz/ 14 | remap.dot 15 | -------------------------------------------------------------------------------- /softbrain-scheduler/Makefile: -------------------------------------------------------------------------------- 1 | include $(SS_STACK)/msg.mk 2 | prefix:=$(SS_TOOLS) 3 | 4 | 5 | level=./ 6 | include make.config 7 | 8 | all: directories program make_drivers 9 | 10 | include make.rules 11 | 12 | program: 13 | +make -C src 14 | 15 | make_drivers: program 16 | make -C drivers 17 | 18 | install: directories install_headers install_program install_drivers 19 | 20 | 21 | install_headers: 22 | ${MKDIR_P} ${prefix}/include/softbrain-scheduler 23 | cp src/*.h ${prefix}/include/softbrain-scheduler/ 24 | 25 | install_drivers: make_drivers 26 | ${MKDIR_P} ${prefix}/bin 27 | cp drivers/sb_dfg_emu ${prefix}/bin 28 | 29 | 30 | install_program: program 31 | ${MKDIR_P} ${prefix}/lib 32 | cp ${build}/lib/* ${prefix}/lib 33 | 34 | clean: 35 | make -C src clean 36 | make -C drivers clean 37 | 38 | 39 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/bfs.dfg: -------------------------------------------------------------------------------- 1 | Input: H 2 | Input: L 3 | Input: ONE 4 | Input: reset 5 | 6 | IncH = Add64(H,ONE) 7 | Cond = ICmpEQ(L, H) 8 | 9 | NewL = Select(L,IncH,Cond) 10 | CNT = Acc64(ONE,reset) 11 | 12 | Output: NewL 13 | Output: CNT 14 | 15 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/dot.dfg: -------------------------------------------------------------------------------- 1 | #R = A . 
B + carry 2 | 3 | Input: A [4] 4 | Input: B [4] 5 | Input: carry 6 | 7 | M0 = Mul16x4(A0 , B0 ) 8 | M1 = Mul16x4(A1 , B1 ) 9 | M2 = Mul16x4(A2 , B2 ) 10 | M3 = Mul16x4(A3 , B3 ) 11 | 12 | T0 = Add16x4(M0 , M1 ) 13 | T1 = Add16x4(M2 , M3 ) 14 | 15 | T2 = Add16x4(T0 , T1 ) 16 | 17 | R = Red16x4(T2, carry) 18 | 19 | Output: R 20 | 21 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/long.dfg: -------------------------------------------------------------------------------- 1 | Input: I [8] # Original Image 2 | Input: F [4] # Filter 3 | Input: C [8] # Carry 4 | 5 | 6 | M0 = Mul64(I0, F0) 7 | M1 = Mul64(I1, F0) 8 | M2 = Mul64(I2, F1) 9 | M3 = Mul64(I3, F1) 10 | M4 = Mul64(I4, F2) 11 | M5 = Mul64(I5, F2) 12 | M6 = Mul64(I6, F3) 13 | M7 = Mul64(I7, F3) 14 | 15 | O0 = Add64(M0, C0) 16 | O1 = Add64(M1, C1) 17 | O2 = Add64(M2, C2) 18 | O3 = Add64(M3, C3) 19 | O4 = Add64(M4, C4) 20 | O5 = Add64(M5, C5) 21 | O6 = Add64(M6, C6) 22 | O7 = Add64(M7, C7) 23 | 24 | Output: O [8] 25 | 26 | 27 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/medium.dfg: -------------------------------------------------------------------------------- 1 | 2 | Input: I [8] 3 | Input: A 4 | 5 | O0 =Add64(A,I0) 6 | O1 =Add64(A,I1) 7 | O2 =Add64(I2,A) 8 | O3 =Add64(I3,A) 9 | O4 =Add64(I4,A) 10 | O5 =Add64(I5,A) 11 | O6 =Add64(I6,A) 12 | O7 =Add64(I7,A) 13 | 14 | Output: O [8] 15 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/medium_short.dfg: -------------------------------------------------------------------------------- 1 | 2 | Input: I [4] 3 | Input: A 4 | 5 | O0 =Add64(A,I0) 6 | O1 =Add64(A,I1) 7 | O2 =Add64(I2,A) 8 | O3 =Add64(I3,A) 9 | 10 | Output: O [4] 11 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/mm_sb.dfg: -------------------------------------------------------------------------------- 1 | Input: A 2 | Input: B [8] 3 | Input: reset 4 | 5 | M0 = Mul64(B0, A) 6 | M1 = Mul64(B1, A) 7 | M2 = Mul64(B2, A) 8 | M3 = Mul64(B3, A) 9 | M4 = Mul64(B4, A) 10 | M5 = Mul64(B5, A) 11 | M6 = Mul64(B6, A) 12 | M7 = Mul64(B7, A) 13 | 14 | R0 = Acc64(M0, reset) 15 | R1 = Acc64(M1, reset) 16 | R2 = Acc64(M2, reset) 17 | R3 = Acc64(M3, reset) 18 | R4 = Acc64(M4, reset) 19 | R5 = Acc64(M5, reset) 20 | R6 = Acc64(M6, reset) 21 | R7 = Acc64(M7, reset) 22 | 23 | Output: R [8] 24 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/out.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyArch/stream-dataflow/e9a40c04268501202f4591d914ed69c46881baa3/softbrain-scheduler/dfgs/5x4/out.txt -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/pool2x2l4avg.dfg: -------------------------------------------------------------------------------- 1 | Input: R0 2 | Input: R1 3 | Input: R2 4 | Input: R3 5 | Input: R4 6 | 7 | Input: P [4] 8 | 9 | I0 =Add16x4(R0, R1) 10 | I1 =Add16x4(R1, R2) 11 | I2 =Add16x4(R2, R3) 12 | I3 =Add16x4(R3, R4) 13 | 14 | H0 =Add16x4(P0, I0) 15 | H1 =Add16x4(P1, I1) 16 | H2 =Add16x4(P2, I2) 17 | H3 =Add16x4(P3, I3) 18 | 19 | O0=RShf16x4(H0,2) 20 | O1=RShf16x4(H1,2) 21 | O2=RShf16x4(H2,2) 22 | O3=RShf16x4(H3,2) 23 | 24 | Output: I [4] 25 | 26 | Output: O0 27 | Output: O1 28 | Output: O2 29 | 
Output: O3 30 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/pool4x4l2avg.dfg: -------------------------------------------------------------------------------- 1 | Input: R0 2 | Input: R1 3 | Input: R2 4 | Input: R3 5 | Input: R4 6 | 7 | Input: Xa [2] 8 | Input: Xb [2] 9 | Input: Xc [2] 10 | 11 | R12=Add16x4(R1, R2) 12 | R123=Add16x4(R12, R3) 13 | R0123=Add16x4(R0,R123) 14 | R1234=Add16x4(R123, R4) 15 | 16 | Xd0=R0123 17 | Xd1=R1234 18 | 19 | Xcd0=Add16x4(Xc0,Xd0) 20 | Xcd1=Add16x4(Xc1,Xd1) 21 | 22 | Xab0=Add16x4(Xa0,Xb0) 23 | Xab1=Add16x4(Xa1,Xb1) 24 | 25 | O0_p=Add16x4(Xab0,Xcd0) 26 | O1_p=Add16x4(Xab1,Xcd1) 27 | 28 | O0=RShf16x4(O0_p,4) 29 | O1=RShf16x4(O1_p,4) 30 | 31 | Oa0=Xb0 32 | Oa1=Xb1 33 | Ob0=Xc0 34 | Ob1=Xc1 35 | Oc0=R0123 36 | Oc1=R1234 37 | 38 | Output: Oa [2] 39 | Output: Ob [2] 40 | Output: Oc [2] 41 | 42 | Output: O0 43 | Output: O1 44 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/pool_simple.dfg: -------------------------------------------------------------------------------- 1 | Input: in [8] 2 | Input: acc [8] 3 | 4 | out0=Add16x4(in0, acc0) 5 | out1=Add16x4(in1, acc1) 6 | out2=Add16x4(in2, acc2) 7 | out3=Add16x4(in3, acc3) 8 | out4=Add16x4(in4, acc4) 9 | out5=Add16x4(in5, acc5) 10 | out6=Add16x4(in6, acc6) 11 | out7=Add16x4(in7, acc7) 12 | 13 | Output: out [8] 14 | 15 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/red16to1sig.dfg: -------------------------------------------------------------------------------- 1 | Input: N [4] 2 | Input: S [8] 3 | Input: acc [2] 4 | Input: pred 5 | 6 | #compute lanes "A" and "B" 7 | 8 | AM0 =Mul16x4(N0, S0) 9 | AM1 =Mul16x4(N1, S1) 10 | AM2 =Mul16x4(N2, S2) 11 | AM3 =Mul16x4(N3, S3) 12 | 13 | AS0 =Add16x4(AM0, AM1) 14 | AS1 =Add16x4(AM2, AM3) 15 | 16 | AS2 =Add16x4(AS0, AS1) 17 | 18 | AR = Red16x4(AS2, acc0) 19 | 20 | out0 = Sig16(AR, pred) 21 | 22 | 23 | 24 | BM0 =Mul16x4(N0, S4) 25 | BM1 =Mul16x4(N1, S5) 26 | BM2 =Mul16x4(N2, S6) 27 | BM3 =Mul16x4(N3, S7) 28 | 29 | BS0 =Add16x4(BM0, BM1) 30 | BS1 =Add16x4(BM2, BM3) 31 | 32 | BS2 =Add16x4(BS0, BS1) 33 | 34 | BR = Red16x4(BS2, acc1) 35 | 36 | out1 = Sig16(BR, pred) 37 | 38 | Output: out [2] 39 | 40 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/red16to1sigx2-simple.dfg: -------------------------------------------------------------------------------- 1 | Input: NA [4] 2 | Input: SA [4] 3 | Input: NB [4] 4 | Input: SB [4] 5 | 6 | #compute lanes "A" and "B" 7 | 8 | MA0 =Mul16x4(NA0, SA0) 9 | MA1 =Mul16x4(NA1, SA1) 10 | MA2 =Mul16x4(NA2, SA2) 11 | MA3 =Mul16x4(NA3, SA3) 12 | 13 | SA0 =Add16x4(MA0, MA1) 14 | SA1 =Add16x4(MA2, MA3) 15 | 16 | SA2 =Add16x4(SA0, SA1) 17 | 18 | RA = Add16x4(SA2,0) 19 | 20 | out0 = Sig16(RA,0) 21 | 22 | 23 | 24 | MB0 =Mul16x4(NB0, SB0) 25 | MB1 =Mul16x4(NB1, SB1) 26 | MB2 =Mul16x4(NB2, SB2) 27 | MB3 =Mul16x4(NB3, SB3) 28 | 29 | SB0 =Add16x4(MB0, MB1) 30 | SB1 =Add16x4(MB2, MB3) 31 | 32 | SB2 =Add16x4(SB0, SB1) 33 | 34 | RB = Add16x4(SB2,0) 35 | 36 | out1 = Sig16(RB,0) 37 | 38 | Output: out [2] 39 | 40 | 41 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/red16to1sigx2.dfg: -------------------------------------------------------------------------------- 1 | Input: NA [4] 2 | Input: SA [4] 3 | Input: NB [4] 4 | Input: SB [4] 5 | 6 | Input: 
acc [2] 7 | Input: pred [2] 8 | 9 | #compute lanes "A" and "B" 10 | 11 | MA0 =Mul16x4(NA0, SA0) 12 | MA1 =Mul16x4(NA1, SA1) 13 | MA2 =Mul16x4(NA2, SA2) 14 | MA3 =Mul16x4(NA3, SA3) 15 | 16 | SA0 =Add16x4(MA0, MA1) 17 | SA1 =Add16x4(MA2, MA3) 18 | 19 | SA2 =Add16x4(SA0, SA1) 20 | 21 | RA = Red16x4(SA2, acc0) 22 | 23 | out0 = Sig16(RA,pred0) 24 | 25 | 26 | 27 | MB0 =Mul16x4(NB0, SB0) 28 | MB1 =Mul16x4(NB1, SB1) 29 | MB2 =Mul16x4(NB2, SB2) 30 | MB3 =Mul16x4(NB3, SB3) 31 | 32 | SB0 =Add16x4(MB0, MB1) 33 | SB1 =Add16x4(MB2, MB3) 34 | 35 | SB2 =Add16x4(SB0, SB1) 36 | 37 | RB = Red16x4(SB2, acc1) 38 | 39 | out1 = Sig16(RB,pred1) 40 | 41 | Output: out [2] 42 | 43 | 44 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/red32to1sig.dfg: -------------------------------------------------------------------------------- 1 | Input: N [8] # 8Wide 1Deep 2 | Input: S [8] # 8Wide 1Deep 3 | Input: acc 4 | Input: pred 5 | 6 | M0 =Mul16x4(N0, S0) 7 | M1 =Mul16x4(N1, S1) 8 | M2 =Mul16x4(N2, S2) 9 | M3 =Mul16x4(N3, S3) 10 | M4 =Mul16x4(N4, S4) 11 | M5 =Mul16x4(N5, S5) 12 | M6 =Mul16x4(N6, S6) 13 | M7 =Mul16x4(N7, S7) 14 | 15 | A0 =Add16x4(M0, M1) 16 | A1 =Add16x4(M2, M3) 17 | A2 =Add16x4(M4, M5) 18 | A3 =Add16x4(M6, M7) 19 | 20 | A8 =Add16x4(A0, A1) 21 | A9 =Add16x4(A2, A3) 22 | 23 | A10 = Add16x4(A8, A9) 24 | 25 | R = Red16x4(A10, acc) 26 | 27 | out=Sig16(R, pred) 28 | 29 | Output: out 30 | 31 | 32 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/red8to1sig.dfg: -------------------------------------------------------------------------------- 1 | Input: N [2] 2 | Input: S [4] 3 | Input: acc [2] 4 | Input: pred 5 | 6 | #compute lanes "A" and "B" 7 | 8 | AM0 =Mul16x4(N0, S0) 9 | AM1 =Mul16x4(N1, S1) 10 | 11 | AS0 =Add16x4(AM0, AM1) 12 | 13 | AR = Red16x4(AS0, acc0) 14 | 15 | out0 = Sig16(AR, pred) 16 | 17 | 18 | 19 | BM0 =Mul16x4(N0, S2) 20 | BM1 =Mul16x4(N1, S3) 21 | 22 | BS0 =Add16x4(BM0, BM1) 23 | 24 | BR = Red16x4(BS0, acc1) 25 | 26 | out1 = Sig16(BR, pred) 27 | 28 | Output: out [2] 29 | 30 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/run-all-sched.sh: -------------------------------------------------------------------------------- 1 | #subalg="M.R.T M.RT MR.T MR.RT MR'.RT MRT'.RT MRT" 2 | 3 | #subalg="MR.RT MRT'.RT MR'.RT MR' MRT' MRT" 4 | subalg="MRT' MRT'.RT" 5 | #subalg=MRT 6 | #subalg="MR'.RT MRT'.RT" 7 | 8 | 9 | logfile=log.txt 10 | sum=summary.txt 11 | 12 | 13 | for ed in 15 7 3; do 14 | 15 | echo "ed = $ed" | tee -a $sum 16 | 17 | for i in $subalg; do 18 | echo $i | tee -a $sum 19 | 20 | echo -e "\n\n\n\n\n\n********** $i *********" >> $logfile 21 | run-sched.sh gams $i $ed >> $logfile 22 | 23 | cat sum.txt | tee -a $sum 24 | done 25 | 26 | done 27 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/run-sched.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p output 3 | 4 | lat="" 5 | lat_eq="" 6 | 7 | lat_eq="0" 8 | time_eq="0" 9 | 10 | if [ -z "$1" ]; then 11 | alg="sg" 12 | else 13 | alg=$1 14 | fi 15 | 16 | if [ -z "$2" ]; then 17 | subalg="MR.RT" 18 | else 19 | subalg=$2 20 | fi 21 | 22 | if [ -z "$3" ]; then 23 | ed=15 24 | else 25 | ed=$3 26 | fi 27 | 28 | 29 | bench="" 30 | 31 | for i in *.dfg; do 32 | echo "************ $i *************"; 33 | cmd="$SS_TOOLS/bin/sb_sched 
$SS_TOOLS/configs/softbrain_5x4.sbmodel $i --verbose --algorithm $alg --sub-alg $subalg --show-gams --mipstart --max-edge-delay=$ed --timeout=3600"; 34 | 35 | echo $cmd 36 | $cmd | tee out.txt 37 | #$cmd > out.txt 38 | bench="$bench $i" 39 | lat="$lat `grep "latency:" out.txt | cut -d" " -f 2`" 40 | time="$time `grep "sched_time:" out.txt | cut -d" " -f 2`" 41 | lat_eq="$lat_eq+`grep "latency:" out.txt | cut -d" " -f 2`" 42 | time_eq="$time_eq+`grep "sched_time:" out.txt | cut -d" " -f 2`" 43 | done 44 | 45 | echo $bench | tee sum.txt 46 | echo $lat | tee -a sum.txt 47 | echo $time | tee -a sum.txt 48 | 49 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/spmv.dfg: -------------------------------------------------------------------------------- 1 | Input: Val 2 | Input: Vec 3 | Input: reset 4 | 5 | M0 = Mul64(Val, Vec) 6 | 7 | O = Acc64(M0, reset) 8 | 9 | Output: O 10 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/stencil.dfg: -------------------------------------------------------------------------------- 1 | Input: M 2 | Input: MIP 3 | Input: MIM 4 | Input: MJP 5 | Input: MJM 6 | Input: MKP 7 | Input: MKM 8 | Input: C0 9 | Input: C1 10 | 11 | AI = Add64(MIP, MIM) 12 | AJ = Add64(MJP, MJM) 13 | AK = Add64(MKP, MKM) 14 | 15 | AIJ = Add64(AI, AJ) 16 | AIJK = Add64(AIJ, AK) 17 | 18 | P0 = Mul64(AIJK,C1) 19 | P1 = Mul64(M,C0) 20 | 21 | R = Add64(P0, P1) 22 | 23 | Output: R 24 | 25 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/sum.txt: -------------------------------------------------------------------------------- 1 | bfs.dfg dot.dfg long.dfg medium.dfg medium_short.dfg mm_sb.dfg pool2x2l4avg.dfg pool4x4l2avg.dfg pool_simple.dfg red16to1sig.dfg red16to1sigx2.dfg red16to1sigx2-simple.dfg red32to1sig.dfg red8to1sig.dfg spmv.dfg stencil.dfg vadd4.dfg vadd5.dfg vadd6.dfg vadd.dfg viterbi.dfg 2 | 1.07 1.49 1.29 1.79 0.86 2.27 10.24 21.27 1.56 2.58 2.38 1.64 1.88 1.40 0.48 1.93 1.29 1.16 1.66 1.54 2.34 | 62.12 3 | 1.07 1.49 1.29 1.79 0.86 2.27 10.24 21.27 1.56 2.58 2.38 1.64 1.88 1.40 0.48 1.93 1.29 1.16 1.66 1.54 2.34 | 62.12 4 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/vadd.dfg: -------------------------------------------------------------------------------- 1 | 2 | Input: I [8] 3 | Input: A [8] 4 | 5 | O0 =Add64(A0,I0) 6 | O1 =Add64(A1,I1) 7 | O2 =Add64(A2,I2) 8 | O3 =Add64(A3,I3) 9 | O4 =Add64(A4,I4) 10 | O5 =Add64(A5,I5) 11 | O6 =Add64(A6,I6) 12 | O7 =Add64(A7,I7) 13 | 14 | Output: O [8] 15 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/vadd4.dfg: -------------------------------------------------------------------------------- 1 | 2 | Input: I [4] 3 | Input: A [4] 4 | 5 | O0 =Add64(A0,I0) 6 | O1 =Add64(A1,I1) 7 | O2 =Add64(A2,I2) 8 | O3 =Add64(A3,I3) 9 | 10 | Output: O [4] 11 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/vadd5.dfg: -------------------------------------------------------------------------------- 1 | 2 | Input: I [5] 3 | Input: A [5] 4 | 5 | O0 =Add64(A0,I0) 6 | O1 =Add64(A1,I1) 7 | O2 =Add64(A2,I2) 8 | O3 =Add64(A3,I3) 9 | O4 =Add64(A4,I4) 10 | 11 | Output: O [5] 12 | -------------------------------------------------------------------------------- 
/softbrain-scheduler/dfgs/5x4/vadd6.dfg: -------------------------------------------------------------------------------- 1 | 2 | Input: I [6] 3 | Input: A [6] 4 | 5 | O0 =Add64(A0,I0) 6 | O1 =Add64(A1,I1) 7 | O2 =Add64(A2,I2) 8 | O3 =Add64(A3,I3) 9 | O4 =Add64(A4,I4) 10 | O5 =Add64(A5,I5) 11 | 12 | Output: O [6] 13 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/viterbi.dfg: -------------------------------------------------------------------------------- 1 | Input: llike [4] 2 | Input: trans [4] 3 | Input: reset 4 | Input: emission 5 | 6 | S0 = Add64(llike0,trans0) 7 | S1 = Add64(llike1,trans1) 8 | S2 = Add64(llike2,trans2) 9 | S3 = Add64(llike3,trans3) 10 | 11 | #these should be mins 12 | M01 = Add64(S0,S1) 13 | M12 = Add64(S2,S3) 14 | 15 | M = Add64(M01,M12) 16 | ME = Add64(M,emission) 17 | 18 | MR = Acc64(ME,reset) 19 | 20 | Output: MR 21 | 22 | -------------------------------------------------------------------------------- /softbrain-scheduler/drivers/Makefile: -------------------------------------------------------------------------------- 1 | include $(SS_STACK)/msg.mk 2 | 3 | level=../ 4 | include ../make.config 5 | 6 | SYS = $(shell sys) 7 | CXX = g++ 8 | 9 | CXXFLAGS := -Wall -g -std=c++11 10 | 11 | LIB_PATH=$(SS_TOOLS)/lib 12 | INC_SBMODEL_PATH=$(SS_TOOLS)/include/softbrain-config 13 | INC_SBSCHED_PATH=$(SS_TOOLS)/include/softbrain-scheduler 14 | 15 | CXXFLAGS += -I$(INC_SBMODEL_PATH) -I$(INC_SBSCHED_PATH) -Wl,-rpath,$(LIB_PATH) 16 | 17 | #all: reschedule stat-config 18 | 19 | all: sb_dfg_emu 20 | #reschedul 21 | 22 | 23 | sb_dfg_emu : sb_dfg_emu.cpp 24 | $(CXX) $(CXXFLAGS) -MD -o $@ $< -L$(LIB_PATH) -lsbscheduler -lsbconfig -Wl,-rpath,${SS_TOOLS}/lib 25 | 26 | 27 | clean: 28 | rm -rf *.o sb_dfg_emu sb_sched stat-config reschedule *.d 29 | 30 | include ../make.rules 31 | 32 | -------------------------------------------------------------------------------- /softbrain-scheduler/drivers/sb_dfg_emu.cpp: -------------------------------------------------------------------------------- 1 | #include "model.h" 2 | #include 3 | #include 4 | #include 5 | 6 | #include "sbpdg.h" 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | using namespace SB_CONFIG; 12 | 13 | std::string basename(std::string& filename) { 14 | size_t lastindex = filename.find_last_of("."); 15 | string basename = filename.substr(0, lastindex); 16 | 17 | lastindex = filename.find_last_of("\\/"); 18 | if(lastindex != string::npos) { 19 | basename = basename.substr(lastindex+1); 20 | } 21 | return basename; 22 | } 23 | 24 | std::string basedir(std::string& filename) { 25 | size_t lastindex = filename.find_last_of("\\/"); 26 | if(lastindex == string::npos) { 27 | return std::string("./"); 28 | } 29 | return filename.substr(0, lastindex); 30 | } 31 | 32 | 33 | 34 | int main(int argc, char* argv[]) 35 | { 36 | 37 | if(argc<2) { 38 | cerr << "Usage: sb_dfg_emu \n"; 39 | exit(1); 40 | } 41 | 42 | 43 | std::string pdg_filename=argv[2]; 44 | 45 | int lastindex = pdg_filename.find_last_of("."); 46 | string pdg_rawname = pdg_filename.substr(0, lastindex); 47 | string dfg_rawname = pdg_rawname; 48 | if(dfg_rawname.find_last_of("/") < dfg_rawname.length()) { 49 | dfg_rawname = dfg_rawname.substr(dfg_rawname.find_last_of("/")+1,dfg_rawname.length()); 50 | } 51 | //sbpdg object based on the dfg 52 | SbPDG sbpdg(pdg_filename); 53 | 54 | 55 | std::string dfg_emu_header=pdg_rawname+string(".h"); 56 | std::ofstream out_file(dfg_emu_header); 57 | 
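 //The emitted <dfg>.h is what the diannao workloads #include (e.g. red32to1sig.h);
 //judging by their usage it provides the <dfg>_config configuration data, <dfg>_size,
 //and the P_<dfg>_<port> identifiers consumed by the SB_* macros.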
assert(out_file.good()); 58 | sbpdg.printEmuDFG(out_file, dfg_rawname); 59 | } 60 | 61 | -------------------------------------------------------------------------------- /softbrain-scheduler/make.config: -------------------------------------------------------------------------------- 1 | MKDIR_P = mkdir -p 2 | 3 | SYS = $(shell sys) 4 | CXX = g++ 5 | CXXFLAGS := -Wall -g -std=c++11 -O3 6 | 7 | build ?= $(shell pwd)/${level}/build 8 | prefix ?= $(shell pwd) 9 | boost ?= /usr/lib64 10 | -------------------------------------------------------------------------------- /softbrain-scheduler/make.rules: -------------------------------------------------------------------------------- 1 | .PHONY: directories 2 | 3 | directories: 4 | ${MKDIR_P} ${build}/obj 5 | ${MKDIR_P} ${build}/lib 6 | -------------------------------------------------------------------------------- /softbrain-scheduler/src/Makefile: -------------------------------------------------------------------------------- 1 | include $(SS_STACK)/msg.mk 2 | level=../ 3 | include ../make.config 4 | 5 | 6 | SYS = $(shell sys) 7 | CXX = g++ 8 | 9 | OPT = -O3 10 | #OPT = -Og 11 | #OPT = -O0 12 | 13 | CXXFLAGS := -Wall -g -std=c++11 $(OPT) -ggdb -gdwarf-3 -lm $(FLAGS) 14 | SOURCES= sbpdg.cpp 15 | 16 | INCLUDE_DEST=../src 17 | LIB_DEST=${build}/lib 18 | OBJ_DEST=${build}/obj 19 | 20 | 21 | PRE_OBJECTS=$(SOURCES:.cpp=.o) 22 | OBJECTS = $(patsubst %,$(OBJ_DEST)/%,$(PRE_OBJECTS)) 23 | 24 | BOOST_PATH=${boost} 25 | INC_SBMODEL_PATH=${SS_TOOLS}/include/softbrain-config/ 26 | 27 | all: $(LIB_DEST)/libsbscheduler.a $(LIB_DEST)/libsbscheduler.so 28 | 29 | CXXFLAGS += -I$(INC_SBMODEL_PATH) 30 | 31 | CXXFLAGS += -I$(INCLUDE_DEST) -L$(BOOST_PATH) -fPIC -lboost_regex 32 | 33 | $(LIB_DEST)/libsbscheduler.a: $(OBJECTS) 34 | ar crs $@ $^ 35 | 36 | $(LIB_DEST)/libsbscheduler.so: $(OBJECTS) 37 | $(CXX) $(CXXFLAGS) -MD -shared -o $@ $^ 38 | 39 | 40 | 41 | $(OBJ_DEST)/%.o: %.cpp $(INCLUDE_DEST)/%.h 42 | $(CXX) $(CXXFLAGS) -MD -c -o $@ $< 43 | 44 | 45 | 46 | .phony: clean 47 | 48 | clean: 49 | -rm -Rf $(LIB_DEST)/*.so $(LIB_DEST)/*.a *.o $(OBJ_DEST)/*.o *.d $(OBJ_DEST)/*.d $(GAMS_INC) 50 | 51 | include ../make.rules 52 | 53 | -------------------------------------------------------------------------------- /softbrain-scheduler/src/sbpdg.h: -------------------------------------------------------------------------------- 1 | #ifndef __SBPDG_H__ 2 | #define __SBPDG_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "sbinst.h" 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "model.h" 17 | 18 | class SbPDG_Node; 19 | 20 | class SbPDG_Edge { 21 | public: 22 | enum EdgeType { data, ctrl_true, ctrl_false }; 23 | 24 | EdgeType etype() {return _etype;} 25 | 26 | SbPDG_Edge(SbPDG_Node* def, SbPDG_Node* use, EdgeType etype) { 27 | _def=def; 28 | _use=use; 29 | _etype=etype; 30 | _ID=ID_SOURCE++; 31 | } 32 | 33 | SbPDG_Node* def() const {return _def;} 34 | SbPDG_Node* use() const {return _use;} 35 | 36 | std::string gamsName(); 37 | std::string name(); 38 | 39 | void set_delay(int d) {_delay=d;} 40 | int delay() {return _delay;} 41 | private: 42 | int _ID; 43 | SbPDG_Node *_def, *_use; 44 | EdgeType _etype; 45 | 46 | int _delay =0; 47 | 48 | 49 | 50 | private: 51 | static int ID_SOURCE; 52 | }; 53 | 54 | //PDG Node -- abstract base class 55 | class SbPDG_Node { 56 | public: 57 | virtual void printGraphviz(std::ostream& os); 58 | virtual void printEmuDFG(std::ostream& os, std::string dfg_name); 59 | void 
setScalar() {_scalar = true;}; 60 | bool getScalar() {return _scalar;}; 61 | int findDepth(std::ostream& os, std::string dfg_name, int level); 62 | SbPDG_Node() { 63 | _ID=ID_SOURCE++; 64 | } 65 | 66 | typedef std::vector::const_iterator const_edge_iterator; 67 | 68 | void addIncEdge(unsigned pos, SbPDG_Edge *edge) { 69 | assert(pos <=4); 70 | if(_ops.size()<=pos) { 71 | _ops.resize(pos+1,NULL); 72 | } 73 | 74 | if(_ops[pos]) { 75 | std::cerr << "ERROR: overwriting op at pos" << pos 76 | << " name:" << _ops[pos]->def()->name() << "\n"; 77 | assert(0); 78 | } 79 | _ops[pos]=edge; 80 | } 81 | 82 | void addOutEdge(unsigned pos, SbPDG_Edge *edge) { 83 | assert(pos <= 64 && "more than 64 users, check this! (may be okay if really large grid\n"); 84 | if(_uses.size()<=pos) { 85 | _uses.resize(pos+1,NULL); 86 | } 87 | 88 | if(_uses[pos]) { 89 | std::cerr << "ERROR: overwriting use at pos" << pos 90 | << " name: " << _uses[pos]->use()->name() << "\n"; 91 | assert(0); 92 | } 93 | 94 | _uses[pos]=edge; 95 | } 96 | 97 | SbPDG_Edge* getLinkTowards(SbPDG_Node* to) { 98 | for(unsigned i = 0; i < _uses.size(); ++ i) { 99 | if(_uses[i] && _uses[i]->use()==to) { 100 | return _uses[i]; 101 | } 102 | } 103 | return NULL; 104 | } 105 | 106 | int num_inc() const { return _ops.size(); } 107 | int num_out() const { return _uses.size(); } 108 | 109 | virtual std::string name() = 0; //pure func 110 | void setName(std::string& name) {_name = name;} 111 | virtual std::string gamsName() = 0; 112 | 113 | const_edge_iterator ops_begin() const {return _ops.begin();} 114 | const_edge_iterator ops_end() const {return _ops.end();} 115 | const_edge_iterator uses_begin() const {return _uses.begin();} 116 | const_edge_iterator uses_end() const {return _uses.end();} 117 | 118 | int id() {return _ID;} 119 | 120 | void set_value(uint64_t v) {_val=v;} 121 | uint64_t get_value() {return _val;} 122 | bool input = false; 123 | bool output = false; 124 | int _iter; 125 | 126 | protected: 127 | uint64_t _val; 128 | int _ID; 129 | std::string _name; 130 | std::vector _ops; //in edges 131 | std::vector _uses; //out edges 132 | bool _scalar = false; 133 | 134 | private: 135 | static int ID_SOURCE; 136 | }; 137 | 138 | 139 | class SbPDG_IO : public SbPDG_Node { 140 | public: 141 | void setVPort(int vport) { _vport = vport; } 142 | int vport() {return _vport;} 143 | 144 | protected: 145 | int _vport; 146 | }; 147 | 148 | //Instruction 149 | class SbPDG_Inst : public SbPDG_Node { 150 | public: 151 | SbPDG_Inst() : SbPDG_Node(), _predInv(false), _imm_slot(-1), _subFunc(0),_accum(0){ 152 | } 153 | 154 | void printGraphviz(std::ostream& os); 155 | void printEmuDFG(std::ostream& os, std::string dfg_name); 156 | 157 | void setImm( uint64_t val ) { _imm=val; } 158 | // void setImm( float val ) { _imm=*reinterpret_cast(&val); } 159 | 160 | // float getImmFloat() { return *reinterpret_cast(&_imm); } 161 | int getImmInt() { return _imm; } 162 | 163 | uint64_t imm() { return _imm; } 164 | 165 | void setPredInv(bool predInv) { _predInv=predInv;} 166 | bool predInv() {return _predInv;} 167 | 168 | void setInst(SB_CONFIG::sb_inst_t sbinst) { _sbinst=sbinst; } 169 | SB_CONFIG::sb_inst_t inst() { return _sbinst; } 170 | 171 | std::string name() { 172 | std::stringstream ss; 173 | ss << _name << ":"; 174 | ss << SB_CONFIG::name_of_inst(_sbinst); 175 | if(_imm_slot!=-1) { 176 | ss<<" Imm:"<<_imm; 177 | } 178 | return ss.str(); 179 | } 180 | 181 | std::string gamsName(); 182 | 183 | void setImmSlot(int i); 184 | int immSlot() const { return _imm_slot; } 185 | 
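 //An _imm_slot of -1 (the constructor default) marks an instruction with no
 //immediate; otherwise it presumably names the operand position fed from _imm
 //instead of an incoming edge.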
186 | void setSubFunc(int i) {_subFunc=i;} 187 | int subFunc() const {return _subFunc;} 188 | 189 | void compute(bool print, bool verif); 190 | 191 | void set_verif_id(std::string s) {_verif_id = s;} 192 | 193 | private: 194 | std::ofstream _verif_stream; 195 | std::string _verif_id; 196 | std::vector _input_vals; 197 | bool _predInv; 198 | int _imm_slot; 199 | int _subFunc; 200 | uint64_t _accum; 201 | uint64_t _imm; 202 | SB_CONFIG::sb_inst_t _sbinst; 203 | }; 204 | 205 | class SbPDG_Input : public SbPDG_IO { //inturn inherits sbnode 206 | public: 207 | void printGraphviz(std::ostream& os); 208 | void printEmuDFG(std::ostream& os, std::string dfg_name, std::string* realName, int* iter, std::vector* input_sizes); 209 | 210 | std::string name() { 211 | std::stringstream ss; 212 | ss << _name << ":"; 213 | ss << "I" << _vport; 214 | ss << _name; 215 | return ss.str(); 216 | } 217 | std::string gamsName(); 218 | 219 | std::string _realName; 220 | int _subIter; 221 | int _size; 222 | }; 223 | 224 | class SbPDG_Output : public SbPDG_IO { 225 | public: 226 | void printGraphviz(std::ostream& os); 227 | void printDirectAssignments(std::ostream& os, std::string dfg_name); 228 | void printEmuDFG(std::ostream& os, std::string dfg_name, std::string* realName, int* iter, std::vector* output_sizes); 229 | 230 | std::string name() { 231 | std::stringstream ss; 232 | ss << _name << ":"; 233 | ss << "O" << _vport; 234 | ss << _name; 235 | return ss.str(); 236 | } 237 | std::string gamsName(); 238 | 239 | //returns the instruction producing the 240 | //value to this output node 241 | //Returns NULL if the producing instruction is an input! 242 | SbPDG_Inst* out_inst() { 243 | return dynamic_cast(_ops[0]->def()); 244 | } 245 | 246 | //retrieve the value of the def 247 | uint64_t retrieve() { 248 | assert(_ops.size()==1); 249 | return _ops[0]->def()->get_value(); 250 | } 251 | 252 | std::string _realName; 253 | int _subIter; 254 | int _size; 255 | }; 256 | 257 | //vector class 258 | class SbPDG_Vec { 259 | public: 260 | SbPDG_Vec(std::string name, int id) : _name(name), _ID(id) { 261 | _locMap.resize(1); //set up default loc map 262 | _locMap[0].push_back(0); 263 | } 264 | 265 | void setLocMap(std::vector >& vec) { _locMap=vec;} 266 | std::vector >& locMap() {return _locMap;} 267 | 268 | int id() {return _ID;} 269 | 270 | virtual std::string gamsName() = 0; 271 | virtual std::string name() {return _name;} 272 | 273 | protected: 274 | std::string _name; 275 | std::vector> _locMap; 276 | int _ID; 277 | }; 278 | 279 | class SbPDG_VecInput : public SbPDG_Vec { 280 | public: 281 | 282 | SbPDG_VecInput(std::string name, int id) : SbPDG_Vec(name,id) {} 283 | 284 | virtual std::string gamsName() { 285 | std::stringstream ss; 286 | ss << "IPV_" << _name ; 287 | return ss.str(); 288 | } 289 | 290 | void addInput(SbPDG_Input* in) { _inputs.push_back(in); } 291 | std::vector::iterator input_begin() {return _inputs.begin();} 292 | std::vector::iterator input_end() {return _inputs.end();} 293 | unsigned num_inputs() const {return _inputs.size();} 294 | 295 | SbPDG_Input* getInput(int i) {return _inputs[i];} 296 | 297 | /*bool operator < (const SbPDG_VecInput& s) const 298 | { 299 | return (this->num_inputs() > s.num_inputs()); 300 | }*/ 301 | 302 | private: 303 | std::vector _inputs; 304 | }; 305 | 306 | 307 | class SbPDG_VecOutput : public SbPDG_Vec { 308 | public: 309 | 310 | SbPDG_VecOutput(std::string name, int id) : SbPDG_Vec(name,id) {} 311 | 312 | virtual std::string gamsName() { 313 | std::stringstream ss; 314 | 
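 //Output vector ports are named OPV_<name> in the generated GAMS model;
 //input vector ports use the IPV_ prefix (see SbPDG_VecInput above).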
ss << "OPV_" << _name ; 315 | return ss.str(); 316 | } 317 | 318 | void addOutput(SbPDG_Output* out) { _outputs.push_back(out); } 319 | std::vector::iterator output_begin() {return _outputs.begin();} 320 | std::vector::iterator output_end() {return _outputs.end();} 321 | unsigned num_outputs() const {return _outputs.size();} 322 | 323 | SbPDG_Output* getOutput(int i) {return _outputs[i];} 324 | 325 | /*bool operator < (const SbPDG_VecOutput& s) const 326 | { 327 | return (this->num_outputs() > s.num_outputs()); 328 | }*/ 329 | 330 | private: 331 | std::vector _outputs; 332 | }; 333 | 334 | 335 | class SbPDG { 336 | public: 337 | SbPDG(); 338 | SbPDG(std::string filename); 339 | 340 | ~SbPDG(){ 341 | } 342 | 343 | void printGraphviz(std::ostream& os); 344 | void printEmuDFG(std::ostream& os, std::string dfg_name); 345 | void printGraphviz(const char *fname) { 346 | std::ofstream os(fname); 347 | assert(os.good()); 348 | printGraphviz(os); 349 | } 350 | 351 | void printGams(std::ostream& os, std::unordered_map&, 352 | std::unordered_map&, 353 | std::unordered_map&); 354 | 355 | void printPortCompatibilityWith(std::ostream& os, SB_CONFIG::SbModel* sbModel); 356 | 357 | 358 | 359 | SbPDG_Inst* CreateInst() { 360 | return new SbPDG_Inst(); 361 | } 362 | 363 | void addInst(SbPDG_Inst* inst) { 364 | _insts.push_back(inst); 365 | _nodes.push_back(inst);} 366 | 367 | //Just for adding single input without keeping track of name/sym-table 368 | void addInput(SbPDG_Input* input) { 369 | _inputs.push_back(input); 370 | _nodes.push_back(input); 371 | } 372 | 373 | void addOutput(SbPDG_Output* output) { 374 | _outputs.push_back(output); 375 | _nodes.push_back(output); 376 | } 377 | 378 | void addScalarInput(std::string name, std::map& syms) { 379 | SbPDG_VecInput* vec_input = new SbPDG_VecInput(name, _vecInputs.size()); 380 | insert_vec_in(vec_input); 381 | 382 | SbPDG_Input* pdg_in = new SbPDG_Input(); //new input nodes 383 | syms[name]=pdg_in; 384 | pdg_in->setName(name); 385 | pdg_in->setVPort(_vecInputs.size()); 386 | pdg_in->setScalar(); 387 | addInput(pdg_in); 388 | vec_input->addInput(pdg_in); 389 | } 390 | 391 | //scalar output node 392 | void addScalarOutput(std::string name, std::map& syms) { 393 | 394 | SbPDG_Node* out_node = syms[name]; 395 | if(out_node==NULL) { 396 | std::cerr << "Could not find" + name + "\n"; 397 | assert("0"); 398 | } 399 | 400 | //new vector output 401 | SbPDG_VecOutput* vec_output = new SbPDG_VecOutput(name,_vecOutputs.size()); 402 | insert_vec_out(vec_output); 403 | 404 | SbPDG_Output* pdg_out = new SbPDG_Output(); 405 | std::string out_name=name+"_out"; 406 | syms[out_name]=pdg_out; 407 | pdg_out->setName(out_name); 408 | pdg_out->setVPort(_vecOutputs.size()); 409 | pdg_out->setScalar(); 410 | addOutput(pdg_out); 411 | vec_output->addOutput(pdg_out); //its own vector of out nodes 412 | 413 | connect(out_node, pdg_out,0,SbPDG_Edge::data); 414 | } 415 | 416 | 417 | //Need to confirm the functionality here 418 | void addVecOutput(std::string name, 419 | std::vector >& pm, 420 | std::map& syms ) { 421 | 422 | SbPDG_VecOutput* vec_output = new SbPDG_VecOutput(name,_vecOutputs.size()); 423 | vec_output->setLocMap(pm); 424 | insert_vec_out(vec_output); 425 | 426 | int entries = pm.size(); 427 | //std::cout << "entries: " << entries << "\n"; 428 | 429 | for(int i = 0; i < entries; ++i) { 430 | std::stringstream ss; 431 | ss << name << i; 432 | //std::cout << "name: " << name << "\n"; 433 | std::string dep_name = ss.str(); 434 | 435 | SbPDG_Node* out_node = syms[dep_name]; 
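 //Each lane of the vector output must already be in the symbol table under the
 //name <vec><i> (e.g. O0, O1, ...); a missing entry is a DFG authoring error.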
436 | if(out_node==NULL) { 437 | std::cerr << "Could not find \"" + dep_name + "\"\n"; 438 | assert(0); 439 | } 440 | 441 | SbPDG_Output* pdg_out = new SbPDG_Output(); 442 | std::string out_name = dep_name + "_out"; 443 | syms[out_name]=pdg_out; 444 | pdg_out->setName(out_name); 445 | pdg_out->setVPort(_vecOutputs.size()); 446 | addOutput(pdg_out); 447 | vec_output->addOutput(pdg_out); 448 | 449 | connect(out_node, pdg_out,0,SbPDG_Edge::data); 450 | } 451 | 452 | 453 | //assert(0 && "addVecOutput not implemented"); 454 | } 455 | 456 | void addVecInput(std::string name, 457 | std::vector >& pm, 458 | std::map& syms ) { 459 | 460 | SbPDG_VecInput* vec_input = new SbPDG_VecInput(name,_vecInputs.size()); 461 | vec_input->setLocMap(pm); 462 | insert_vec_in(vec_input); 463 | 464 | //number of vector entries -- each vector element is a input node 465 | int entries = pm.size(); 466 | //std::cout << "entries: " << entries << "\n"; 467 | 468 | for(int i = 0; i < entries; ++i) { 469 | std::stringstream ss; 470 | ss << name << i; //Vector input names: A0, A1 471 | //std::cout << "name: " << name << "\n"; 472 | SbPDG_Input* pdg_in = new SbPDG_Input(); 473 | std::string name = ss.str(); 474 | syms[name]=pdg_in; 475 | pdg_in->setName(name); 476 | pdg_in->setVPort(_vecInputs.size()); 477 | addInput(pdg_in); 478 | vec_input->addInput(pdg_in); 479 | } 480 | } 481 | 482 | void parse_and_add_vec(std::string name, std::string line, 483 | std::map& syms ,bool input); 484 | 485 | SbPDG_Edge* connect(SbPDG_Node* orig, SbPDG_Node* dest,int slot,SbPDG_Edge::EdgeType etype); 486 | 487 | void parse_and_add_inst(std::string var_out, std::string opcode, 488 | std::map& syms, 489 | std::vector inc_nodes); 490 | 491 | typedef std::vector::const_iterator const_node_iterator; 492 | typedef std::vector::const_iterator const_inst_iterator; 493 | typedef std::vector::const_iterator const_input_iterator; 494 | typedef std::vector::const_iterator const_output_iterator; 495 | typedef std::vector::const_iterator const_edge_iterator; 496 | 497 | const_inst_iterator inst_begin() {return _insts.begin();} 498 | const_inst_iterator inst_end() {return _insts.end();} 499 | int num_insts() {return _insts.size();} 500 | 501 | const_input_iterator input_begin() {return _inputs.begin();} 502 | const_input_iterator input_end() {return _inputs.end();} 503 | 504 | const_output_iterator output_begin() {return _outputs.begin();} 505 | const_output_iterator output_end() {return _outputs.end();} 506 | 507 | int num_nodes() {return _nodes.size();} 508 | 509 | int num_vec_input() {return _vecInputs.size();} 510 | int num_vec_output() {return _vecOutputs.size();} 511 | 512 | void insert_vec_in(SbPDG_VecInput* in) {_vecInputs.push_back(in);} 513 | void insert_vec_out(SbPDG_VecOutput* out) {_vecOutputs.push_back(out);} 514 | 515 | SbPDG_VecInput* vec_in(int i) {return _vecInputs[i];} 516 | SbPDG_VecOutput* vec_out(int i) {return _vecOutputs[i];} 517 | void sort_vec_in() { 518 | sort(_vecInputs.begin(), _vecInputs.end(),[](SbPDG_VecInput*& left, SbPDG_VecInput*& right){ 519 | return left->num_inputs() > right->num_inputs(); 520 | }); 521 | } 522 | 523 | void sort_vec_out() { 524 | sort(_vecOutputs.begin(), _vecOutputs.end(),[](SbPDG_VecOutput*& left, SbPDG_VecOutput*& right){ 525 | return left->num_outputs() > right->num_outputs(); 526 | }); 527 | } 528 | void compute(bool print, bool verif); 529 | 530 | private: 531 | std::vector _nodes; 532 | 533 | //redundant storage: 534 | std::vector _insts; 535 | std::vector _inputs; 536 | std::vector _outputs; 
537 | 538 | std::vector _orderedInsts; 539 | 540 | 541 | std::vector _vecInputs; 542 | std::vector _vecOutputs; 543 | 544 | std::vector _edges; 545 | 546 | int span; 547 | int work; 548 | }; 549 | 550 | #endif 551 | -------------------------------------------------------------------------------- /workloads/diannao/Makefile: -------------------------------------------------------------------------------- 1 | ifndef SS_TOOLS 2 | $(error SS_TOOLS is undefined) 3 | endif 4 | 5 | CPP=g++ 6 | 7 | OPT?=-O3 8 | CFLAGS=$(OPT) --std=c++11 -g -ggdb -gdwarf-3 9 | MODULE := conv1p conv2p conv3p conv4p pool1p pool3p pool5p class1p class3p conv5 conv5p conv1sb conv2sb conv3sb conv4sb class1sb class3sb 10 | #SRC := $(MODULE:=.cpp) 11 | #OBJ := $(MODULE:=.o) 12 | 13 | .PHONY: all clean 14 | 15 | INCLUDES = -I$(SS_TOOLS)/include/softbrain-lib 16 | LIBS = $(INCLUDES) -L$(SS_TOOLS)/lib -lsoftbrain-emu 17 | 18 | 19 | all: $(MODULE) 20 | 21 | HEADERS=dnn.hpp 22 | 23 | 24 | CONV_DFGS=red32to1sig.dfg red16to1sig.dfg red8to1sig.dfg 25 | CONV_DFG_HEADERS=$(CONV_DFGS:.dfg=.h) 26 | 27 | CLASS_DFGS=red32to1sig.dfg 28 | CLASS_DFG_HEADERS=$(CLASS_DFGS:.dfg=.h) 29 | 30 | #pool2x2avg.dfg test.dfg pool_simple.dfg 31 | POOL_DFGS=pool4x4l2avg.dfg pool2x2l4avg.dfg 32 | POOL_DFG_HEADERS=$(POOL_DFGS:.dfg=.h) 33 | 34 | DFG_HEADERS=$(sort $(CLASS_DFG_HEADERS) $(POOL_DFG_HEADERS) $(CONV_DFG_HEADERS)) 35 | 36 | SB_CONFIG=$(SS_TOOLS)/configs/diannao_simd64.sbmodel 37 | #SB_CONFIG=$(SS_TOOLS)/../softbrain-compiler/softbrain-config/configs/diannao_simd64_half.sbmodel 38 | 39 | 40 | $(DFG_HEADERS): %.h: %.dfg 41 | $(SS_TOOLS)/bin/sb_dfg_emu $(SB_CONFIG) $< 42 | #$(SS_TOOLS)/bin/sb_sched $(SB_CONFIG) $< 43 | 44 | #conv1 Nx=500, Ny=375, Kx=9, Ky=9, Ni=32, No=48, priv=False 45 | #conv2 Nx=200, Ny=200, Kx=18, Ky=18, Ni=8, No=8, priv=True 46 | #conv3 Nx=32, Ny=32, Kx=4, Ky=4, Ni=108, No=200, priv=False 47 | #conv4 Nx=32, Ny=32, Kx=7, Ky=7, Ni=16, No=512, priv=False 48 | #conv5 Nx=256, Ny=256, Kx=11, Ky=11, Ni=256, No=384, priv=True 49 | # 50 | #pool1 Nx=492, Ny=367, Kx=2, Ky=2, Ni=12, 51 | #pool3 Nx=32, Ny=32, Kx=4, Ky=4, Ni=100, 52 | #pool5 Nx=256, Ny=256, Kx=2, Ky=2, Ni=256, 53 | # 54 | #class1 Ni=200, No=100, 55 | #class3 Ni=960, No=20, 56 | 57 | #padded versions 58 | 59 | TF=-DTn=16 -DTi=16 -DTii=32 -DTnn=32 -DTx=16 -DTy=16 60 | 61 | #Tii not in convolution 62 | conv1p: convolution.cpp $(HEADERS) 63 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=10 -DNy=25 -DKx=9 -DKy=9 -DNi=32 -DNn=64 -DSHARED=1 -DTnn=64 -DTn=64 -DTx=10 -DTy=25 -DTi=32 64 | #$(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNx=500 -DNy=375 -DKx=9 -DKy=9 -DNi=32 -DNn=64 -DSHARED=1 -DTnn=64 -DTn=32 -DTx=10 -DTy=25 -DTi=32 65 | 66 | conv2p: convolution.cpp $(HEADERS) 67 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=16 -DNy=16 -DKx=18 -DKy=18 -DNi=8 -DNn=8 -DSHARED=0 -DTii=8 -DTi=8 -DTnn=8 -DTn=8 -DTx=4 -DTy=16 68 | #$(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNx=200 -DNy=200 -DKx=18 -DKy=18 -DNi=16 -DNn=16 -DSHARED=0 -DTii=16 -DTi=16 -DTnn=16 -DTn=16 -DTx=4 -DTy=20 69 | 70 | conv3p: convolution.cpp $(HEADERS) 71 | $(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNx=32 -DNy=32 -DKx=4 -DKy=4 -DNi=128 -DNn=224 -DSHARED=1 -DTi=64 -DTx=4 72 | 73 | conv4p: convolution.cpp $(HEADERS) 74 | $(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNx=32 -DNy=32 -DKx=7 -DKy=7 -DNi=16 -DNn=512 -DSHARED=1 -DTnn=64 -DTn=64 -DTx=8 75 | 76 | conv5p: convolution.cpp $(HEADERS) 77 | #$(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNx=8 -DNy=8 -DKx=11 -DKy=11 -DNi=256 -DNn=384 -DSHARED=0 -DTnn=64 -DTi=64 -DTx=8 -DTy=8 78 | $(CPP) 
$^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=2 -DNy=2 -DKx=11 -DKy=11 -DNi=256 -DNn=64 -DSHARED=0 -DTnn=64 -DTi=32 -DTx=2 -DTy=2 -DTn=64 -DTii=32 -DTnn=64 -DSB 79 | # $(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNx=256 -DNy=256 -DKx=11 -DKy=11 -DNi=256 -DNn=384 -DSHARED=0 -DTnn=64 -DTi=64 -DTx=8 -DTy=8 80 | 81 | pool1p: pooling.cpp $(HEADERS) 82 | $(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNx=492 -DNy=368 -DKx=2 -DKy=2 -DNi=16 -DTii=16 -DTx=2 -DTy=2 83 | 84 | pool3p: pooling.cpp $(HEADERS) 85 | $(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNx=32 -DNy=32 -DKx=4 -DKy=4 -DNi=128 -DTii=64 -DTi=32 -DTx=2 -DTy=2 86 | 87 | pool5p: pooling.cpp $(HEADERS) 88 | $(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNx=256 -DNy=256 -DKx=2 -DKy=2 -DNi=256 -DTii=256 -DTi=32 -DTx=4 -DTy=4 89 | 90 | class1p: classifier.cpp $(HEADERS) 91 | $(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNi=960 -DNn=32 -DTii=192 -DTi=192 -DTx=2 -DTy=4 92 | 93 | 94 | class3p: classifier.cpp $(HEADERS) 95 | $(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNi=224 -DNn=128 -DTii=32 -DTi=32 -DTx=2 -DTy=4 96 | 97 | 98 | # --------------------------------------------------------------------------------------------------------- 99 | 100 | conv1sb: convolution.cpp $(CONV_DFG_HEADERS) $(HEADERS) 101 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=10 -DNy=25 -DKx=9 -DKy=9 -DNi=32 -DNn=64 -DSHARED=1 -DTnn=64 -DTn=64 -DTx=10 -DTy=25 -DTi=32 -DSB 102 | # $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=500 -DNy=375 -DKx=9 -DKy=9 -DNi=32 -DNn=64 -DSHARED=1 -DTnn=64 -DTn=32 -DTx=10 -DTy=25 -DTi=32 103 | 104 | 105 | conv2sb: convolution.cpp $(CONV_DFG_HEADERS) $(HEADERS) 106 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=16 -DNy=16 -DKx=18 -DKy=18 -DNi=8 -DNn=8 -DSHARED=0 -DTii=8 -DTi=8 -DTnn=8 -DTn=8 -DTx=4 -DTy=16 -DSB 107 | # $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=200 -DNy=200 -DKx=18 -DKy=18 -DNi=16 -DNn=16 -DSHARED=0 -DTii=16 -DTi=16 -DTnn=16 -DTn=16 -DTx=4 -DTy=20 108 | 109 | 110 | conv3sb: convolution.cpp $(CONV_DFG_HEADERS) $(HEADERS) 111 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=4 -DNy=4 -DKx=4 -DKy=4 -DNi=128 -DNn=64 -DSHARED=1 -DTi=32 -DTx=4 -DTy=4 -DTn=64 -DTii=32 -DTnn=64 -DSB #Nx=32, Ny=32, Nn=224 112 | # $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=32 -DNy=32 -DKx=4 -DKy=4 -DNi=128 -DNn=224 -DSHARED=1 -DTi=32 -DTx=4 -DTy=4 -DSB 113 | 114 | 115 | conv0sb: convolution.cpp $(CONV_DFG_HEADERS) $(HEADERS) 116 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=2 -DNy=2 -DKx=2 -DKy=2 -DNi=32 -DNn=32 -DSHARED=1 -DTi=32 -DTx=1 -DTy=1 -DTn=32 -DTii=32 -DTnn=32 -DSB 117 | 118 | 119 | conv4sb: convolution.cpp $(CONV_DFG_HEADERS) $(HEADERS) 120 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=8 -DNy=8 -DKx=7 -DKy=7 -DNi=16 -DNn=512 -DSHARED=1 -DTnn=64 -DTn=64 -DTx=8 -DTy=8 -DTii=16 -DSB 121 | # $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=32 -DNy=32 -DKx=7 -DKy=7 -DNi=16 -DNn=512 -DSHARED=1 -DTnn=64 -DTn=64 -DTx=8 -DSB 122 | 123 | 124 | conv5sb: convolution.cpp $(CONV_DFG_HEADERS) $(HEADERS) 125 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=2 -DNy=2 -DKx=11 -DKy=11 -DNi=256 -DNn=64 -DSHARED=0 -DTnn=64 -DTi=32 -DTx=2 -DTy=2 -DTn=64 -DTii=32 -DTnn=64 -DSB 126 | # $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=8 -DNy=8 -DKx=11 -DKy=11 -DNi=256 -DNn=384 -DSHARED=0 -DTnn=64 -DTi=64 -DTx=8 -DTy=8 -DSB 127 | 128 | 129 | 130 | 131 | pool1sb: pooling.cpp $(POOL_DFG_HEADERS) $(HEADERS) 132 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=492 -DNy=368 -DKx=2 -DKy=2 -DNi=16 -DTii=16 -DTx=2 -DTy=2 -DSB 133 | #$(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=512 -DNy=368 -DKx=2 -DKy=2 -DNi=16 -DTii=16 
-DTx=16 -DTy=16 -DSB 134 | 135 | pool3sb: pooling.cpp $(POOL_DFG_HEADERS) $(HEADERS) 136 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=32 -DNy=32 -DKx=4 -DKy=4 -DNi=128 -DTii=64 -DTi=32 -DTx=16 -DTy=4 -DSB 137 | 138 | pool5sb: pooling.cpp $(HEADERS) $(POOL_DFG_HEADERS) 139 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=256 -DNy=256 -DKx=2 -DKy=2 -DNi=256 -DTii=256 -DTi=32 -DTx=16 -DTy=8 -DSB 140 | 141 | class1sb: classifier.cpp $(CLASS_DFG_HEADERS) $(HEADERS) 142 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNi=960 -DNn=20 -DTii=32 -DTi=32 -DTx=2 -DTy=4 -DSB 143 | 144 | class3sb: classifier.cpp $(CLASS_DFG_HEADERS) $(HEADERS) 145 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNi=224 -DNn=128 -DTii=32 -DTi=32 -DTx=2 -DTy=4 -DSB 146 | 147 | 148 | # -------------------------------------------------------------------------------------------------------------------------------- 149 | 150 | conv1: convolution.cpp $(HEADERS) 151 | $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNx=500 -DNy=375 -DKx=9 -DKy=9 -DNi=32 -DNn=48 -DSHARED=1 -DTnn=64 -DTn=32 -DTx=10 -DTy=25 -DTi=32 152 | 153 | conv2: convolution.cpp $(HEADERS) 154 | $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNx=200 -DNy=200 -DKx=18 -DKy=18 -DNi=8 -DNn=8 -DSHARED=0 -DTnn=8 -DTn=8 -DTi=8 -DTx=16 -DTy=16 155 | 156 | conv3: convolution.cpp $(HEADERS) 157 | $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNx=32 -DNy=32 -DKx=4 -DKy=4 -DNi=108 -DNn=200 -DSHARED=1 -DTnn=20 -DTn=10 -DTi=20 -DTx=16 -DTy=16 158 | 159 | conv4: convolution.cpp $(HEADERS) 160 | $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNx=32 -DNy=32 -DKx=7 -DKy=7 -DNi=16 -DNn=512 -DSHARED=1 -DTnn=64 -DTn=16 -DTi=16 -DTx=16 -DTy=16 161 | 162 | conv5: convolution.cpp $(HEADERS) 163 | $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNx=8 -DNy=8 -DKx=11 -DKy=11 -DNi=256 -DNn=384 -DSHARED=0 -DTnn=64 -DTn=16 -DTi=16 -DTx=16 -DTy=16 164 | # $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNx=256 -DNy=256 -DKx=11 -DKy=11 -DNi=256 -DNn=384 -DSHARED=0 -DTnn=64 -DTn=16 -DTi=16 -DTx=16 -DTy=16 165 | 166 | 167 | pool1: pooling.cpp $(HEADERS) 168 | $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNx=492 -DNy=367 -DKx=2 -DKy=2 -DNi=12 -DTii=12 -DTi=12 -DTx=16 -DTy=16 169 | 170 | pool3: pooling.cpp $(HEADERS) 171 | $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNx=32 -DNy=32 -DKx=4 -DKy=4 -DNi=100 -DTii=50 -DTi=25 -DTx=16 -DTy=16 172 | 173 | pool5: pooling.cpp $(HEADERS) 174 | $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNx=256 -DNy=256 -DKx=2 -DKy=2 -DNi=256 -DTii=64 -DTi=16 -DTx=16 -DTy=16 175 | 176 | 177 | class1: classifier.cpp $(HEADERS) 178 | $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNi=960 -DNn=20 -DTii=80 -DTi=20 -DTnn=20 -DTn=20 179 | 180 | 181 | class3: classifier.cpp $(HEADERS) 182 | $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNi=200 -DNn=100 -DTii=40 -DTi=20 -DTnn=40 -DTn=20 183 | 184 | 185 | clean: 186 | @rm -f $(MODULE) convolution pooling classifier $(DFG_HEADERS) 187 | 188 | -------------------------------------------------------------------------------- /workloads/diannao/classifier.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "dnn.hpp" 3 | #include "softbrain.hpp" 4 | #include 5 | 6 | #if SB 7 | #include "red32to1sig.h" 8 | #endif 9 | 10 | #include "sim_timing.h" 11 | 12 | 13 | 14 | using namespace std; 15 | 16 | // Problem Size 17 | //#define Nn 100 // Number of Output Layers 18 | //#define Ni 200 // Number of Input Layers 19 | 20 | #ifndef Nn 21 | #define Nn 128 // Number of Output Layers 22 | #define Ni 224 // Number of Input Layers 23 | #endif 24 | 25 | #ifndef Tii 26 | // Tiling Sizes 27 | #define Tnn 32 28 | #define Tii 32 29 | 
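 //Tnn/Tii are the outer tile sizes over output/input neurons and Tn/Ti the inner
 //tiles (see classifier_layer_blocked below); the Makefile overrides all four per
 //benchmark with -DTnn=... -DTii=... etc.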
//#define Tn 5 30 | //#define Ti 25 31 | #define Tn 16 32 | #define Ti 16 33 | #endif 34 | 35 | //Arrays: 36 | VTYPE synapse[Nn][Ni] __attribute__((aligned(64))); 37 | VTYPE neuron_i[Ni] __attribute__((aligned(64))); 38 | VTYPE neuron_n[Nn] __attribute__((aligned(64))), neuron_n2[Nn] __attribute__((aligned(64))); 39 | 40 | 41 | int classifier_layer(VTYPE (&synapse)[Nn][Ni], VTYPE (&neuron_i)[Ni], VTYPE (&neuron_n)[Nn]) { 42 | int total_calc=0; 43 | for (int n = 0; n < Nn; n++) { 44 | VTYPE temp=0; 45 | for (int i = 0; i < Ni; i++) { 46 | temp += synapse[n][i] * neuron_i[i]; 47 | } 48 | neuron_n[n] = sigmoid(temp); 49 | } 50 | return total_calc; 51 | } 52 | 53 | #if SB 54 | 55 | // CGRA Pipe 56 | #define PIPEWIDTH 8 // adders at mouth of 1 CGRA pipe 57 | #define PIPEDEPTH 32 // approx. depth of CGRA pipeline 58 | int classifier_layer_sb(VTYPE (&synapse)[Nn][Ni], VTYPE (&neuron_i)[Ni], VTYPE (&neuron_n)[Nn]) { 59 | // Fits in scratchpad? (should be true for our benches) 60 | if(Ni > SCRATCHSIZE){ 61 | cout << "Error: inputs do not fit in scratch for classifier layer" << endl; 62 | return -1; 63 | } 64 | 65 | // Handle class3, Nn = 32 -- not sure what this does 66 | int pipedepth = PIPEDEPTH; 67 | if(Nn < PIPEDEPTH){ 68 | pipedepth = Nn; 69 | } 70 | 71 | // Stream in CGRA config (do this somewhere else?) 72 | SB_CONFIG(red32to1sig_config, red32to1sig_size); 73 | 74 | // Stream in inputs to scratch 75 | SB_DMA_SCRATCH_LOAD(&neuron_i, sizeof(VTYPE)*4, sizeof(VTYPE)*4, Ni/4, 0); 76 | SB_WAIT_ALL(); 77 | 78 | for(int n = 0; n < Nn; n += pipedepth){ 79 | SB_CONST(P_red32to1sig_acc, 0, pipedepth); 80 | 81 | for(int i = 0; i < Ni; i+= PIPEWIDTH*4){ 82 | // Enable sigmoid on final itr 83 | if(i + PIPEWIDTH*4 < Ni){ 84 | SB_CONST(P_red32to1sig_pred, 0, pipedepth); 85 | SB_RECURRENCE(P_red32to1sig_out, P_red32to1sig_acc, pipedepth); 86 | } else { 87 | SB_CONST(P_red32to1sig_pred, 1, pipedepth); 88 | } 89 | 90 | SB_DMA_READ(&synapse[n][i], sizeof(VTYPE)*Ni, 4*sizeof(VTYPE)*PIPEWIDTH, pipedepth, P_red32to1sig_S); //Read Synapses 91 | SB_SCR_PORT_STREAM(i*sizeof(VTYPE), 0, 4*sizeof(VTYPE)*PIPEWIDTH, pipedepth, P_red32to1sig_N); //Read Neurons 92 | } 93 | 94 | // write completed outputs out to memory 95 | SB_DMA_WRITE_SHF16(P_red32to1sig_out, 4*sizeof(VTYPE), 4*sizeof(VTYPE), pipedepth/4, &neuron_n[n]); 96 | } 97 | 98 | SB_WAIT_ALL(); 99 | 100 | return 0; 101 | } 102 | #endif 103 | 104 | void fill_classifier(VTYPE (&synapse)[Nn][Ni], VTYPE (&neuron_i)[Ni], 105 | VTYPE (&neuron_n)[Nn], VTYPE (&neuron_n2)[Nn]) { 106 | for(int n = 0; n < Nn; ++n) { 107 | for(int i = 0; i < Ni; ++i) { 108 | synapse[n][i] = rand()%(1+n/4); //n*Ni+i; 109 | } 110 | } 111 | for(int i = 0; i < Ni; ++i) { 112 | neuron_i[i] = rand()%16; //i; 113 | } 114 | for(int n = 0; n < Nn; ++n) { 115 | neuron_n[n] = 0; //i; 116 | neuron_n2[n] = 0; //i; 117 | } 118 | } 119 | 120 | int classifier_layer_blocked(VTYPE (&synapse)[Nn][Ni], VTYPE (&neuron_i)[Ni], 121 | VTYPE (&neuron_n)[Nn]) { 122 | int total_calc=0; 123 | VTYPE sum[Nn]={0}; 124 | for (int nnn = 0; nnn < Nn; nnn += Tnn) { // tiling for output neurons; 125 | for (int iii = 0; iii < Ni; iii += Tii) { // tiling for input neurons; 126 | for (int nn = nnn; nn < nnn + Tnn; nn += Tn) { 127 | for (int ii = iii; ii < iii + Tii; ii += Ti) { 128 | //total_calc++; 129 | 130 | // — Original code — 131 | for (int n = nn; n < nn + Tn; n++) { 132 | VTYPE sum_sc=0; 133 | for (int i = ii; i < ii + Ti; i++) { 134 | sum_sc += (synapse[n][i] * neuron_i[i]); 135 | //sum_sc += synapse[n][i] * i; 136 | } 137 
| sum[n]+=sum_sc; 138 | } 139 | } 140 | } 141 | } 142 | for (int nn = nnn; nn < nnn + Tnn; nn++) { 143 | neuron_n[nn] = sigmoid(sum[nn]); 144 | } 145 | } 146 | return total_calc; 147 | } 148 | 149 | int main(int argc, char** argv) { 150 | fill_classifier(synapse,neuron_i,neuron_n,neuron_n2); 151 | 152 | if(argc==3) { 153 | 154 | } else if(argc==2) { 155 | begin_roi(); 156 | #ifdef SB 157 | int calc = classifier_layer_sb(synapse,neuron_i,neuron_n); 158 | #else 159 | int calc = classifier_layer_blocked(synapse,neuron_i,neuron_n); 160 | #endif 161 | end_roi(); 162 | 163 | if(calc > 0) { 164 | cout << "calc: " << calc << "\n"; 165 | } 166 | //cout << "Perf Run Complete\n"; 167 | } else { 168 | int calc = classifier_layer(synapse,neuron_i,neuron_n); 169 | 170 | begin_roi(); 171 | #ifdef SB 172 | int calc2 = classifier_layer_sb(synapse,neuron_i,neuron_n2); 173 | #else 174 | int calc2 = classifier_layer_blocked(synapse,neuron_i,neuron_n2); 175 | #endif 176 | end_roi(); 177 | 178 | cout << "C1: " << calc << " C2: " << calc2 << "\n"; 179 | 180 | compare(neuron_n,neuron_n2,Nn); 181 | 182 | cout << "mults: " << Nn*Ni << " sigmoids: " << Nn << "\n"; 183 | } 184 | sb_stats(); 185 | } 186 | 187 | -------------------------------------------------------------------------------- /workloads/diannao/convolution_old.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include <string> 3 | #include "dnn.hpp" 4 | 5 | using namespace std; 6 | 7 | #ifndef SHARED 8 | #define SHARED 1 9 | #endif 10 | 11 | #ifndef Ny 12 | //Problem Size 13 | #define Ny 32 14 | #define Nx 32 15 | 16 | #define Kx 4 17 | #define Ky 4 18 | //#define Ni 108 19 | //#define Nn 200 20 | 21 | #define Ni 112 22 | #define Nn 224 23 | #endif 24 | 25 | //slide increment 26 | #ifndef Sy 27 | #define Sy 1 28 | #define Sx 1 29 | #endif 30 | 31 | #ifndef Tnn 32 | //Tiling Sizes 33 | #define Tnn 32 34 | //#define Tn 25 35 | //#define Ti 16 36 | #define Tn 16 37 | #define Ti 16 38 | 39 | #define Ty 8 40 | #define Tx 8 41 | #endif 42 | 43 | #define NYPAD (Ny+Ky) 44 | #define NXPAD (Nx+Kx) 45 | 46 | #define NYSCL (Ny/Sy) 47 | #define NXSCL (Nx/Sx) 48 | 49 | 50 | //Arrays: 51 | #if SHARED == 1 52 | #define SYNAPSE_SIZE (1L*Ky*Kx*Nn*Ni) 53 | #else 54 | #define SYNAPSE_SIZE (1L*NYSCL*NXSCL*Ky*Kx*Nn*Ni) 55 | #endif 56 | 57 | #if SHARED == 1 58 | VTYPE (*synapse)[Ky][Kx][Nn][Ni]; 59 | #else 60 | VTYPE (*synapse)[NYSCL][NXSCL][Ky][Kx][Nn][Ni]; 61 | #endif 62 | 63 | //VTYPE neuron_i[NYPAD][NXPAD][Ni]; 64 | //VTYPE neuron_n[NYSCL][NXSCL][Nn]={0}, neuron_n2[NYSCL][NXSCL][Nn]={0}; 65 | 66 | VTYPE (*neuron_i)[NYPAD][NXPAD][Ni]; 67 | VTYPE (*neuron_n)[NYSCL][NXSCL][Nn]; 68 | VTYPE (*neuron_n2)[NYSCL][NXSCL][Nn]; 69 | 70 | 71 | void fill_convolution_shared_simple(VTYPE (&synapse)[Ky][Kx][Nn][Ni], 72 | VTYPE (&neuron_i)[NYPAD][NXPAD][Ni]) { 73 | for(int yy = 0; yy < Ky; ++yy) { 74 | for(int xx = 0; xx < Kx; ++xx) { 75 | for(int nn = 0; nn < Nn; ++nn) { 76 | for(int ni = 0; ni < Ni; ++ni) { 77 | synapse[yy][xx][nn][ni] = 2; 78 | } } } } 79 | for(int yy = 0; yy < NYPAD; ++yy) { 80 | for(int xx = 0; xx < NXPAD; ++xx) { 81 | for(int ni = 0; ni < Ni; ++ni) { 82 | neuron_i[yy][xx][ni] = 1; 83 | } } } 84 | 85 | } 86 | 87 | void fill_convolution_private(VTYPE (&synapse)[NYSCL][NXSCL][Ky][Kx][Nn][Ni], 88 | VTYPE (&neuron_i)[NYPAD][NXPAD][Ni]) { 89 | for(int yout = 0; yout < NYSCL; ++yout) { 90 | for(int xout = 0; xout < NXSCL; ++xout) { 91 | for(int yy = 0; yy < Ky; ++yy) { 92 | for(int xx = 0; xx < Kx; ++xx) { 93 | for(int nn = 0; nn
< Nn; ++nn) { 94 | for(int ni = 0; ni < Ni; ++ni) { 95 | synapse[yout][xout][yy][xx][nn][ni] = 2; 96 | } } } } } } 97 | for(int yy = 0; yy < NYPAD; ++yy) { 98 | for(int xx = 0; xx < NXPAD; ++xx) { 99 | for(int ni = 0; ni < Ni; ++ni) { 100 | neuron_i[yy][xx][ni] = 1; 101 | } } } 102 | 103 | } 104 | 105 | 106 | void fill_convolution_shared(VTYPE (&synapse)[Ky][Kx][Nn][Ni], 107 | VTYPE (&neuron_i)[NYPAD][NXPAD][Ni]) { 108 | int total1=0,total2=0; 109 | for(int yy = 0; yy < Ky; ++yy) { 110 | for(int xx = 0; xx < Kx; ++xx) { 111 | for(int nn = 0; nn < Nn; ++nn) { 112 | for(int ni = 0; ni < Ni; ++ni) { 113 | synapse[yy][xx][nn][ni] = total1; 114 | total1+=1; 115 | } 116 | } 117 | } 118 | } 119 | for(int yy = 0; yy < NYPAD; ++yy) { 120 | for(int xx = 0; xx < NXPAD; ++xx) { 121 | for(int ni = 0; ni < Ni; ++ni) { 122 | neuron_i[yy][xx][ni] = total2; 123 | total2+=2; 124 | } 125 | } 126 | } 127 | } 128 | 129 | 130 | 131 | std::pair<int,int> convolution_layer_blocked( 132 | #if SHARED == 1 133 | VTYPE (&synapse)[Ky][Kx][Nn][Ni], 134 | #else 135 | VTYPE (&synapse)[NYSCL][NXSCL][Ky][Kx][Nn][Ni], 136 | #endif 137 | VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 138 | VTYPE (&neuron_n)[NYSCL][NXSCL][Nn]) { 139 | int c1=0,c2=0; 140 | VTYPE sum[Nn]={0}; 141 | 142 | for (int yy = 0; yy < Ny; yy += Ty) { 143 | for (int xx = 0; xx < Ny; xx += Tx) { 144 | for (int nnn = 0; nnn < Nn; nnn += Tnn) { 145 | int yout = yy/Sy; 146 | for (int y = yy; y < yy + Ty; y += Sy) { // tiling for y; 147 | int xout = xx/Sx; 148 | 149 | for (int x = xx; x < xx + Tx; x += Sx) { // tiling for x; 150 | 151 | //LOAD_SCRATCH -- read cube from larger 3D cube to compact 3D cube 152 | //start_addr: neuron[y][x][i] 153 | //stride_size: Ni*Kx 154 | //stride: Ni*Nx 155 | //num_strides: ky 156 | 157 | for (int nn = nnn; nn < nnn + Tnn; nn += Tn) { 158 | for (int n = nn; n < nn + Tn; n++) { 159 | sum[n] = 0; 160 | } 161 | 162 | for (int ky = 0; ky < Ky; ky++) { // sliding window; 163 | for (int kx = 0; kx < Kx; kx++) { 164 | 165 | int ii = 0; 166 | VTYPE sum_sc; 167 | 168 | for (; ii < Ni -Ti+1; ii += Ti) { 169 | //SCRATCH -> PORT 170 | // addr: neuron_i[ky + y][kx + x][ii] 171 | // stride_len: Tn*2 172 | // num_strides: 8 173 | 174 | //DMA -> PORT 175 | // addr: synapse[ky][kx][n][ii] 176 | // stride_len: Tn*2 177 | // stride_dist: 178 | // num_strides:8 179 | 180 | //***** 181 | for (int n = nn; n < nn + Tn; n++) { 182 | sum_sc=0; 183 | for (int i = ii; i < ii + Ti; i++) { 184 | #if SHARED == 1 // version with shared kernels 185 | VTYPE sv = synapse[ky][kx][n][i]; 186 | VTYPE nv = neuron_i[ky + y][kx + x][i]; 187 | #else // version with private kernels 188 | VTYPE sv = synapse[yout][xout][ky][kx][n][i]; 189 | VTYPE nv = neuron_i[ky + y][kx + x][i]; 190 | #endif 191 | sum_sc+=(sv*nv)>>1; 192 | } 193 | sum[n]+=sum_sc; 194 | } 195 | //**** 196 | } 197 | } 198 | } 199 | 200 | //sigmoid 201 | for (int n = nn; n < nn + Tn; n++) { 202 | neuron_n[yout][xout][n] = sigmoid(sum[n]); 203 | //c2++; 204 | } 205 | } 206 | xout++; 207 | } 208 | yout++; 209 | } 210 | } 211 | } 212 | } 213 | return make_pair(c1,c2); 214 | } 215 | 216 | /* 217 | * MM convolution layer implemented for softbrain 218 | * Sb config constants (SCRATCHSIZE, NUMPIPES, etc) in softbrain.hpp 219 | * Code assumes NUMPIPES = 2 220 | * Safe because inputs % PIPEDEPTH = 0 (for PIPEDEPTH = 32) 221 | * This version does not tile inputs.
If the inputs don't fit in the 222 | * scratchpad, it invokes convolution_layer_sb_tiled 223 | * NOTE THE CHANGED ARRAY DIMS FOR SYNAPSE 224 | */ 225 | int convolution_layer_sb( 226 | #if SHARED == 1 227 | VTYPE (&synapse)[Ky][Kx][Nn][Ni], // MM CHANGED ORDERING OF ALL INPUTS 228 | #else 229 | VTYPE (&synapse)[NYSCL][NXSCL][Ky][Kx][Nn][Ni], 230 | #endif 231 | VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 232 | VTYPE (&neuron_n)[NYSCL][NXSCL][Nn]) { 233 | 234 | // Stream in CGRA config 235 | int *cgra_config; 236 | int cgra_config_sz; 237 | DMA_SB_CONFIG(cgra_config, cgra_config_sz); 238 | 239 | if(Kx * Ky * Ni > SCRATCHSIZE){ // input neurons don't fit in scratch 240 | convolution_layer_sb_tiled(synapse, neuron_i, neuron_n); return 0; // call tiled version and return 241 | } 242 | 243 | VTYPE neuron_i_scratch[Ky][Kx][Ni]; 244 | // neuron_i_scratch is placed at SCRATCHSTART in the scratchpad address space 245 | int yout = 0; 246 | for (int y = 0; y < Ny; y += Sy) { 247 | int xout = 0; 248 | for (int x = 0; x < Nx; x += Sx) { 249 | IC_DMA_SCRATCH_LOAD(&neuron_i[y][x][0], sizeof(VTYPE) * Ni * Nx, sizeof(VTYPE) * Ni * Kx, Ky, &neuron_i_scratch[0][0][0]); 250 | for(int n = 0; n < Nn; n += 2*PIPEDEPTH){ // each pipe does PIPEDEPTH output layers 251 | IC_CONST(INPUTPRED0, 0, Ni*Kx*Ky - 1); 252 | IC_CONST(INPUTPRED1, 0, Ni*Kx*Ky - 1); 253 | for(int ky = 0; ky < Ky; ++ky){ // Spin through windows... 254 | for(int kx = 0; kx < Kx; ++kx){ 255 | for(int i = 0; i < Ni; i+=PIPEWIDTH){ // ...and input layers 256 | for(int nn = 0; nn < PIPEDEPTH; ++nn){ // Both pipes get PIPEDEPTH copies of same neurons 257 | IC_SCRATCH_READ(&neuron_i_scratch[ky][kx][i], PIPEWIDTH, INPUTNEURON0); 258 | IC_SCRATCH_READ(&neuron_i_scratch[ky][kx][i], PIPEWIDTH, INPUTNEURON1); 259 | } 260 | 261 | // Each entry gets weights for different output layers 262 | #if SHARED == 1 263 | IC_DMA_READ(&synapse[ky][kx][n][i], sizeof(VTYPE)*Ni, sizeof(VTYPE)*PIPEWIDTH, PIPEDEPTH, INPUTWEIGHT0); 264 | IC_DMA_READ(&synapse[ky][kx][n+PIPEDEPTH][i], sizeof(VTYPE)*Ni, sizeof(VTYPE)*PIPEWIDTH, PIPEDEPTH, INPUTWEIGHT1); 265 | #else 266 | IC_DMA_READ(&synapse[yout][xout][ky][kx][n][i], sizeof(VTYPE)*Ni, sizeof(VTYPE)*PIPEWIDTH, PIPEDEPTH, INPUTWEIGHT0); 267 | IC_DMA_READ(&synapse[yout][xout][ky][kx][n+PIPEDEPTH][i], sizeof(VTYPE)*Ni, sizeof(VTYPE)*PIPEWIDTH, PIPEDEPTH, INPUTWEIGHT1); 268 | #endif 269 | 270 | if(ky + 1 < Ky || kx + 1 < Kx || i + PIPEWIDTH < Ni){ // don't recurse on last pass 271 | OC_RECURRENCE(OUTPUT0, INPUTACC0, PIPEDEPTH); // until input stack complete 272 | OC_RECURRENCE(OUTPUT1, INPUTACC1, PIPEDEPTH); 273 | } 274 | if((kx + 1 == Kx) && (ky + 1 == Ky) && (i + PIPEWIDTH >= Ni - 1)){ // sigmoid -- before last step of last tile 275 | IC_CONST(INPUTPRED0, 1, PIPEDEPTH); 276 | IC_CONST(INPUTPRED1, 1, PIPEDEPTH); 277 | } 278 | } 279 | } 280 | } 281 | // Write completed input stacks out to mem 282 | OC_DMA_WRITE(OUTPUT0, sizeof(VTYPE), sizeof(VTYPE), PIPEDEPTH, &neuron_n[yout][xout][n]); 283 | OC_DMA_WRITE(OUTPUT1, sizeof(VTYPE), sizeof(VTYPE), PIPEDEPTH, &neuron_n[yout][xout][n+PIPEDEPTH]); 284 | } 285 | xout++; 286 | } 287 | yout++; 288 | } 289 | 290 | return 0; 291 | } 292 | 293 | // The full input stack won't fit in the scratchpad at once, 294 | // so we must tile it into chunks that do.
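As a rough illustration of the tile-size arithmetic the tiled version below relies on (an editorial sketch, not part of convolution_old.cpp; it assumes SCRATCHSIZE = 2048 as defined in softbrain.hpp and PIPEWIDTH = 8 as defined in classifier.cpp):

// Hypothetical standalone helper mirroring the `ti` computation in convolution_layer_sb_tiled:
// how many input layers fit in the scratchpad per tile.
static int scratch_tile_layers(int scratch_elems, int kx, int ky, int pipewidth) {
  int ti = scratch_elems / (kx * ky);  // layers per tile that fit, rounded down
  ti -= ti % pipewidth;                // keep ti a multiple of the pipe width for easy chunking
  return ti;
}
// With the defaults here: 2048 / (4*4) = 128, already a multiple of 8, so ti = 128,
// which covers all Ni = 112 input layers in one tile. (In fact, for these sizes
// Kx*Ky*Ni = 1792 <= SCRATCHSIZE, so convolution_layer_sb above never takes the tiled path.)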
295 | std::pair<int,int> convolution_layer_sb_tiled( 296 | #if SHARED == 1 297 | VTYPE (&synapse)[Ky][Kx][Nn][Ni], 298 | #else 299 | VTYPE (&synapse)[NYSCL][NXSCL][Ky][Kx][Nn][Ni], 300 | #endif 301 | VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 302 | VTYPE (&neuron_n)[NYSCL][NXSCL][Nn]) { 303 | 304 | // Most places in the code, Ni is replaced with ti-excess (number of input layers in current tile) 305 | int ti = SCRATCHSIZE / (Kx*Ky); // tile size: # of input layers that fit in scratch, round down 306 | if(ti % PIPEWIDTH != 0){ // Make sure ti%pipewidth = 0 for easy chunking later 307 | ti = ti - (ti % PIPEWIDTH); 308 | } 309 | int excess; 310 | 311 | VTYPE neuron_i_scratch[Ky][Kx][ti]; 312 | // neuron_i_scratch is placed at SCRATCHSTART in the scratchpad address space 313 | 314 | // Outer tiled loop 315 | for(int ii = 0; ii < Ni - 1 + ti; ii += ti){ 316 | // deal with overflow (Ni%ti != 0) 317 | if(ii >= Ni){ 318 | excess = ii - (Ni - 1); 319 | } else { 320 | excess = 0; 321 | } 322 | int yout = 0; 323 | for (int y = 0; y < Ny; y += Sy) { 324 | int xout = 0; 325 | for (int x = 0; x < Nx; x += Sx) { 326 | IC_DMA_SCRATCH_LOAD(&neuron_i[y][x][ii], sizeof(VTYPE)*Ni*Nx, sizeof(VTYPE)*(ti-excess)*Kx, Ky, neuron_i_scratch); 327 | for(int n = 0; n < Nn; n += 2*PIPEDEPTH){ // each pipe does PIPEDEPTH output layers 328 | IC_CONST(INPUTPRED0, 0, (ti-excess)*Kx*Ky - 1); 329 | IC_CONST(INPUTPRED1, 0, (ti-excess)*Kx*Ky - 1); 330 | 331 | // If not first ii itr, load output acc. from memory 332 | if(ii != 0){ 333 | IC_DMA_READ(&neuron_n[yout][xout][n], sizeof(VTYPE) * PIPEDEPTH, sizeof(VTYPE)*PIPEDEPTH, 1, INPUTACC0); 334 | IC_DMA_READ(&neuron_n[yout][xout][n+PIPEDEPTH], sizeof(VTYPE) * PIPEDEPTH, sizeof(VTYPE)*PIPEDEPTH, 1, INPUTACC1); 335 | } 336 | 337 | for(int ky = 0; ky < Ky; ++ky){ // Spin through windows... 338 | for(int kx = 0; kx < Kx; ++kx){ 339 | for(int i = ii; i < ii+(ti-excess); i+=PIPEWIDTH){ // ...and input layers 340 | for(int nn = 0; nn < PIPEDEPTH; ++nn){ // Both pipes get PIPEDEPTH copies of same neurons 341 | IC_SCRATCH_READ(&neuron_i_scratch[ky][kx][i], PIPEWIDTH, INPUTNEURON0); 342 | IC_SCRATCH_READ(&neuron_i_scratch[ky][kx][i], PIPEWIDTH, INPUTNEURON1); 343 | } 344 | 345 | // Each entry gets weights for different output layers 346 | #if SHARED == 1 347 | IC_DMA_READ(&synapse[ky][kx][n][i], sizeof(VTYPE)*Ni, sizeof(VTYPE)*PIPEWIDTH, PIPEDEPTH, INPUTWEIGHT0); 348 | IC_DMA_READ(&synapse[ky][kx][n+PIPEDEPTH][i], sizeof(VTYPE)*Ni, sizeof(VTYPE)*PIPEWIDTH, PIPEDEPTH, INPUTWEIGHT1); 349 | #else 350 | IC_DMA_READ(&synapse[yout][xout][ky][kx][n][i], sizeof(VTYPE)*Ni, sizeof(VTYPE)*PIPEWIDTH, PIPEDEPTH, INPUTWEIGHT0); 351 | IC_DMA_READ(&synapse[yout][xout][ky][kx][n+PIPEDEPTH][i], sizeof(VTYPE)*Ni, 352 | sizeof(VTYPE)*PIPEWIDTH, PIPEDEPTH, INPUTWEIGHT1); 353 | #endif 354 | 355 | if(ky + 1 < Ky || kx + 1 < Kx || i + PIPEWIDTH < (ti-excess)){ // don't recurse on last pass 356 | OC_RECURRENCE(OUTPUT0, INPUTACC0, PIPEDEPTH); // until input stack complete 357 | OC_RECURRENCE(OUTPUT1, INPUTACC1, PIPEDEPTH); 358 | } 359 | if((kx + 1 == Kx) && (ky + 1 == Ky) && (i + PIPEWIDTH >= (ti-excess) - 1)){ // sigmoid -- before last step of last tile 360 | IC_CONST(INPUTPRED0, 1, PIPEDEPTH); 361 | IC_CONST(INPUTPRED1, 1, PIPEDEPTH); 362 | } 363 | } 364 | } 365 | } 366 | // Write partial output stacks out to mem 367 | OC_DMA_WRITE(OUTPUT0, sizeof(VTYPE), sizeof(VTYPE), PIPEDEPTH, &neuron_n[yout][xout][n]); 368 | OC_DMA_WRITE(OUTPUT1, sizeof(VTYPE), sizeof(VTYPE), PIPEDEPTH, &neuron_n[yout][xout][n+PIPEDEPTH]); 369 | } 370 | xout++; 371 | } 372 | yout++; 373 | } 374
| } 375 | 376 | return make_pair(0,0); 377 | } 378 | 379 | std::pair<int,int> convolution_layer( 380 | #if SHARED == 1 381 | VTYPE (&synapse)[Ky][Kx][Nn][Ni], 382 | #else 383 | VTYPE (&synapse)[NYSCL][NXSCL][Ky][Kx][Nn][Ni], 384 | #endif 385 | VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 386 | VTYPE (&neuron_n)[NYSCL][NXSCL][Nn]) { 387 | int c1=0,c2=0; 388 | VTYPE sum[Nn]={0}; 389 | 390 | // — Original code — (excluding nn, ii loops) 391 | int yout = 0; 392 | for (int y = 0; y < Ny; y += Sy) { // tiling for y; 393 | int xout = 0; 394 | for (int x = 0; x < Ny; x += Sx) { // tiling for x; 395 | for (int nn = 0; nn < Nn; nn += Tn) { 396 | for (int n = nn; n < nn + Tn; n++) { 397 | sum[n]=0; 398 | } 399 | 400 | // sliding window; 401 | for (int ky = 0; ky < Ky; ky++) 402 | for (int kx = 0; kx < Kx; kx++) 403 | for (int n = nn; n < nn + Tn; n++) 404 | for (int i = 0; i < Ni; i++) { 405 | #if SHARED == 1 // version with shared kernels 406 | VTYPE sv = synapse[ky][kx][n][i]; 407 | VTYPE nv = neuron_i[ky + y][kx + x][i]; 408 | #else // version with private kernels 409 | VTYPE sv = synapse[yout][xout][ky][kx][n][i]; 410 | VTYPE nv = neuron_i[ky + y][kx + x][i]; 411 | #endif 412 | sum[n]+=(sv*nv)>>1; 413 | } 414 | //sigmoid 415 | for (int n = nn; n < nn + Tn; n++) { 416 | neuron_n[yout][xout][n] = sigmoid(sum[n]); 417 | c2++; 418 | } 419 | } 420 | xout++; 421 | } 422 | yout++; 423 | } 424 | return make_pair(c1,c2); 425 | } 426 | 427 | int main(const int argc, const char** argv) { 428 | 429 | #if SHARED == 1 430 | synapse = (VTYPE (*)[Ky][Kx][Nn][Ni]) malloc(SYNAPSE_SIZE*sizeof(VTYPE)); 431 | #else 432 | synapse = (VTYPE (*)[NYSCL][NXSCL][Ky][Kx][Nn][Ni]) malloc(SYNAPSE_SIZE*sizeof(VTYPE)); 433 | #endif 434 | 435 | neuron_i = (VTYPE (*)[NYPAD][NXPAD][Ni])malloc(NYPAD*NXPAD*Ni*sizeof(VTYPE)); 436 | neuron_n = (VTYPE (*)[NYSCL][NXSCL][Nn])malloc(NYSCL*NXSCL*Nn*sizeof(VTYPE)); 437 | neuron_n2 = (VTYPE (*)[NYSCL][NXSCL][Nn])malloc(NYSCL*NXSCL*Nn*sizeof(VTYPE)); 438 | 439 | #if SHARED == 1 440 | fill_convolution_shared_simple(*synapse,*neuron_i); 441 | #else 442 | fill_convolution_private(*synapse,*neuron_i); 443 | #endif 444 | 445 | begin_roi(); 446 | if(argc==3) { 447 | 448 | // } else if(argc==2 && string(argv[1])=="perf") { 449 | } else if(argc==2) { 450 | auto calc = convolution_layer_blocked(*synapse,*neuron_i,*neuron_n); 451 | //cout << "Perf Run Complete\n"; 452 | } else { 453 | cout << "argc: " << argc << "\n"; 454 | 455 | auto calc = convolution_layer_blocked(*synapse,*neuron_i,*neuron_n); 456 | auto calc2 = convolution_layer(*synapse,*neuron_i,*neuron_n2); 457 | if(calc.first!=0) { 458 | cout << "blocks=" << calc.first << "\n"; 459 | } 460 | compare((VTYPE*)*neuron_n,(VTYPE*)*neuron_n2,NYSCL*NXSCL*Nn); 461 | int n_outputs= Ny/Sy * Ny/Sx * Nn; 462 | cout << "mults: " << n_outputs*Ni*Kx*Ky << " sigmoids: " << n_outputs << "\n"; 463 | cout << "argc: " << argc << "\n"; 464 | } 465 | end_roi(); 466 | 467 | //cout << "mult-block: " << calc.first << " sigmoid-block: " << calc.second << "\n"; 468 | //cout << "mult-orig: " << calc2.first << " sigmoid-orig: " << calc2.second << "\n"; 469 | } 470 | 471 | -------------------------------------------------------------------------------- /workloads/diannao/dnn.hpp: -------------------------------------------------------------------------------- 1 | #ifndef DNN_H 2 | #define DNN_H 3 | 4 | #define M_REPEAT_4(X) X X X X 5 | #define M_REPEAT_8(X) M_REPEAT_4(X) M_REPEAT_4(X) 6 | #define M_REPEAT_16(X) M_REPEAT_8(X) M_REPEAT_8(X) 7 | #define M_REPEAT_32(X) M_REPEAT_16(X) M_REPEAT_16(X) 8 | 9
#include <iostream> 10 | 11 | //#define VTYPE uint32_t 12 | #define VTYPE uint16_t 13 | 14 | 15 | //static __inline__ uint64_t rdtsc(void) { 16 | // unsigned a, d; 17 | // //asm("cpuid"); 18 | // //asm volatile("rdtsc" : "=a" (a), "=d" (d)); 19 | // 20 | // return (((uint64_t)a) | (((uint64_t)d) << 32)); 21 | //} 22 | // 23 | //uint64_t ticks; 24 | //__attribute__ ((noinline)) void begin_roi() { 25 | // ticks=rdtsc(); 26 | //} 27 | //__attribute__ ((noinline)) void end_roi() { 28 | // ticks=(rdtsc()-ticks); 29 | // std::cout << "ticks: " << ticks << "\n"; 30 | //} 31 | 32 | //VTYPE a[16]; 33 | //VTYPE b[16]; 34 | // 35 | VTYPE sigmoid(VTYPE i) { 36 | // return a[i&0xF]*i+b[i&0xF]; 37 | return i*1024/(1024+i); 38 | return i; 39 | } 40 | 41 | 42 | 43 | void compare_short(VTYPE* neuron1, VTYPE* neuron2, int size) { 44 | bool error = false; 45 | for(int i = 0; i < size; ++i) { 46 | if(neuron1[i] != neuron2[i]) { 47 | printf("%d: %d %d\n",i,neuron1[i],neuron2[i]); 48 | error=true; 49 | //std::cout << i << " " << neuron1[i] << ":" << neuron2[i] << "\n"; 50 | } 51 | } 52 | if(error) { 53 | std::cout << "ERROR: Results DO NOT Match\n"; 54 | } else { 55 | std::cout << "Results Match\n"; 56 | } 57 | } 58 | 59 | void compare(VTYPE* neuron1, VTYPE* neuron2, int size) { 60 | bool error = false; 61 | for(int i = 0; i < size; ++i) { 62 | if(neuron1[i] != neuron2[i]) { 63 | error = true; 64 | break; 65 | } 66 | } 67 | if(error) { 68 | for(int i = 0; i < size; ++i) { 69 | std::cout << i << " " << neuron1[i] << ":" << neuron2[i]; 70 | if(neuron1[i] != neuron2[i]) { 71 | std::cout << " \t\tERROR"; 72 | } 73 | std::cout << "\n"; 74 | } 75 | } else { 76 | std::cout << "results match\n"; 77 | } 78 | } 79 | 80 | void* aligned_malloc(uint64_t align, uint64_t bytes) { 81 | size_t mask = (align-1)^((size_t)-1); 82 | char* ptr = (((char*)malloc(bytes+align)) + align); 83 | ptr = (char*) (((size_t)ptr) & mask); 84 | 85 | //touch each page to bring into OS!
-- yes this takes a long time 86 | //never mind that, touch each cache line to bring into l2 87 | for(int i = 0; i < bytes; i+=32) { 88 | ptr[i]=0; 89 | } 90 | return (void*) ptr; 91 | } 92 | 93 | #endif 94 | -------------------------------------------------------------------------------- /workloads/diannao/pool2x2avg.dfg: -------------------------------------------------------------------------------- 1 | Input: R0 2 | Input: R1 3 | Input: R2 4 | Input: R3 5 | Input: R4 6 | 7 | InputVec: prev [0, 1, 2, 3] 8 | 9 | vsum0 =Add16x4(R0, R1) 10 | vsum1 =Add16x4(R1, R2) 11 | vsum2 =Add16x4(R2, R3) 12 | vsum3 =Add16x4(R3, R4) 13 | 14 | H0 =HAdd16x4(R0, vsum0) 15 | H1 =HAdd16x4(R1, vsum1) 16 | H2 =HAdd16x4(R2, vsum2) 17 | H3 =HAdd16x4(R3, vsum3) 18 | 19 | O0=RShf16x4(H0) 20 | O1=RShf16x4(H1) 21 | O2=RShf16x4(H2) 22 | O3=RShf16x4(H3) 23 | 24 | Output: O0 25 | Output: O1 26 | Output: O2 27 | Output: O3 28 | 29 | -------------------------------------------------------------------------------- /workloads/diannao/pool2x2l4avg.dfg: -------------------------------------------------------------------------------- 1 | Input: R0 2 | Input: R1 3 | Input: R2 4 | Input: R3 5 | Input: R4 6 | 7 | InputVec: P [0, 1, 2, 3] 8 | 9 | I0 =Add16x4(R0, R1) 10 | I1 =Add16x4(R1, R2) 11 | I2 =Add16x4(R2, R3) 12 | I3 =Add16x4(R3, R4) 13 | 14 | H0 =Add16x4(P0, I0) 15 | H1 =Add16x4(P1, I1) 16 | H2 =Add16x4(P2, I2) 17 | H3 =Add16x4(P3, I3) 18 | 19 | O0=RShf2_16x4(H0) 20 | O1=RShf2_16x4(H1) 21 | O2=RShf2_16x4(H2) 22 | O3=RShf2_16x4(H3) 23 | 24 | OutputVec: I [0, 1, 2, 3] 25 | 26 | Output: O0 27 | Output: O1 28 | Output: O2 29 | Output: O3 30 | -------------------------------------------------------------------------------- /workloads/diannao/pool4x4l2avg.dfg: -------------------------------------------------------------------------------- 1 | Input: R0 2 | Input: R1 3 | Input: R2 4 | Input: R3 5 | Input: R4 6 | 7 | InputVec: Xa [0, 1] 8 | InputVec: Xb [0, 1] 9 | InputVec: Xc [0, 1] 10 | 11 | R12=Add16x4(R1, R2) 12 | R123=Add16x4(R12, R3) 13 | R0123=Add16x4(R0,R123) 14 | R1234=Add16x4(R123, R4) 15 | 16 | Xd0=R0123 17 | Xd1=R1234 18 | 19 | Xcd0=Add16x4(Xc0,Xd0) 20 | Xcd1=Add16x4(Xc1,Xd1) 21 | 22 | Xab0=Add16x4(Xa0,Xb0) 23 | Xab1=Add16x4(Xa1,Xb1) 24 | 25 | O0_p=Add16x4(Xab0,Xcd0) 26 | O1_p=Add16x4(Xab1,Xcd1) 27 | 28 | O0=RShf4_16x4(O0_p) 29 | O1=RShf4_16x4(O1_p) 30 | 31 | Oa0=Xb0 32 | Oa1=Xb1 33 | Ob0=Xc0 34 | Ob1=Xc1 35 | Oc0=R0123 36 | Oc1=R1234 37 | 38 | OutputVec: Oa [0, 1] 39 | OutputVec: Ob [0, 1] 40 | OutputVec: Oc [0, 1] 41 | 42 | Output: O0 43 | Output: O1 44 | -------------------------------------------------------------------------------- /workloads/diannao/pooling.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "dnn.hpp" 4 | #include 5 | #include 6 | 7 | 8 | using namespace std; 9 | 10 | #if SB 11 | #include "pool2x2l4avg.h" 12 | #include "pool4x4l2avg.h" 13 | #endif 14 | 15 | #include "sim_timing.h" 16 | 17 | #define AVG 1 18 | 19 | //Problem Size 20 | #ifndef Ny //if Ny is undefined, then assume nothing is defined 21 | #define Ny 32 22 | #define Nx 32 23 | 24 | #define Kx 4 25 | #define Ky 4 26 | //#define Ni 100 //Input Layers == Ouptut Layers 27 | #define Ni 128 28 | #endif 29 | 30 | //slide increment 31 | #ifndef Sy 32 | #define Sx 1 33 | #define Sy 1 34 | #endif 35 | 36 | #ifndef Tii //Tiling Sizes: 37 | #define Tii 64 38 | #define Ti 16 39 | #define Ty 16 40 | #define Tx 16 41 | #endif 42 | 43 | #define NYPAD (Ny+Ky) 44 | #define NXPAD 
(Nx+Kx) 45 | 46 | #define NYSCL (Ny/Sy) 47 | #define NXSCL (Nx/Sx) 48 | 49 | 50 | void fill_pooling(VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 51 | VTYPE (&neuron_n1)[NYSCL][NXSCL][Ni], 52 | VTYPE (&neuron_n2)[NYSCL][NXSCL][Ni]) { 53 | int total=0; 54 | for(int yy = 0; yy < NYPAD; ++yy) { 55 | for(int xx = 0; xx < NXPAD; ++xx) { 56 | for(int ni = 0; ni < Ni; ++ni) { 57 | //neuron_i[yy][xx][ni] = xx+yy+ni; 58 | neuron_i[yy][xx][ni] = rand() &0x3FFF; 59 | 60 | //neuron_i[yy][xx][ni] = 1; 61 | } 62 | } 63 | } 64 | //takes too long.... 65 | for(int yy = 0; yy < NYSCL; ++yy) { 66 | for(int xx = 0; xx < NXSCL; ++xx) { 67 | for(int ni = 0; ni < Ni; ++ni) { 68 | neuron_n1[yy][xx][ni] = 0; 69 | neuron_n2[yy][xx][ni] = 0; 70 | } 71 | } 72 | } 73 | } 74 | 75 | int pooling_layer_blocked(VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 76 | VTYPE (&neuron_n)[NYSCL][NXSCL][Ni]) { 77 | int c=0; 78 | 79 | VTYPE value[Ni]={0}; 80 | for (int yy = 0; yy < Ny; yy += Ty) { 81 | for (int xx = 0; xx < Nx; xx += Tx) { 82 | for (int iii = 0; iii < Ni; iii += Tii) { 83 | // — Original code — (excluding ii loop) 84 | int yout = yy/Sy; 85 | for (int y = yy; y < yy + Ty; y += Sy) { 86 | int xout = xx/Sx; 87 | for (int x = xx; x < xx + Tx; x += Sx) { 88 | 89 | for (int ii = iii; ii < iii + Tii; ii += Ti) { 90 | for (int i = ii; i < ii + Ti; i++) { 91 | value[i] = 0; 92 | } 93 | 94 | for (int ky = 0; ky < Ky; ky++) { 95 | for (int kx = 0; kx < Kx; kx++) { 96 | //c++; 97 | for (int i = ii; i < ii + Ti; i++) { 98 | #ifdef AVG 99 | value[i] += neuron_i[ky + y][kx + x][i]; 100 | #else 101 | value[i] = max(value[i], neuron_i[ky + y][kx + x][i]); 102 | #endif 103 | } 104 | } 105 | } 106 | 107 | for (int i = ii; i < ii + Ti; i++) { 108 | #ifdef AVG 109 | neuron_n[yout][xout][i] = value[i] / (Kx * Ky); 110 | #else 111 | neuron_n[yout][xout][i] = value[i]; 112 | #endif 113 | } 114 | } 115 | xout++; 116 | } 117 | yout++; 118 | } 119 | } 120 | } 121 | } 122 | return c; 123 | } 124 | 125 | void pooling_layer(VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 126 | VTYPE (&neuron_n)[NYSCL][NXSCL][Ni]) { 127 | VTYPE value[Ni]={0}; 128 | // — Original code — 129 | int yout = 0; 130 | for (int y = 0; y < Ny; y += Sy) { 131 | int xout = 0; 132 | for (int x = 0; x < Nx; x += Sx) { 133 | for (int i = 0; i < Ni; i++) { 134 | value[i]=0; 135 | } 136 | 137 | for (int ky = 0; ky < Ky; ky++) { 138 | for (int kx = 0; kx < Kx; kx++) { 139 | for (int i = 0; i < Ni; i++) { 140 | #ifdef AVG 141 | value[i] += neuron_i[ky + y][kx + x][i]; 142 | #else 143 | value[i] = max(value[i], neuron_i[ky + y][kx + x][i]); 144 | #endif 145 | } 146 | } 147 | } 148 | 149 | for (int i = 0; i < Ni; i++) { 150 | #ifdef AVG 151 | neuron_n[yout][xout][i] = value[i] / (Kx * Ky); 152 | #else 153 | neuron_n[yout][xout][i] = value[i]; 154 | #endif 155 | } 156 | xout++; 157 | } 158 | yout++; 159 | } 160 | } 161 | 162 | #if SB 163 | 164 | int pooling_layer_blocked_sb_4x4_sx1_sy1(VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 165 | VTYPE (&neuron_n)[NYSCL][NXSCL][Ni]) { 166 | int c=0; 167 | 168 | int pipedepth=16; 169 | int pipedepth_bytes=(pipedepth*8); 170 | 171 | SB_CONFIG(pool4x4l2avg_config, pool4x4l2avg_size); 172 | 173 | VTYPE value[Ni]={0}; 174 | for (int yy = 0; yy < Ny; yy += Ty) { 175 | for (int xx = 0; xx < Nx; xx += Tx) { 176 | //cout << dec << "\n yy: " << yy << " xx: " << xx << "\n"; 177 | 178 | 179 | for (int iii = 0; iii < Ni; iii += pipedepth*4) { 180 | // — Original code — (excluding ii loop) 181 | int yout = yy/Sy; 182 | 183 | for (int y = yy; y < yy + Ty; y += 2) { // two rows at a time 184 | 
//int xout = xx/Sx; 185 | //upper -- xx + Tx 186 | //lower - xx 187 | 188 | //cout << dec << "\n yy: " << yy << " xx: " << xx << " iii: " << iii << "\n"; 189 | 190 | //First three loops produce garbage 191 | 192 | SB_CONST(P_pool4x4l2avg_Xa, 0, 2*pipedepth*1); //Initialize garbage inputs 193 | SB_CONST(P_pool4x4l2avg_Xb, 0, 2*pipedepth*1); 194 | SB_CONST(P_pool4x4l2avg_Xc, 0, 2*pipedepth*1); 195 | 196 | SB_DMA_READ(&neuron_i[y+0][xx][iii],Ni*sizeof(VTYPE),pipedepth_bytes,Tx+3,P_pool4x4l2avg_R0); 197 | SB_DMA_READ(&neuron_i[y+1][xx][iii],Ni*sizeof(VTYPE),pipedepth_bytes,Tx+3,P_pool4x4l2avg_R1); 198 | SB_DMA_READ(&neuron_i[y+2][xx][iii],Ni*sizeof(VTYPE),pipedepth_bytes,Tx+3,P_pool4x4l2avg_R2); 199 | SB_DMA_READ(&neuron_i[y+3][xx][iii],Ni*sizeof(VTYPE),pipedepth_bytes,Tx+3,P_pool4x4l2avg_R3); 200 | SB_DMA_READ(&neuron_i[y+4][xx][iii],Ni*sizeof(VTYPE),pipedepth_bytes,Tx+3,P_pool4x4l2avg_R4); 201 | 202 | //each rec has a slightly different number of iterations 203 | SB_RECURRENCE(P_pool4x4l2avg_Oa,P_pool4x4l2avg_Xa,2*pipedepth*(Tx+2)); 204 | SB_RECURRENCE(P_pool4x4l2avg_Ob,P_pool4x4l2avg_Xb,2*pipedepth*(Tx+2)); 205 | SB_RECURRENCE(P_pool4x4l2avg_Oc,P_pool4x4l2avg_Xc,2*pipedepth*(Tx+2)); 206 | 207 | SB_GARBAGE(P_pool4x4l2avg_O0,pipedepth*3); 208 | SB_GARBAGE(P_pool4x4l2avg_O1,pipedepth*3); 209 | 210 | SB_DMA_WRITE(P_pool4x4l2avg_O0,Ni*sizeof(VTYPE),pipedepth_bytes,Tx,&neuron_n[y+0][xx][iii]); 211 | SB_DMA_WRITE(P_pool4x4l2avg_O1,Ni*sizeof(VTYPE),pipedepth_bytes,Tx,&neuron_n[y+1][xx][iii]); 212 | 213 | SB_GARBAGE(P_pool4x4l2avg_Oa,2*pipedepth*1); 214 | SB_GARBAGE(P_pool4x4l2avg_Ob,2*pipedepth*1); 215 | SB_GARBAGE(P_pool4x4l2avg_Oc,2*pipedepth*1); 216 | } 217 | } 218 | } 219 | } 220 | SB_WAIT_ALL(); 221 | return c; 222 | } 223 | 224 | 225 | int pooling_layer_blocked_sb_2x2_sx1_sy1(VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 226 | VTYPE (&neuron_n)[NYSCL][NXSCL][Ni]) { 227 | int c=0; 228 | 229 | int pipedepth=16; 230 | int pipedepth_bytes=(pipedepth*8); 231 | 232 | SB_CONFIG(pool2x2l4avg_config, pool2x2l4avg_size); 233 | 234 | for (int yy = 0; yy < Ny; yy += Ty) { 235 | for (int xx = 0; xx < Nx; xx += Tx) { 236 | for (int iii = 0; iii < Ni; iii += pipedepth*4) { 237 | // — Original code — (excluding ii loop) 238 | int yout = yy/Sy; 239 | for (int y = yy; y < yy + Ty; y += 4) { // two rows at a time 240 | //int xout = xx/Sx; 241 | //upper -- xx + Tx 242 | //lower - xx 243 | 244 | //cout << dec << "\n yy: " << yy << " xx: " << xx << " iii: " << iii << "\n"; 245 | 246 | //First three loops produce garbage 247 | 248 | SB_CONST(P_pool2x2l4avg_P, 0, 4*pipedepth*1); //Initialize garbage inputs 249 | 250 | SB_DMA_READ(&neuron_i[y+0][xx][iii],Ni*sizeof(VTYPE),pipedepth_bytes,Tx+1,P_pool2x2l4avg_R0); 251 | SB_DMA_READ(&neuron_i[y+1][xx][iii],Ni*sizeof(VTYPE),pipedepth_bytes,Tx+1,P_pool2x2l4avg_R1); 252 | SB_DMA_READ(&neuron_i[y+2][xx][iii],Ni*sizeof(VTYPE),pipedepth_bytes,Tx+1,P_pool2x2l4avg_R2); 253 | SB_DMA_READ(&neuron_i[y+3][xx][iii],Ni*sizeof(VTYPE),pipedepth_bytes,Tx+1,P_pool2x2l4avg_R3); 254 | SB_DMA_READ(&neuron_i[y+4][xx][iii],Ni*sizeof(VTYPE),pipedepth_bytes,Tx+1,P_pool2x2l4avg_R4); 255 | 256 | SB_RECURRENCE(P_pool2x2l4avg_I,P_pool2x2l4avg_P,4*pipedepth*(Tx)); 257 | 258 | SB_GARBAGE(P_pool2x2l4avg_O0,pipedepth*1); 259 | SB_GARBAGE(P_pool2x2l4avg_O1,pipedepth*1); 260 | SB_GARBAGE(P_pool2x2l4avg_O2,pipedepth*1); 261 | SB_GARBAGE(P_pool2x2l4avg_O3,pipedepth*1); 262 | 263 | SB_DMA_WRITE(P_pool2x2l4avg_O0,Ni*sizeof(VTYPE),pipedepth_bytes,Tx,&neuron_n[y+0][xx][iii]); 264 | 
SB_DMA_WRITE(P_pool2x2l4avg_O1,Ni*sizeof(VTYPE),pipedepth_bytes,Tx,&neuron_n[y+1][xx][iii]); 265 | SB_DMA_WRITE(P_pool2x2l4avg_O2,Ni*sizeof(VTYPE),pipedepth_bytes,Tx,&neuron_n[y+2][xx][iii]); 266 | SB_DMA_WRITE(P_pool2x2l4avg_O3,Ni*sizeof(VTYPE),pipedepth_bytes,Tx,&neuron_n[y+3][xx][iii]); 267 | 268 | SB_GARBAGE(P_pool2x2l4avg_I,4*pipedepth*1); 269 | 270 | } 271 | } 272 | } 273 | } 274 | SB_WAIT_ALL(); 275 | return c; 276 | } 277 | 278 | int pooling_layer_blocked_sb_2x2_sx1_sy1_full_ni(VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 279 | VTYPE (&neuron_n)[NYSCL][NXSCL][Ni]) { 280 | int c=0; 281 | 282 | int pipedepth=16; 283 | int pipedepth_bytes=(pipedepth*8); 284 | int DP_WIDTH=8; 285 | 286 | SB_CONFIG(pool2x2l4avg_config, pool2x2l4avg_size); 287 | 288 | for (int yy = 0; yy < Ny; yy += Ty) { 289 | for (int xx = 0; xx < Nx; xx += Tx) { 290 | for (int y = yy; y < yy + Ty; y += 4) { // two rows at a time 291 | //cout << dec << "\n yy: " << yy << " xx: " << xx << "\n"; 292 | 293 | //First three loops produce garbage 294 | 295 | int ni_elem = Ni*sizeof(VTYPE)/DP_WIDTH; 296 | SB_CONST(P_pool2x2l4avg_P, 0, 4*ni_elem); 297 | 298 | SB_DMA_READ(&neuron_i[y+0][xx][0],Ni*sizeof(VTYPE),Ni*sizeof(VTYPE),Tx+1,P_pool2x2l4avg_R0); 299 | SB_DMA_READ(&neuron_i[y+1][xx][0],Ni*sizeof(VTYPE),Ni*sizeof(VTYPE),Tx+1,P_pool2x2l4avg_R1); 300 | SB_DMA_READ(&neuron_i[y+2][xx][0],Ni*sizeof(VTYPE),Ni*sizeof(VTYPE),Tx+1,P_pool2x2l4avg_R2); 301 | SB_DMA_READ(&neuron_i[y+3][xx][0],Ni*sizeof(VTYPE),Ni*sizeof(VTYPE),Tx+1,P_pool2x2l4avg_R3); 302 | SB_DMA_READ(&neuron_i[y+4][xx][0],Ni*sizeof(VTYPE),Ni*sizeof(VTYPE),Tx+1,P_pool2x2l4avg_R4); 303 | 304 | SB_RECURRENCE(P_pool2x2l4avg_I,P_pool2x2l4avg_P,4*ni_elem*Tx); 305 | 306 | SB_GARBAGE(P_pool2x2l4avg_O0,ni_elem); 307 | SB_GARBAGE(P_pool2x2l4avg_O1,ni_elem); 308 | SB_GARBAGE(P_pool2x2l4avg_O2,ni_elem); 309 | SB_GARBAGE(P_pool2x2l4avg_O3,ni_elem); 310 | 311 | SB_DMA_WRITE(P_pool2x2l4avg_O0,Ni*sizeof(VTYPE),Ni*sizeof(VTYPE),Tx,&neuron_n[y+0][xx][0]); 312 | SB_DMA_WRITE(P_pool2x2l4avg_O1,Ni*sizeof(VTYPE),Ni*sizeof(VTYPE),Tx,&neuron_n[y+1][xx][0]); 313 | SB_DMA_WRITE(P_pool2x2l4avg_O2,Ni*sizeof(VTYPE),Ni*sizeof(VTYPE),Tx,&neuron_n[y+2][xx][0]); 314 | SB_DMA_WRITE(P_pool2x2l4avg_O3,Ni*sizeof(VTYPE),Ni*sizeof(VTYPE),Tx,&neuron_n[y+3][xx][0]); 315 | 316 | SB_GARBAGE(P_pool2x2l4avg_I,4*ni_elem); 317 | } 318 | } 319 | } 320 | SB_WAIT_ALL(); 321 | return c; 322 | } 323 | #endif 324 | 325 | int test_layer(VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 326 | VTYPE (&neuron_n)[NYSCL][NXSCL][Ni]) { 327 | begin_roi(); 328 | #ifdef SB 329 | #if Kx == 4 330 | pooling_layer_blocked_sb_4x4_sx1_sy1(neuron_i,neuron_n); 331 | #elif Kx == 2 332 | #if Ni < 64 333 | pooling_layer_blocked_sb_2x2_sx1_sy1_full_ni(neuron_i,neuron_n); 334 | #else 335 | pooling_layer_blocked_sb_2x2_sx1_sy1(neuron_i,neuron_n); 336 | #endif 337 | #else 338 | #error "Kx must be 2 or 4" 339 | #endif 340 | #else 341 | pooling_layer_blocked(neuron_i,neuron_n); 342 | #endif 343 | end_roi(); 344 | } 345 | 346 | 347 | int main(int argc, char** argv) { 348 | //Arrays: 349 | //VTYPE neuron_i[NYPAD][NXPAD][Ni]; 350 | //VTYPE neuron_n[NYSCL][NXSCL][Ni]; 351 | //VTYPE neuron_n2[NYSCL][NXSCL][Ni]; 352 | 353 | VTYPE (*neuron_i)[NYPAD][NXPAD][Ni]; 354 | VTYPE (*neuron_n)[NYSCL][NXSCL][Ni]; 355 | VTYPE (*neuron_n2)[NYSCL][NXSCL][Ni]; 356 | 357 | //cout << "allocating memory\n"; 358 | neuron_i = (VTYPE (*)[NYPAD][NXPAD][Ni])aligned_malloc(64,NYPAD*NXPAD*Ni*sizeof(VTYPE)+64); 359 | neuron_n = (VTYPE 
(*)[NYSCL][NXSCL][Ni])aligned_malloc(64,NYSCL*NXSCL*Ni*sizeof(VTYPE)+64); 360 | neuron_n2 = (VTYPE (*)[NYSCL][NXSCL][Ni])aligned_malloc(64,NYSCL*NXSCL*Ni*sizeof(VTYPE)+64); 361 | 362 | //cout << "NYSCL: " << NYSCL << "\n"; 363 | //cout << "NXSCL: " << NXSCL << "\n"; 364 | //cout << "Ni: " << Ni << "\n"; 365 | 366 | //cout << "bound i\t" << hex << &(*neuron_i)[0][0][0] << " to " << &(*neuron_i)[NYPAD-1][NXPAD-1][Ni-1] << "\n"; 367 | //cout << "bound n1\t" << hex << &(*neuron_n)[0][0][0] << " to " << &(*neuron_n)[NYSCL-1][NXSCL-1][Ni-1] << "\n"; 368 | //cout << "bound n2\t" << hex << &(*neuron_n2)[0][0][0] << " to " << &(*neuron_n2)[NYSCL-1][NXSCL-1][Ni-1] << "\n"; 369 | 370 | //cout << "0,0,1\t" << hex << &(*neuron_n2)[0][0][1] << "\n"; 371 | //cout << "0,1,0\t" << hex << &(*neuron_n2)[0][1][0] << "\n"; 372 | //cout << "1,0,0\t" << hex << &(*neuron_n2)[1][0][0] << "\n"; 373 | 374 | //cout << "isize: " << NYPAD*NXPAD*Ni*sizeof(VTYPE) << "\n"; 375 | //cout << "nsize: " << NYSCL*NXSCL*Ni*sizeof(VTYPE) << "\n"; 376 | 377 | 378 | if(argc==3) { 379 | 380 | //cout << "Did nothing\n"; 381 | 382 | // } else if(argc==2 && string(argv[1])=="perf") { 383 | } else if(argc==2) { 384 | test_layer(*neuron_i,*neuron_n); 385 | //cout << "Perf Run Complete\n"; 386 | } else { 387 | cout << "initializing arrays\n"; 388 | fill_pooling(*neuron_i,*neuron_n,*neuron_n2); 389 | cout << "starting computation\n"; 390 | 391 | int calc = 0; 392 | pooling_layer(*neuron_i,*neuron_n); 393 | test_layer(*neuron_i,*neuron_n2); 394 | cout << "computation complete!\n"; 395 | 396 | if(calc > 0) { 397 | cout << "calc: " << calc << "\n"; 398 | } 399 | compare_short((VTYPE*)*neuron_n,(VTYPE*)*neuron_n2,NYSCL*NXSCL*Ni); 400 | cout << "adds: " << NYSCL*NXSCL*Ni*Ky*Kx << "\n"; 401 | cout << "argc:" << argc << "\n"; 402 | // cout << "mult-block: " << calc.first << " sigmoid-block: " << calc.second << "\n"; 403 | // cout << "mult-orig: " << calc2.first << " sigmoid-orig: " << calc2.second << "\n"; 404 | // 405 | // int n_outputs= Ny/Sy * Nx/Sx * Nn; 406 | // cout << "mult-correct: " << n_outputs*Ni*Kx*Ky 407 | // << " sigmoid-correct: " << n_outputs << "\n"; 408 | } 409 | sb_stats(); 410 | 411 | } 412 | 413 | -------------------------------------------------------------------------------- /workloads/diannao/red16to1sig.dfg: -------------------------------------------------------------------------------- 1 | InputVec: N [0, 1, 2, 3] 2 | InputVec: S [0, 1, 2, 3, 4, 5, 6, 7] 3 | InputVec: acc [0, 1] 4 | Input: pred 5 | 6 | #compute lanes "A" and "B" 7 | 8 | AM0 =Mul16x4(N0, S0) 9 | AM1 =Mul16x4(N1, S1) 10 | AM2 =Mul16x4(N2, S2) 11 | AM3 =Mul16x4(N3, S3) 12 | 13 | AS0 =Add16x4(AM0, AM1) 14 | AS1 =Add16x4(AM2, AM3) 15 | 16 | AS2 =Add16x4(AS0, AS1) 17 | 18 | AR = Red16x4(AS2, acc0) 19 | 20 | out0 = Sig16(AR, pred) 21 | 22 | 23 | 24 | BM0 =Mul16x4(N0, S4) 25 | BM1 =Mul16x4(N1, S5) 26 | BM2 =Mul16x4(N2, S6) 27 | BM3 =Mul16x4(N3, S7) 28 | 29 | BS0 =Add16x4(BM0, BM1) 30 | BS1 =Add16x4(BM2, BM3) 31 | 32 | BS2 =Add16x4(BS0, BS1) 33 | 34 | BR = Red16x4(BS2, acc1) 35 | 36 | out1 = Sig16(BR, pred) 37 | 38 | OutputVec: out [0, 1] 39 | 40 | -------------------------------------------------------------------------------- /workloads/diannao/red16to1sigx2.dfg: -------------------------------------------------------------------------------- 1 | InputVec: NA [0, 1, 2, 3] 2 | InputVec: SA [0, 1, 2, 3] 3 | InputVec: NB [0, 1, 2, 3] 4 | InputVec: SB [0, 1, 2, 3] 5 | 6 | InputVec: acc [0, 1] 7 | InputVec: pred [0, 1] 8 | 9 | #compute lanes "A" and "B" 10 | 11 | 
MA0 =Mul16x4(NA0, SA0) 12 | MA1 =Mul16x4(NA1, SA1) 13 | MA2 =Mul16x4(NA2, SA2) 14 | MA3 =Mul16x4(NA3, SA3) 15 | 16 | SA0 =Add16x4(MA0, MA1) 17 | SA1 =Add16x4(MA2, MA3) 18 | 19 | SA2 =Add16x4(SA0, SA1) 20 | 21 | RA = Red16x4(SA2, acc0) 22 | 23 | out0 = Sig16(RA,pred0) 24 | 25 | 26 | 27 | MB0 =Mul16x4(NB0, SB0) 28 | MB1 =Mul16x4(NB1, SB1) 29 | MB2 =Mul16x4(NB2, SB2) 30 | MB3 =Mul16x4(NB3, SB3) 31 | 32 | SB0 =Add16x4(MB0, MB1) 33 | SB1 =Add16x4(MB2, MB3) 34 | 35 | SB2 =Add16x4(SB0, SB1) 36 | 37 | RB = Red16x4(SB2, acc1) 38 | 39 | out1 = Sig16(RB,pred1) 40 | 41 | OutputVec: out [0, 1] 42 | 43 | 44 | -------------------------------------------------------------------------------- /workloads/diannao/red32to1sig.dfg: -------------------------------------------------------------------------------- 1 | InputVec: N [0, 1, 2, 3, 4, 5, 6, 7] # 8Wide 1Deep 2 | InputVec: S [0, 1, 2, 3, 4, 5, 6, 7] # 8Wide 1Deep 3 | Input: acc 4 | Input: pred 5 | 6 | M0 =Mul16x4(N0, S0) 7 | M1 =Mul16x4(N1, S1) 8 | M2 =Mul16x4(N2, S2) 9 | M3 =Mul16x4(N3, S3) 10 | M4 =Mul16x4(N4, S4) 11 | M5 =Mul16x4(N5, S5) 12 | M6 =Mul16x4(N6, S6) 13 | M7 =Mul16x4(N7, S7) 14 | 15 | A0 =Add16x4(M0, M1) 16 | A1 =Add16x4(M2, M3) 17 | A2 =Add16x4(M4, M5) 18 | A3 =Add16x4(M6, M7) 19 | 20 | A8 =Add16x4(A0, A1) 21 | A9 =Add16x4(A2, A3) 22 | 23 | A10 = Add16x4(A8, A9) 24 | 25 | R = Red16x4(A10, acc) 26 | 27 | out=Sig16(R, pred) 28 | 29 | Output: out 30 | 31 | 32 | -------------------------------------------------------------------------------- /workloads/diannao/red8to1sig.dfg: -------------------------------------------------------------------------------- 1 | InputVec: N [0, 1] 2 | InputVec: S [0, 1, 2, 3] 3 | InputVec: acc [0, 1] 4 | Input: pred 5 | 6 | #compute lanes "A" and "B" 7 | 8 | AM0 =Mul16x4(N0, S0) 9 | AM1 =Mul16x4(N1, S1) 10 | 11 | AS0 =Add16x4(AM0, AM1) 12 | 13 | AR = Red16x4(AS0, acc0) 14 | 15 | out0 = Sig16(AR, pred) 16 | 17 | 18 | 19 | BM0 =Mul16x4(N0, S2) 20 | BM1 =Mul16x4(N1, S3) 21 | 22 | BS0 =Add16x4(BM0, BM1) 23 | 24 | BR = Red16x4(BS0, acc1) 25 | 26 | out1 = Sig16(BR, pred) 27 | 28 | OutputVec: out [0, 1] 29 | 30 | -------------------------------------------------------------------------------- /workloads/diannao/run-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | args="" 4 | #args="no run" 5 | #args="perf" 6 | 7 | for i in *[0-9]p *[0-9]sb; do 8 | echo -n "./$i $args " 9 | ./$i $args 10 | # ticks=`echo $out | cut -d: -f2` 11 | # echo $ticks 12 | done 13 | -------------------------------------------------------------------------------- /workloads/diannao/sim_timing.h: -------------------------------------------------------------------------------- 1 | #ifdef __x86_64__ 2 | static __inline__ uint64_t rdtsc(void) { 3 | unsigned a, d; 4 | //asm("cpuid"); 5 | asm volatile("rdtsc" : "=a" (a), "=d" (d)); 6 | 7 | return (((uint64_t)a) | (((uint64_t)d) << 32)); 8 | } 9 | 10 | static uint64_t ticks; 11 | __attribute__ ((noinline)) void begin_roi() { 12 | ticks=rdtsc(); 13 | } 14 | __attribute__ ((noinline)) void end_roi() { 15 | ticks=(rdtsc()-ticks); 16 | printf("ticks: %ld\n",ticks); 17 | } 18 | __attribute__ ((noinline)) static void sb_stats() { 19 | } 20 | __attribute__ ((noinline)) static void sb_verify() { 21 | } 22 | 23 | #else 24 | __attribute__ ((noinline)) static void begin_roi() { 25 | __asm__ __volatile__("add x0, x0, 1"); \ 26 | } 27 | __attribute__ ((noinline)) static void end_roi() { 28 | __asm__ __volatile__("add x0, x0, 2"); \ 29 | } 30 | 
__attribute__ ((noinline)) static void sb_stats() { 31 | __asm__ __volatile__("add x0, x0, 3"); \ 32 | } 33 | __attribute__ ((noinline)) static void sb_verify() { 34 | __asm__ __volatile__("add x0, x0, 4"); \ 35 | } 36 | 37 | #endif 38 | 39 | -------------------------------------------------------------------------------- /workloads/diannao/softbrain.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * MM Headerfile for softbrain parameters relevant to DNN 3 | */ 4 | 5 | // Scratchpad info 6 | #define SCRATCHSIZE 2048 // number of dnn elts that fit in scratch pad 7 | #define SCRATCHSTART 0 // logical address for start of scratch 8 | 9 | // Virtual port interfaces 10 | // input ports for pipe0 11 | #define INPUTNEURON0 0 // Wide & deep 12 | #define INPUTWEIGHT0 1 // Wide & deep 13 | #define INPUTACC0 2 // Deep 14 | #define INPUTPRED0 3 // Deep 15 | // input ports for pipe1 16 | #define INPUTNEURON1 4 // Wide & deep 17 | #define INPUTWEIGHT1 5 // Wide & deep 18 | #define INPUTACC1 6 // Deep 19 | #define INPUTPRED1 7 // Deep 20 | 21 | // output ports 22 | #define OUTPUT0 8 // Output for pipe 0: Deep (pipe is 16:1 reduce) 23 | #define OUTPUT1 9 // Output for pipe 1: Deep 24 | 25 | --------------------------------------------------------------------------------
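The scratchpad constants above are element counts, not byte counts: the workloads compare layer sizes against SCRATCHSIZE directly (classifier.cpp checks Ni, convolution_old.cpp checks Kx*Ky*Ni). A minimal sketch of that convention, assuming VTYPE is uint16_t as in dnn.hpp (the helper and names below are illustrative only, not part of the repo):

// Editorial sketch -- mirrors the scratchpad-fit checks used by the diannao workloads.
#include <cstdint>

#define SB_SCRATCH_ELEMS 2048            // SCRATCHSIZE in softbrain.hpp, counted in elements
typedef uint16_t sb_vtype;               // VTYPE in dnn.hpp

static bool fits_in_scratch(long elems) {
  return elems <= SB_SCRATCH_ELEMS;      // compare element counts, as the workloads do
}

// e.g. the shared-kernel convolution input stack: 4*4*112 = 1792 elements (3584 bytes),
// which fits; the scratchpad's byte capacity is 2048 * sizeof(sb_vtype) = 4096 bytes.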