├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── msg.mk ├── setup.sh ├── softbrain-config ├── Makefile ├── configs │ └── diannao_simd64.sbmodel ├── make.config ├── make.rules └── src │ ├── Makefile │ ├── direction.cpp │ ├── direction.h │ ├── fixed_point.h │ ├── fu_model.cpp │ ├── fu_model.h │ ├── full.sbinst │ ├── inst_model.cpp │ ├── inst_model.h │ ├── insts │ ├── Abs16x4.h │ ├── Acc16x4.h │ ├── Acc64.h │ ├── Add16x4.h │ ├── Add32x2.h │ ├── Add64.h │ ├── And.h │ ├── Copy.h │ ├── Div16x4.h │ ├── FAdd32x2.h │ ├── FAdd64.h │ ├── FMul32x2.h │ ├── FMul64.h │ ├── FRed32x2.h │ ├── FxAdd16x4.h │ ├── FxAdd32x2.h │ ├── FxExp16x4.h │ ├── FxMul16x4.h │ ├── FxMul32x2.h │ ├── FxRed16x4.h │ ├── FxRed32x2.h │ ├── FxRelu16x4.h │ ├── FxSig16x4.h │ ├── FxTanh16x4.h │ ├── HAdd16x4.h │ ├── ICmpEQ.h │ ├── LShf64.h │ ├── Max16x4.h │ ├── Min16x4.h │ ├── Mul16x4.h │ ├── Mul32x2.h │ ├── Mul64.h │ ├── Or.h │ ├── RShf16x4.h │ ├── RShf2_16x4.h │ ├── RShf32x2.h │ ├── RShf4_16x4.h │ ├── RShf64.h │ ├── Red16x4.h │ ├── Red32x2.h │ ├── RedMax16x4.h │ ├── RedMin16x4.h │ ├── RedSMax16x4.h │ ├── RedSMin16x4.h │ ├── SMax16x4.h │ ├── SMin16x4.h │ ├── Select.h │ ├── Sig16.h │ ├── Sub16x4.h │ ├── Sub64.h │ ├── TAdd16x4.h │ └── Xor.h │ ├── model.cpp │ ├── model.h │ ├── model_parsing.cpp │ ├── model_parsing.h │ ├── sub_model.cpp │ └── sub_model.h ├── softbrain-emu ├── Makefile └── src │ ├── .gitignore │ ├── create_insts.c │ ├── sb.h │ ├── sb_c_insts.h │ ├── sb_emu.h │ ├── sb_init.h │ └── softbrain.C ├── softbrain-scheduler ├── .gitignore ├── Makefile ├── dfgs │ └── 5x4 │ │ ├── bfs.dfg │ │ ├── dot.dfg │ │ ├── long.dfg │ │ ├── medium.dfg │ │ ├── medium_short.dfg │ │ ├── mm_sb.dfg │ │ ├── out.txt │ │ ├── pool2x2l4avg.dfg │ │ ├── pool4x4l2avg.dfg │ │ ├── pool_simple.dfg │ │ ├── red16to1sig.dfg │ │ ├── red16to1sigx2-simple.dfg │ │ ├── red16to1sigx2.dfg │ │ ├── red32to1sig.dfg │ │ ├── red8to1sig.dfg │ │ ├── run-all-sched.sh │ │ ├── run-sched.sh │ │ ├── spmv.dfg │ │ ├── stencil.dfg │ │ ├── sum.txt │ │ ├── vadd.dfg │ │ ├── vadd4.dfg │ │ ├── vadd5.dfg │ │ ├── vadd6.dfg │ │ └── viterbi.dfg ├── drivers │ ├── Makefile │ └── sb_dfg_emu.cpp ├── make.config ├── make.rules └── src │ ├── Makefile │ ├── sbpdg.cpp │ └── sbpdg.h └── workloads └── diannao ├── Makefile ├── classifier.cpp ├── convolution.cpp ├── convolution_old.cpp ├── dnn.hpp ├── pool2x2avg.dfg ├── pool2x2l4avg.dfg ├── pool4x4l2avg.dfg ├── pooling.cpp ├── red16to1sig.dfg ├── red16to1sigx2.dfg ├── red32to1sig.dfg ├── red8to1sig.dfg ├── run-all.sh ├── sim_timing.h └── softbrain.hpp /.gitignore: -------------------------------------------------------------------------------- 1 | ss-tools/ 2 | include/ 3 | 4 | # Prerequisites 5 | *.d 6 | 7 | # Compiled Object files 8 | *.slo 9 | *.lo 10 | *.o 11 | *.obj 12 | 13 | # Precompiled Headers 14 | *.gch 15 | *.pch 16 | 17 | # Compiled Dynamic libraries 18 | *.so 19 | *.dylib 20 | *.dll 21 | 22 | # Fortran module files 23 | *.mod 24 | *.smod 25 | 26 | # Compiled Static libraries 27 | *.lai 28 | *.la 29 | *.a 30 | *.lib 31 | 32 | # Executables 33 | *.exe 34 | *.out 35 | *.app 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, PolyArch 4 | All rights reserved. 
5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: default 2 | default: build-all 3 | 4 | include msg.mk 5 | 6 | SBLIBS = $(addprefix softbrain-, config scheduler emu) 7 | 8 | MODULES = $(SBLIBS) 9 | CLEAN_MODULES = $(addprefix clean-,$(MODULES)) 10 | 11 | .PHONY: $(MODULES) $(CLEAN_MODULES) 12 | 13 | .PHONY: build-all 14 | build-all: $(MODULES) 15 | 16 | .PHONY: clean-all 17 | clean-all: $(CLEAN_MODULES) 18 | 19 | SIMPLE = $(SBLIBS) 20 | 21 | $(SIMPLE): 22 | $(MAKE) -C $@ install 23 | 24 | $(addprefix clean-,$(SIMPLE)): 25 | $(MAKE) -C $(patsubst clean-%,%,$@) clean 26 | 27 | $(addprefix clean-,$(AUTOTOOLS)): 28 | rm -rf $(patsubst clean-%,%,$@)/build 29 | 30 | # Dependencies 31 | softbrain-scheduler: softbrain-config 32 | softbrain-emu: softbrain-scheduler softbrain-config 33 | 34 | 35 | full-rebuild: 36 | @echo "Wipe \$$SS_TOOLS ($$SS_TOOLS) and rebuild everything?" 37 | @read -p "[Y/n]: " yn && { [ -z $$yn ] || [ $$yn = Y ] || [ $$yn = y ]; } 38 | rm -rf "$$SS_TOOLS" 39 | $(MAKE) clean-all 40 | $(MAKE) build-all 41 | 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Repo Deprecated 2 | 3 | The content of this repo, including the stream-dataflow ISA implementation, has been moved to: 4 | 5 | github.com/PolyArch/stream-specialization-stack 6 | 7 | The new repository contains example workloads, a compiler, and a gem5-based simulator. 8 | 9 | 10 | # stream-dataflow 11 | Stream-Dataflow Infrastructure 12 | 13 | This is the location of the public release of the infrastructure for the stream dataflow architecture. 14 | Please keep in mind this is an early-stage release, and more advanced features will follow. 
15 | 16 | Description of Folders 17 | * softbrain-config: Library for Defining Accelerator Substrate Topology, Features, and Instructions 18 | * softbrain-emu: Library for software emulation of softbrain 19 | * softbrain-scheduler: Library for parsing and scheduling dataflow graphs to a particular topology. (right now only includes emulator code) 20 | * gem5: (To be included) 21 | * workloads: Example workloads including kernels based on the diannao parallelization strategy. 22 | 23 | 24 | # Try it: 25 | ```bash 26 | source setup.sh 27 | make -j8 28 | cd workloads/diannao 29 | make -j8 30 | bash run-all.sh 31 | ``` 32 | -------------------------------------------------------------------------------- /msg.mk: -------------------------------------------------------------------------------- 1 | define env-msg 2 | 3 | The following environment variables must be defined 4 | SS_STACK (suggested: $(PWD)) 5 | SS_TOOLS (suggested: $$SS_STACK/ss-tools) 6 | 7 | Additionally, $$SS_TOOLS/bin must be in your $$PATH. 8 | 9 | endef 10 | 11 | ifeq ($(SS_TOOLS),) 12 | $(error $(env-msg)) 13 | endif 14 | 15 | ifeq ($(SS_STACK),) 16 | $(error $(env-msg)) 17 | endif 18 | 19 | ifeq ($(findstring $(SS_TOOLS)/bin,$(PATH)),) 20 | $(error $(env-msg)) 21 | endif 22 | -------------------------------------------------------------------------------- /setup.sh: -------------------------------------------------------------------------------- 1 | export SS_STACK=`pwd` 2 | export SS_TOOLS=$SS_STACK/ss-tools 3 | export PATH=$SS_TOOLS/bin:$PATH 4 | export LD_LIBRARY_PATH=$SS_TOOLS/lib:$LD_LIBRARY_PATH 5 | 6 | 7 | -------------------------------------------------------------------------------- /softbrain-config/Makefile: -------------------------------------------------------------------------------- 1 | include $(SS_STACK)/msg.mk 2 | prefix:=$(SS_TOOLS) 3 | 4 | 5 | level=./ 6 | include make.config 7 | 8 | 9 | 10 | .PHONY: program 11 | 12 | all: directories program 13 | 14 | program: 15 | +make -C src 16 | 17 | install: program 18 | ${MKDIR_P} ${prefix}/lib 19 | cp ${build}/lib/* ${prefix}/lib 20 | ${MKDIR_P} ${prefix}/include/softbrain-config 21 | cp src/*.h ${prefix}/include/softbrain-config/ 22 | cp -rf configs ${prefix}/ 23 | 24 | clean: 25 | make -C src clean 26 | 27 | include make.rules 28 | -------------------------------------------------------------------------------- /softbrain-config/configs/diannao_simd64.sbmodel: -------------------------------------------------------------------------------- 1 | [fu-model] 2 | # fu_types and capabilities 3 | # Number after colon specifies encoding 4 | FU_TYPE FU_MUL: Mul16x4:2, Mul32x2:3, Mul64:4, RShf64:5, LShf64:6, FMul32x2: 7, Div16x4: 8, FxMul16x4: 9 5 | FU_TYPE FU_ADD: Add16x4:3, Red16x4:4, HAdd16x4:5, RShf4_16x4:6, RShf2_16x4:7, Add32x2:8, Red32x2:9, Add64:10, RShf64:11, Sub16x4:12, Abs16x4:13, Sub64:14, Max16x4:15, Min16x4:16, SMax16x4:17, SMin16x4:18, RedMax16x4:19, RedMin16x4:20, RedSMax16x4:21, RedSMin16x4:22, Select:23, And:24, Or:25, Xor:26, LShf64:27, Acc64:28, ICmpEQ:29, Acc16x4:30, FAdd32x2:31, RShf16x4:32, FRed32x2: 33, FxAdd16x4: 34, FxRed16x4: 35, FxExp16x4: 36 6 | 7 | FU_TYPE FU_SPC: RShf4_16x4:6, RShf2_16x4:7, Sig16:8, RShf16x4:32, FxRelu16x4: 53, FxSig16x4: 54, FxTanh16x4: 55 8 | 9 | 10 | OUT_DIRECTIONS: NE:0 SE:1 SW:2 NW:3 11 | 12 | [switch-model] 13 | #OUT_DIRECTIONS: N:0 NE:1 E:2 SE:3 S:4 SW:5 W:6 NW:7 14 | IN_DIRECTIONS: N:0 NE:1 E:2 S:3 W:4 15 | 16 | 17 | [sub-model] 18 | # DySER 8x8 Hetero Model File 19 | topology: grid 20 | width: 5 21 | height: 4 22 | 23 | 
io_layout: three_sides_in 24 | ins_per_switch: 3 25 | outs_per_switch: 3 26 | 27 | # Fully Specified Layout 28 | SB_LAYOUT: FULL 29 | FU_MUL FU_MUL FU_MUL FU_MUL FU_MUL 30 | FU_MUL FU_ADD FU_ADD FU_ADD FU_ADD 31 | FU_MUL FU_ADD FU_ADD FU_ADD FU_ADD 32 | FU_MUL FU_ADD FU_ADD FU_SPC FU_SPC 33 | 34 | #FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL 35 | #FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD 36 | #FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL 37 | #FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD 38 | #FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL 39 | #FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD 40 | #FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL FU_ADD FU_MUL 41 | #FU_MUL FU_ADD FU_MUL FU_SIG FU_MUL FU_ADD FU_MUL FU_SIG 42 | 43 | [io-model] 44 | #vector ports specify portno:vec_offset1 vec_offset2 45 | VPORT_IN 0: 2:0, 5:1, 8:2, 11:3, 17:4, 20:5, 23:6, 26:7 #standard 8-wide port 46 | VPORT_IN 1: 4:0, 7:1, 10:2, 16:3, 19:4, 22:5, 25:6, 31:7 #standard 8-wide port 47 | VPORT_IN 2: 4:0, 10:1, 19:2, 25:3 #4-wide 48 | VPORT_IN 3: 7:0, 18:1, 22:2, 31:3 #4-wide 49 | VPORT_IN 4: 3:0 #1 2 3 4 5 6 7 #8-deep output Port 50 | VPORT_IN 5: 6:0 #1 2 3 4 5 6 7 #8-deep output Port 51 | VPORT_IN 6: 12:0 #1 2 3 4 5 6 7 #8-deep output Port 52 | VPORT_IN 7: 15:0 #1 2 3 4 5 6 7 #8-deep output Port 53 | VPORT_IN 8: 18:0 #1 2 3 4 5 6 7 #8-deep output Port 54 | VPORT_IN 9: 24:0 #1 2 3 4 5 6 7 #8-deep output Port 55 | VPORT_IN 10: 27:0 #1 2 3 4 5 6 7 #8-deep output Port 56 | VPORT_IN 10: 32:0 #1 2 3 4 5 6 7 #8-deep output Port 57 | VPORT_IN 11: 4:0, 7:1, #2-wide 58 | VPORT_IN 12: 10:0, 16:1, #2-wide 59 | VPORT_IN 13: 19:0, 22:1, #2-wide 60 | VPORT_IN 14: 25:0, 31:1, #2-wide 61 | VPORT_IN 15: 8:0, 20:1, #2-wide 62 | VPORT_IN 16: 2:0, 8:1, 17:2, 23:3 #4-wide 63 | VPORT_IN 17: 5:0, 11:1, 20:2, 26:3 #4-wide 64 | VPORT_IN 18: 3:0, 12:1, 18:2, 27:3 #4-wide 65 | VPORT_IN 19: 3:0, 18:1, #2-wide 66 | VPORT_IN 20: 6:0, 24:1, #2-wide 67 | VPORT_IN 21: 12:0, 15:1, #2-wide 68 | VPORT_IN 22: 27:0, 32:1, #2-wide 69 | 70 | 71 | 72 | 73 | VPORT_OUT 0: 1:0, 3:1, 5:2, 6:3, 8:4, 9:5, 11:6, 12:7 #8-wide output Port 74 | VPORT_OUT 1: 2:0, 7:1, 10:2, 13:3 #4-wide output Port 75 | VPORT_OUT 2: 0:0 #1 2 3 4 5 6 7 #8-deep output Port 76 | VPORT_OUT 3: 2:0 #1 2 3 4 5 6 7 #8-deep output Port 77 | VPORT_OUT 4: 4:0 #1 2 3 4 5 6 7 #8-deep output Port 78 | VPORT_OUT 5: 6:0 #1 2 3 4 5 6 7 #8-deep output Port 79 | VPORT_OUT 6: 8:0 #1 2 3 4 5 6 7 #8-deep output Port 80 | VPORT_OUT 7: 10:0 #1 2 3 4 5 6 7 #8-deep output Port 81 | VPORT_OUT 8: 12:0 #1 2 3 4 5 6 7 #8-deep output Port 82 | VPORT_OUT 8: 1:0, 3:1 83 | VPORT_OUT 9: 5:0, 7:1 84 | VPORT_OUT 10: 9:0, 11:1 85 | VPORT_OUT 11: 13:0, 14:1 86 | 87 | #PORT_IN 0: 17 18 19 #any of these 88 | #PORT_IN 1: 20 21 22 #any of these 89 | #PORT_OUT 0: 17 18 19 20 21 22 #any of these 90 | #PORT_OUT 1: 25 26 27 #any of these 91 | 92 | 93 | -------------------------------------------------------------------------------- /softbrain-config/make.config: -------------------------------------------------------------------------------- 1 | MKDIR_P = mkdir -p 2 | 3 | SYS = $(shell sys) 4 | CXX = g++ 5 | CXXFLAGS := -Wall -g -std=c++11 -O3 -lm 6 | 7 | build ?= $(shell pwd)/${level}/build 8 | prefix ?= $(shell pwd) 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /softbrain-config/make.rules: -------------------------------------------------------------------------------- 1 | .PHONY: directories 2 | 3 | 
directories: 4 | ${MKDIR_P} ${build}/obj 5 | ${MKDIR_P} ${build}/lib 6 | -------------------------------------------------------------------------------- /softbrain-config/src/Makefile: -------------------------------------------------------------------------------- 1 | level=../ 2 | include ${level}/make.config 3 | 4 | SOURCES= model.cpp model_parsing.cpp fu_model.cpp sub_model.cpp direction.cpp sbinst.cpp 5 | PRE_OBJECTS=$(SOURCES:.cpp=.o) 6 | INST_MODEL_FILE=full.sbinst 7 | 8 | LIB_DEST=${build}/lib 9 | OBJ_DEST=${build}/obj 10 | 11 | PRE_OBJECTS=$(SOURCES:.cpp=.o) 12 | OBJECTS = $(patsubst %,$(OBJ_DEST)/%,$(PRE_OBJECTS)) 13 | 14 | CXXFLAGS += -fPIC 15 | 16 | all: directories sbinst.cpp $(LIB_DEST)/libsbconfig.a $(LIB_DEST)/libsbconfig.so 17 | 18 | $(LIB_DEST)/libsbconfig.a: $(OBJECTS) 19 | ar crs $@ $^ 20 | 21 | $(LIB_DEST)/libsbconfig.so: $(OBJECTS) 22 | $(CXX) $(CXXFLAGS) -MD -shared -o $@ $^ 23 | 24 | sbinst.cpp: sbinst.h insts/*.h 25 | @echo "done" 26 | 27 | sbinst.h: inst_model.cpp inst_model.h model_parsing.cpp insts/*.h $(INST_MODEL_FILE) 28 | $(CXX) $(CXXFLAGS) -MD inst_model.cpp model_parsing.cpp -o inst_model 29 | ./inst_model $(INST_MODEL_FILE) sbinst.h sbinst.cpp 30 | 31 | $(OBJ_DEST)/%.o: %.cpp %.h sbinst.h 32 | $(CXX) $(CXXFLAGS) -MD -c -o $@ $< 33 | 34 | $(OBJ_DEST)/sbinst.o: sbinst.cpp sbinst.h 35 | $(CXX) $(CXXFLAGS) -MD -c -o $@ $< 36 | 37 | 38 | .PHONY: clean 39 | 40 | clean: 41 | -rm -Rf libsbconfig.a *.o sbinst.cpp sbinst.h $(LIB_DEST)/*.a $(LIB_DEST)/*.so $(OBJ_DEST)/*.o *.d 42 | 43 | include ${level}/make.rules 44 | 45 | -------------------------------------------------------------------------------- /softbrain-config/src/direction.cpp: -------------------------------------------------------------------------------- 1 | #include "direction.h" 2 | #include "model_parsing.h" 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace SB_CONFIG; 9 | using namespace std; 10 | 11 | SbDIR::SbDIR() { 12 | 13 | //Adding the encoding for each direction 14 | add_encode(SbDIR::N,3); 15 | add_encode(SbDIR::NE,4); 16 | add_encode(SbDIR::E,5); 17 | add_encode(SbDIR::SE,6); 18 | add_encode(SbDIR::S,7); 19 | add_encode(SbDIR::SW,0); 20 | add_encode(SbDIR::W,1); 21 | add_encode(SbDIR::NW,2); 22 | 23 | //The following functions map the input directions and corresponding tuple 24 | //to an index 25 | 26 | //TOP bottom left right 27 | add_encode(SbDIR::IP0, std::make_tuple(true,false,false,false),encode(SbDIR::NW)); 28 | add_encode(SbDIR::IP1, std::make_tuple(true,false,false,false),encode(SbDIR::N)); 29 | add_encode(SbDIR::IP2, std::make_tuple(true,false,false,false),encode(SbDIR::NE)); 30 | 31 | //top bottom LEFT right 32 | add_encode(SbDIR::IP0, std::make_tuple(false,false,true,false),encode(SbDIR::SW)); 33 | add_encode(SbDIR::IP1, std::make_tuple(false,false,true,false),encode(SbDIR::W)); 34 | add_encode(SbDIR::IP2, std::make_tuple(false,false,true,false),encode(SbDIR::NW)); 35 | 36 | //top bottom left RIGHT 37 | add_encode(SbDIR::IP0, std::make_tuple(false,false,false,true),encode(SbDIR::SE)); 38 | add_encode(SbDIR::IP1, std::make_tuple(false,false,false,true),encode(SbDIR::E)); 39 | add_encode(SbDIR::IP2, std::make_tuple(false,false,false,true),encode(SbDIR::NE)); 40 | 41 | //top BOTTOM left right 42 | add_encode(SbDIR::IP0, std::make_tuple(false,true,false,false),encode(SbDIR::SW)); 43 | add_encode(SbDIR::IP1, std::make_tuple(false,true,false,false),encode(SbDIR::S)); 44 | add_encode(SbDIR::IP2, std::make_tuple(false,true,false,false),encode(SbDIR::SE)); 45 | } 46 | 47 
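// Example: a minimal usage sketch (illustrative only, based on the mappings
// registered in the constructor above) for a tile on the top edge of the grid,
// i.e. top=true and the other edge flags false:
//
//   SbDIR dirs;
//   int slot = dirs.encode(SbDIR::IP1, true, false, false, false);
//   // slot == dirs.encode(SbDIR::N) == 3, since IP1 is registered with N's index
//   // for the (top,bottom,left,right) = (1,0,0,0) tuple
//   SbDIR::DIR d = dirs.decode(slot, true, false, false, false);
//   // d == SbDIR::IP1, because the later IP* entries override N for that tuple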
| 48 | int SbDIR::encode(DIR myDir) { 49 | return encode(myDir,false,false,false,false); 50 | } 51 | 52 | //preferred directions 53 | void set_pref_dirs(bool& top,bool& bottom, bool& left, bool& right) { 54 | if(top && right) {right=false;} //top 55 | if(top && left) {left=false;} //top 56 | if(bottom && left) {bottom=false;} //left 57 | if(bottom && right){bottom=false;} //right 58 | } 59 | 60 | //returns index of the direction and tuple using the io_enc map 61 | int SbDIR::encode(DIR myDir, bool top, bool bottom, bool left, bool right) { 62 | set_pref_dirs(top,bottom,left,right); 63 | std::pair pair = make_pair(myDir,epos(top,bottom,left,right)); 64 | assert(io_enc.count(pair)); 65 | return io_enc[pair]; 66 | } 67 | 68 | //decode func with index and tuple returning the direction 69 | SbDIR::DIR SbDIR::decode(int i, bool top, bool bottom, bool left, bool right) { 70 | set_pref_dirs(top,bottom,left,right); 71 | 72 | std::pair pair = make_pair(i,epos(top,bottom,left,right)); 73 | assert(io_dec.count(pair)); 74 | return io_dec[pair]; 75 | } 76 | 77 | //position of output direction 78 | //TODO: generalize for more than one output side 79 | int SbDIR::slot_for_dir(DIR myDir, bool top, bool bottom, bool left, bool right) { 80 | set_pref_dirs(top,bottom,left,right); 81 | 82 | if(isOutputDir(myDir)) { 83 | myDir=reverse(myDir,true); 84 | } 85 | return encode(myDir, top, bottom, left, right); 86 | } 87 | 88 | SbDIR::DIR SbDIR::dir_for_slot(int index, bool top, bool bottom, bool left, bool right) { 89 | set_pref_dirs(top,bottom,left,right); 90 | 91 | SbDIR::DIR myDir = decode(index, top, bottom, left, right); 92 | if(isInputDir(myDir)) { 93 | myDir=reverse(myDir,true); 94 | } 95 | return myDir; 96 | } 97 | 98 | int SbDIR::encode_fu_dir(DIR myDir) { 99 | switch(myDir) { 100 | case NE: return 1; 101 | case SE: return 2; 102 | case SW: return 3; 103 | case NW: return 4; 104 | case IM: return 5; 105 | default: assert(0 && "no encoding"); 106 | } 107 | assert(0 && "not reachable"); 108 | } 109 | 110 | SbDIR::DIR SbDIR::fu_dir_of(int i) { 111 | switch(i) { 112 | case 0: return END_DIR; 113 | case 1: return NE; 114 | case 2: return SE; 115 | case 3: return SW; 116 | case 4: return NW; 117 | case 5: return IM; 118 | default: assert(0); 119 | } 120 | assert(0 && "not reachable"); 121 | } 122 | 123 | // switch(myDir) { 124 | // case N: return 3; 125 | // case NE: return 4; 126 | // case E: return 5; 127 | // case SE: return 6; 128 | // case S: return 7; 129 | // case SW: return 0; 130 | // case W: return 1; 131 | // case NW: return 2; 132 | // case OP0: return pos_of(SW); 133 | // case OP1: return pos_of(S); 134 | // case OP2: return pos_of(SE); 135 | // default: assert(0); 136 | // } 137 | 138 | 139 | 140 | SbDIR::DIR SbDIR::toDir(string qs, bool outgoing) { 141 | if (false) return END_DIR; 142 | else if(ModelParsing::StartsWith(qs,"NW")) return outgoing ? NW : reverse(NW); 143 | else if(ModelParsing::StartsWith(qs,"NE")) return outgoing ? NE : reverse(NE); 144 | else if(ModelParsing::StartsWith(qs,"SE")) return outgoing ? SE : reverse(SE); 145 | else if(ModelParsing::StartsWith(qs,"SW")) return outgoing ? SW : reverse(SW); 146 | else if (ModelParsing::StartsWith(qs,"N" )) return outgoing ? N : reverse(N); 147 | else if(ModelParsing::StartsWith(qs,"E" )) return outgoing ? E : reverse(E); 148 | else if(ModelParsing::StartsWith(qs,"S" )) return outgoing ? S : reverse(S); 149 | else if(ModelParsing::StartsWith(qs,"W" )) return outgoing ? 
W : reverse(W); 150 | else if(ModelParsing::StartsWith(qs,"P0")) return outgoing ? OP0 : IP0; 151 | else if(ModelParsing::StartsWith(qs,"P1")) return outgoing ? OP1 : IP1; 152 | else if(ModelParsing::StartsWith(qs,"P2")) return outgoing ? OP2 : IP2; 153 | else if(ModelParsing::StartsWith(qs,"IM")) return IM; 154 | return END_DIR; 155 | } 156 | 157 | 158 | //returns the reverse direction of DIR 159 | SbDIR::DIR SbDIR::reverse(DIR myDir, bool reverseIO) { 160 | switch(myDir) { 161 | case N: return S; 162 | case NE: return SW; 163 | case E: return W; 164 | case SE: return NW; 165 | case S: return N; 166 | case SW: return NE; 167 | case W: return E; 168 | case NW: return SE; 169 | default: { 170 | if(reverseIO) { 171 | switch(myDir) { 172 | case IP0: return OP0; 173 | case IP1: return OP1; 174 | case IP2: return OP2; 175 | case OP0: return IP0; 176 | case OP1: return IP1; 177 | case OP2: return IP2; 178 | default: assert(0); return myDir; 179 | } 180 | } 181 | assert(isInputDir(myDir) && !isOutputDir(myDir)); 182 | return myDir; //don't reverse 183 | } 184 | } 185 | } 186 | 187 | const char* SbDIR::dirNameDBG(SbDIR::DIR myDir, bool reverse) { 188 | if(isInputDir(myDir) || isOutputDir(myDir)) { 189 | switch(reverse ? SbDIR::reverse(myDir) : myDir) { 190 | case SbDIR::IP0: 191 | return "IP0"; 192 | break; 193 | case SbDIR::IP1: 194 | return "IP1"; 195 | break; 196 | case SbDIR::IP2: 197 | return "IP2"; 198 | break; 199 | case SbDIR::OP0: 200 | return "IP0"; 201 | break; 202 | case SbDIR::OP1: 203 | return "IP1"; 204 | break; 205 | case SbDIR::OP2: 206 | return "IP2"; 207 | break; 208 | case SbDIR::IM: 209 | return "IM"; 210 | break; 211 | default: 212 | assert(0); 213 | break; 214 | } 215 | } else { 216 | return SbDIR::dirName(myDir,reverse); 217 | } 218 | } 219 | 220 | 221 | const char* SbDIR::dirName(SbDIR::DIR myDir, bool reverse) { 222 | 223 | switch(reverse ? 
SbDIR::reverse(myDir) : myDir) { 224 | case SbDIR::N: 225 | return "N"; 226 | break; 227 | case SbDIR::NE: 228 | return "NE"; 229 | break; 230 | case SbDIR::E: 231 | return "E"; 232 | break; 233 | case SbDIR::SE: 234 | return "SE"; 235 | break; 236 | case SbDIR::S: 237 | return "S"; 238 | break; 239 | case SbDIR::SW: 240 | return "SW"; 241 | break; 242 | case SbDIR::W: 243 | return "W"; 244 | break; 245 | case SbDIR::NW: 246 | return "NW"; 247 | break; 248 | case SbDIR::IP0: 249 | return "P0"; 250 | break; 251 | case SbDIR::IP1: 252 | return "P1"; 253 | break; 254 | case SbDIR::IP2: 255 | return "P2"; 256 | break; 257 | case SbDIR::OP0: 258 | return "P0"; 259 | break; 260 | case SbDIR::OP1: 261 | return "P1"; 262 | break; 263 | case SbDIR::OP2: 264 | return "P2"; 265 | break; 266 | case SbDIR::IM: 267 | return "IM"; 268 | break; 269 | case SbDIR::END_DIR: 270 | return "xxx"; 271 | break; 272 | } 273 | return "???"; 274 | } 275 | -------------------------------------------------------------------------------- /softbrain-config/src/direction.h: -------------------------------------------------------------------------------- 1 | #ifndef __SB_DIRECTION_H__ 2 | #define __SB_DIRECTION_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace SB_CONFIG { 10 | 11 | typedef std::tuple epos; 12 | 13 | class SbDIR { 14 | public: 15 | enum DIR { IP0, IP1, IP2, OP0, OP1, OP2, N, NE, E, SE, S, SW, W, NW, IM, END_DIR }; 16 | 17 | static bool isInputDir(DIR d) {return d==IP0 || d==IP1 || d==IP2;} 18 | static bool isOutputDir(DIR d) {return d==OP0 || d==OP1 || d==OP2;} 19 | 20 | static DIR reverse(DIR myDir, bool reverseIO=false); 21 | static DIR toDir(std::string qs, bool outgoing); 22 | static const char* dirName(SbDIR::DIR dir, bool reverse=false); 23 | static const char* dirNameDBG(SbDIR::DIR dir, bool reverse=false); 24 | 25 | std::map,int> io_enc; 26 | std::map,DIR> io_dec; 27 | 28 | void add_encode(DIR dir, epos e, int index) { 29 | io_enc[std::make_pair(dir,e)]=index; 30 | io_dec[std::make_pair(index,e)]=dir; 31 | } 32 | 33 | //map func to map each direction and its index with 34 | //all possible tuples 35 | void add_encode(DIR dir, int index) { 36 | for(int i = 0; i <= 1; ++i) { 37 | for(int j = 0; j <= 1; ++j) { 38 | for(int k = 0; k <= 1; ++k) { 39 | for(int l = 0; l <= 1; ++l) { 40 | add_encode(dir,std::make_tuple(i,j,k,l),index); 41 | } 42 | } 43 | } 44 | } 45 | } 46 | 47 | SbDIR(); 48 | 49 | 50 | int encode(DIR i); 51 | DIR decode(int i, bool top, bool bottom, bool left, bool right); 52 | DIR dir_for_slot(int i, bool top, bool bottom, bool left, bool right); 53 | int encode(DIR i, bool top, bool bottom, bool left, bool right); 54 | int slot_for_dir(DIR i, bool top, bool bottom, bool left, bool right); 55 | 56 | int encode_fu_dir(DIR myDir); 57 | DIR fu_dir_of(int i); 58 | 59 | }; 60 | } 61 | 62 | 63 | 64 | 65 | #endif 66 | -------------------------------------------------------------------------------- /softbrain-config/src/fixed_point.h: -------------------------------------------------------------------------------- 1 | // NOTE: The macros below were copied from cambricon/include/fix_common.h 2 | // in the softbrain-workloads repository 3 | 4 | // In 16-bit integer representation, 5 | // one bit is reserved for sign, 6 | // the maximum supported number is 32767, 7 | // the minimum supported number is -32768. 8 | // Here FIX_MAX = 32767, and FIX_MIN is chosen to be negative 9 | // of FIX_MAX instead of -32768 to keep the symmetry. 
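// A small worked example of this fixed-point scheme (for illustration only,
// assuming the FRAC_BITS = 11 setting chosen further below, so DELTA = 1/2048):
//   DOUBLE_TO_FIX(1.5)  == (int)(1.5 * 2048)        == 3072
//   FIX_MUL(3072, 3072) == (3072 * 3072) >> 11      == 4608 == DOUBLE_TO_FIX(2.25)
//   FIX_TRUNC(40000)    == 32767   (saturates at FIX_MAX instead of wrapping)
//   FIX_TRUNC(-40000)   == -32767  (saturates at FIX_MIN, keeping the symmetry)
//   FLOAT_MAX           == 32767 * DELTA, i.e. just under 16.0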
10 | 11 | // FIX_TRUNC is to keep the number falling within the range 12 | // between FIX_MIN and FIX_MAX (both inclusively) 13 | #define FIX_MAX ((1 << 15) - 1) 14 | #define FIX_MIN (-FIX_MAX) 15 | #define FIX_TRUNC(x) (x > FIX_MAX ? FIX_MAX : (x < FIX_MIN ? FIX_MIN : x) ) 16 | 17 | // FRAC_BITS is the number of bits reserved for fractional parts. 18 | // So the integer part has 15 - FRAC_BITS bits. 19 | 20 | // DELTA is the minimum positive amount that can be represented in this number system. 21 | 22 | // FLOAT_MAX is the largest real value that can be represented in this number system. 23 | // FLOAT_MIN is the smallest real value that can be represented in this number system. 24 | 25 | // FLOAT_TRUNC is to keep numbers within the range 26 | // between FLOAT_MIN and FLOAT_MAX (both inclusively) 27 | #define FRAC_BITS 11 // 11 or 12 is recommended 28 | #define DELTA (((double)1.0)/(1 << FRAC_BITS)) 29 | #define FLOAT_MAX (FIX_MAX * DELTA) 30 | #define FLOAT_MIN (FIX_MIN * DELTA) 31 | #define FLOAT_TRUNC(x) (x > FLOAT_MAX ? FLOAT_MAX : (x < FLOAT_MIN ? FLOAT_MIN : x) ) 32 | 33 | // DOUBLE_TO_FIX converts a double number to integer in our fixed representation. 34 | // FIX_TO_DOUBLE converts a integer number to double in our fixed representation. 35 | #define DOUBLE_TO_FIX(x) ( (int)(FLOAT_TRUNC(x) / DELTA) ) 36 | #define FIX_TO_DOUBLE(x) (x * DELTA) 37 | 38 | // FIX_ADD fixed addition. 39 | // FIX_MINUS fixed subtraction. 40 | // FIX_MUL fixed multiplication. 41 | // FIX_TAN_H fixed tanh, but is right now using tanh from math.h 42 | #define FIX_ADD(a, b) ( FIX_TRUNC( (int)a + (int)b ) ) 43 | #define FIX_MINUS(a, b) ( FIX_ADD(a, -b) ) 44 | #define FIX_MUL(a, b) ( FIX_TRUNC( ((int)a * (int)b) >> FRAC_BITS ) ) 45 | #define FIX_TAN_H(x) ( DOUBLE_TO_FIX(tanh(FIX_TO_DOUBLE(x))) ) 46 | 47 | -------------------------------------------------------------------------------- /softbrain-config/src/fu_model.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "fu_model.h" 4 | #include "model_parsing.h" 5 | #include "sbinst.h" 6 | #include "assert.h" 7 | 8 | using namespace SB_CONFIG; 9 | using namespace std; 10 | 11 | //FU_type(func_unit_def) capabilities 12 | //FU_ADD: Add16x4:1 13 | 14 | FuModel::FuModel(std::istream& istream) { 15 | //char line[512]; 16 | string param,value; 17 | 18 | while(istream.good()) 19 | { 20 | if(istream.peek()=='[') break; //break out if done 21 | 22 | //string line; 23 | ModelParsing::ReadPair(istream, param, value); 24 | 25 | if(param[0]=='#' || value[0]=='#') continue; //Not a comment 26 | 27 | if(ModelParsing::StartsWith(param, "FU_TYPE")) { 28 | //defining an fu and capabilitty 29 | 30 | string newtype; 31 | 32 | std::stringstream ss(param); 33 | 34 | getline(ss, param, ' '); 35 | getline(ss, newtype); 36 | 37 | func_defs.push_back(func_unit_def(newtype)); 38 | AddCapabilities(func_defs[func_defs.size()-1], value); 39 | 40 | } else if(ModelParsing::StartsWith(param, "SWITCH_TYPE")) { 41 | //AddCapabilities(*GetFU("SWITCH"), value); 42 | assert(0); 43 | } 44 | } 45 | } 46 | 47 | func_unit_def* FuModel::GetFUDef(char* fu_cstr) 48 | { 49 | string s(fu_cstr); 50 | return GetFUDef(s); 51 | } 52 | 53 | 54 | //Get a functional unit based upon the description string (the name) 55 | func_unit_def* FuModel::GetFUDef(string& fu_string) 56 | { 57 | for(unsigned i = 0; i < func_defs.size(); ++i) 58 | { 59 | if(func_defs[i].name().compare(fu_string)==0) 60 | { 61 | return &func_defs[i]; 62 | } 63 | } 64 | return NULL; 
//if no fu, return null 65 | } 66 | 67 | //This function reads line from an ifstream, and gets a param and value, 68 | //seperated by a ":" 69 | void FuModel::AddCapabilities(func_unit_def& fu, string& cap_string) 70 | { 71 | 72 | stringstream ss(cap_string); 73 | string cur_cap; 74 | 75 | while (getline(ss, cur_cap, ',')) 76 | { 77 | stringstream pss(cur_cap); 78 | string cap; 79 | string enc_str; 80 | 81 | getline(pss, cap, ':'); 82 | 83 | ModelParsing::trim(cap); 84 | 85 | if(cap.empty()) { 86 | return; 87 | } 88 | 89 | if(ModelParsing::stricmp(cap,"ALL")) { 90 | for(int i = 0; i < SB_NUM_TYPES; ++i) { 91 | fu.add_cap((sb_inst_t)i); 92 | } 93 | return; 94 | } 95 | 96 | sb_inst_t sb_inst = inst_from_config_name(cap.c_str()); 97 | 98 | if(sb_inst==SB_NONE || sb_inst==SB_ERR) 99 | { 100 | cerr << "ERROR IN PARSING SOFTBRAIN INSTRUCTION: \"" << cap << "\"\n"; 101 | assert(0); 102 | return; 103 | } 104 | 105 | fu.add_cap(sb_inst); 106 | 107 | if(pss.good()) //then there must be an encoding string 108 | { 109 | unsigned encoding; 110 | pss >> encoding; 111 | 112 | fu.set_encoding(sb_inst,encoding); 113 | } 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /softbrain-config/src/fu_model.h: -------------------------------------------------------------------------------- 1 | #ifndef __SB_FU_MODEL_H__ 2 | #define __SB_FU_MODEL_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "sbinst.h" 11 | 12 | namespace SB_CONFIG { 13 | 14 | class func_unit_def { 15 | public: 16 | func_unit_def(std::string name_in) { 17 | _name = name_in; 18 | } 19 | 20 | std::string name() {return _name;} 21 | 22 | void add_cap(sb_inst_t sb_inst) { _cap.insert(sb_inst); } 23 | void set_encoding(sb_inst_t sb_inst, unsigned i) { 24 | if(i==0) { 25 | assert(0 && "Encoding for Instruction cannot be zero. Zero is reserved for Blank"); 26 | } 27 | if(i==1) { 28 | assert(0 && "Encoding for Instruction cannot be 1. 
1 is reserved for Copy"); 29 | } 30 | _cap2encoding[sb_inst]=i; 31 | _encoding2cap[i]=sb_inst; 32 | } 33 | 34 | bool is_cap(sb_inst_t inst) { return _cap.count(inst)>0; } 35 | unsigned encoding_of(sb_inst_t inst) { 36 | if(inst == SB_Copy) { 37 | return 1; 38 | } else { 39 | return _cap2encoding[inst]; 40 | } 41 | } 42 | 43 | sb_inst_t inst_of_encoding(unsigned i) { 44 | if(i==1) { 45 | return SB_Copy; 46 | } 47 | assert(_encoding2cap.count(i)); 48 | return _encoding2cap[i]; 49 | } 50 | 51 | private: 52 | std::string _name; 53 | std::set _cap; 54 | std::map _cap2encoding; 55 | std::map _encoding2cap; 56 | 57 | friend class FuModel; 58 | }; 59 | 60 | class FuModel { 61 | public: 62 | FuModel(std::istream& istream); 63 | func_unit_def* GetFUDef(char*); 64 | func_unit_def* GetFUDef(std::string& fu_string); 65 | 66 | private: 67 | void AddCapabilities(func_unit_def& fu, std::string& cap_string); 68 | 69 | std::vector func_defs; 70 | 71 | }; 72 | 73 | } 74 | 75 | 76 | #endif 77 | -------------------------------------------------------------------------------- /softbrain-config/src/full.sbinst: -------------------------------------------------------------------------------- 1 | #Instruction ConfigName Latency NumOperands 2 | Add16 Add16 1 2 3 | Mul16 Mul16 1 2 4 | Sig16 Sig16 3 2 5 | 6 | Add16x4 Add16x4 1 2 7 | TAdd16x4 TAdd16x4 1 2 8 | HAdd16x4 HAdd16x4 1 2 9 | RShf16x4 RShf16x4 1 2 10 | Sub16x4 Sub16x4 1 2 11 | Abs16x4 Abs16x4 1 2 12 | Acc16x4 Acc16x4 1 2 13 | 14 | RShf2_16x4 RShf2_16x4 1 2 15 | RShf4_16x4 RShf4_16x4 1 2 16 | 17 | Mul16x4 Mul16x4 1 2 18 | Div16x4 Div16x4 3 2 19 | Sig16x4 Sig16x4 3 2 20 | Red16x4 Red16x4 2 2 21 | 22 | Max16x4 Max16x4 1 2 23 | Min16x4 Min16x4 1 2 24 | SMax16x4 SMax16x4 1 2 25 | SMin16x4 SMin16x4 1 2 26 | RedMax16x4 RedMax16x4 2 2 27 | RedMin16x4 RedMin16x4 2 2 28 | RedSMax16x4 RedSMax16x4 2 2 29 | RedSMin16x4 RedSMin16x4 2 2 30 | 31 | DelayFU DelayFU 0 1 32 | 33 | Mul32x2 Mul32x2 1 2 34 | Add32x2 Add32x2 1 2 35 | Red32x2 Red32x2 2 2 36 | RShf32x2 RShf32x2 1 2 37 | 38 | Max32x2 Max32x2 1 2 39 | Min32x2 Min32x2 1 2 40 | RedMax32x2 RedMax32x2 2 2 41 | RedMin32x2 RedMin32x2 2 2 42 | 43 | Mul64 Mul64 1 2 44 | Add64 Add64 1 2 45 | Sub64 Sub64 1 2 46 | RShf64 RShf64 1 2 47 | LShf64 LShf64 1 2 48 | Max64 Max64 1 2 49 | Min64 Min64 1 2 50 | Acc64 Acc64 1 2 51 | 52 | FAdd32x2 FAdd32x2 1 2 53 | FRed32x2 FRed32x2 2 2 54 | FMul32x2 FMul32x2 2 2 55 | 56 | FxRelu16x4 FxRelu16x4 1 2 57 | FxSig16x4 FxSig16x4 3 2 58 | FxTanh16x4 FxTanh16x4 3 2 59 | 60 | FxAdd16x4 FxAdd16x4 1 2 61 | FxRed16x4 FxRed16x4 2 2 62 | FxMul16x4 FxMul16x4 1 2 63 | FxExp16x4 FxExp16x4 3 2 64 | 65 | FxMul32x2 FxMul32x2 1 2 66 | FxAdd32x2 FxAdd32x2 1 2 67 | FxRed32x2 FxRed32x2 2 2 68 | 69 | FAdd64 FAdd64 1 2 70 | FMul64 FMul64 2 2 71 | 72 | Select Select 1 3 73 | And And 1 2 74 | Or Or 1 2 75 | Xor Xor 1 2 76 | 77 | Copy Copy 1 1 78 | 79 | ICmpEQ ICmpEQ 1 2 80 | 81 | # ----- old ones (no corresponding impl) ---- 82 | 83 | Switch Switch 1 8 84 | 85 | 86 | Add Add 1 2 87 | Sub Sub 1 2 88 | Mul Mul 1 2 89 | UDiv UDiv 1 2 90 | SDiv SDiv 1 2 91 | URem URem 1 2 92 | SRem SRem 1 2 93 | IMax IMax 1 2 94 | IMin IMin 1 2 95 | SMax SMax 1 2 96 | SMin SMin 1 2 97 | 98 | FAdd FAdd 3 2 99 | FSub FSub 3 2 100 | FMul FMul 3 2 101 | FDiv FDiv 12 2 102 | FRem FRem 12 2 103 | Sqrt Sqrt 12 1 104 | FSin FSin 24 1 105 | FCos FCos 24 1 106 | FMax FMax 3 2 107 | FMin FMin 3 2 108 | 109 | SExt SExt 1 1 110 | 111 | Shl Shl 1 2 112 | LShr LShr 1 2 113 | AShr AShr 1 2 114 | 115 | PHI PHI 1 2 116 | Ternary Ternary 1 3 117 | 118 | ICmpNE 
ICmp-NE 1 2 119 | ICmpUGT ICmp-UGT 1 2 120 | ICmpUGE ICmp-UGE 1 2 121 | ICmpULT ICmp-ULT 1 2 122 | ICmpULE ICmp-ULE 1 2 123 | ICmpSGT ICmp-SGT 1 2 124 | ICmpSGE ICmp-SGE 1 2 125 | ICmpSLT ICmp-SLT 1 2 126 | ICmpSLE ICmp-SLE 1 2 127 | 128 | FCmpOEQ FCmp-OEQ 3 2 129 | FCmpONE FCmp-ONE 3 2 130 | FCmpOGT FCmp-OGT 3 2 131 | FCmpOGE FCmp-OGE 3 2 132 | FCmpOLT FCmp-OLT 3 2 133 | FCmpOLE FCmp-OLE 3 2 134 | -------------------------------------------------------------------------------- /softbrain-config/src/inst_model.cpp: -------------------------------------------------------------------------------- 1 | #include "inst_model.h" 2 | 3 | #include 4 | #include 5 | #include "model_parsing.h" 6 | 7 | using namespace SB_CONFIG; 8 | using namespace std; 9 | 10 | //constructor based on input stream 11 | InstModel::InstModel(char* filename) { 12 | 13 | ifstream ifs(filename, ios::in); 14 | 15 | if(ifs.fail()) { 16 | cerr << "Could Not Open: " << filename << "\n"; 17 | return; 18 | } 19 | 20 | char line[512]; 21 | while(ifs.good()) 22 | { 23 | //string line; 24 | ifs.getline(line,512); 25 | 26 | string str_line=string(line); 27 | 28 | ModelParsing::trim(str_line); 29 | 30 | //Empty line or the first line 31 | if(str_line[0]=='#' || str_line.empty()) continue; 32 | 33 | SbInst* inst = new SbInst(); 34 | 35 | char* token; 36 | token = strtok (line," "); 37 | string str_name(token); 38 | inst->setName(str_name); 39 | 40 | token = strtok (NULL," "); 41 | string str_config(token); 42 | inst->setConfigName(str_config); 43 | 44 | token = strtok (NULL, " "); 45 | inst->setLatency(atoi(token)); 46 | 47 | token = strtok (NULL, " "); 48 | inst->setNumOperands(atoi(token)); 49 | 50 | _instList.push_back(inst); 51 | } 52 | 53 | 54 | } 55 | 56 | void InstModel::printCFiles(char* header_file, char* cpp_file) { 57 | 58 | // -------------------------print header file ----------------------------- 59 | ofstream ofs(header_file, ios::out); 60 | ofs << 61 | "//This file generated from inst_model.cpp -- Do not edit. Do not commit to repo.\n" 62 | "#ifndef __SB_INST_H__\n" 63 | "#define __SB_INST_H__\n" 64 | "\n" 65 | "#include \n" 66 | "#include \n" 67 | "#include \n" 68 | "#include \n" 69 | "#include \n" 70 | "#include \n" 71 | "#include \n" 72 | "#include \"fixed_point.h\"\n" 73 | "\n" 74 | "namespace SB_CONFIG {\n" 75 | "\n" 76 | 77 | "float as_float(std::uint32_t ui);\n" 78 | "uint32_t as_uint32(float f);\n" 79 | "\n" 80 | "double as_double(std::uint64_t ui);\n" 81 | "uint64_t as_uint64(double f);\n" 82 | "\n" 83 | 84 | 85 | "enum sb_inst_t {\n" 86 | "SB_NONE=0,\n" 87 | "SB_ERR,\n"; 88 | 89 | for(unsigned i = 0; i < _instList.size(); ++i) { 90 | ofs << "SB_" << _instList[i]->name() << ", \n"; 91 | }; 92 | 93 | ofs << "SB_NUM_TYPES\n};\n"; 94 | 95 | ofs << "\n"; 96 | ofs << "extern int num_ops[" << _instList.size()+2 << "];\n"; 97 | 98 | ofs << 99 | "\n" 100 | "sb_inst_t inst_from_string(const char* str);\n" 101 | "sb_inst_t inst_from_config_name(const char* str);\n" 102 | "const char* name_of_inst(sb_inst_t inst);\n" 103 | "const char* config_name_of_inst(sb_inst_t inst);\n" 104 | "int inst_lat(sb_inst_t inst);\n" 105 | "uint64_t execute(sb_inst_t inst, std::vector& ops, uint64_t& accum);\n" 106 | "\n" 107 | "};\n\n" 108 | "#endif\n"; 109 | 110 | ofs.close(); 111 | 112 | // -------------------------print cpp file -------------------------------- 113 | { 114 | 115 | ofstream ofs(cpp_file, ios::out); 116 | 117 | // inst_from_string 118 | ofs << 119 | "//This file generated from inst_model.cpp -- Do not edit. 
Do not commit to repo.\n" 120 | "#include \"" << header_file << "\"\n\n" 121 | 122 | "float SB_CONFIG::as_float(std::uint32_t ui) {\n" 123 | " float f;\n" 124 | " std::memcpy(&f, &ui, sizeof(float));\n" 125 | " return f;\n" 126 | "}\n" 127 | "\n" 128 | 129 | "uint32_t SB_CONFIG::as_uint32(float f) {\n" 130 | " uint32_t ui;\n" 131 | " std::memcpy(&ui, &f, sizeof(float));\n" 132 | " return ui;\n" 133 | "}\n" 134 | "\n" 135 | 136 | "double SB_CONFIG::as_double(std::uint64_t ui) {\n" 137 | " float f;\n" 138 | " std::memcpy(&f, &ui, sizeof(float));\n" 139 | " return f;\n" 140 | "}\n" 141 | "\n" 142 | 143 | "uint64_t SB_CONFIG::as_uint64(double f) {\n" 144 | " uint32_t ui;\n" 145 | " std::memcpy(&ui, &f, sizeof(double));\n" 146 | " return ui;\n" 147 | "}\n" 148 | "\n" 149 | 150 | 151 | "using namespace SB_CONFIG;\n\n" 152 | "sb_inst_t SB_CONFIG::inst_from_string(const char* str) {\n" 153 | " if(strcmp(str,\"NONE\")==0) return SB_NONE;\n"; 154 | 155 | for(unsigned i = 0; i < _instList.size(); ++i) { 156 | ofs << " else if(strcmp(str,\"" << _instList[i]->name() << "\")==0) return SB_" << _instList[i]->name() << ";\n"; 157 | } 158 | ofs << " else return SB_ERR;\n\n"; 159 | 160 | ofs << "}\n\n"; 161 | 162 | 163 | // inst_from_config_name 164 | ofs << 165 | "sb_inst_t SB_CONFIG::inst_from_config_name(const char* str) {\n" 166 | " if(strcmp(str,\"NONE\")==0) return SB_NONE;\n"; 167 | for(unsigned i = 0; i < _instList.size(); ++i) { 168 | ofs << " else if(strcmp(str,\"" << _instList[i]->configName() << "\")==0) return SB_" << _instList[i]->name() << ";\n"; 169 | } 170 | ofs << " else return SB_ERR;\n\n"; 171 | 172 | ofs << "}\n\n"; 173 | 174 | // Properties of Instructions 175 | 176 | // name_of_inst 177 | ofs << 178 | "const char* SB_CONFIG::name_of_inst(sb_inst_t inst) {\n" 179 | " switch(inst) {\n"; 180 | for(unsigned i = 0; i < _instList.size(); ++i) { 181 | ofs << " case " << "SB_" << _instList[i]->name() << ": return \"" << _instList[i]->name() << "\";\n"; 182 | } 183 | ofs << "case SB_NONE: return \"NONE\";\n"; 184 | ofs << "case SB_ERR: assert(0); return \"ERR\";\n"; 185 | ofs << "case SB_NUM_TYPES: assert(0); return \"ERR\";\n"; 186 | ofs << " default: assert(0); return \"DEFAULT\";\n"; 187 | ofs << " }\n\n"; 188 | ofs << "}\n\n"; 189 | 190 | // config_name_of_inst 191 | ofs << 192 | "const char* SB_CONFIG::config_name_of_inst(sb_inst_t inst) {\n" 193 | " switch(inst) {\n"; 194 | for(unsigned i = 0; i < _instList.size(); ++i) { 195 | ofs << " case " << "SB_" << _instList[i]->name() << ": return \"" << _instList[i]->configName() << "\";\n"; 196 | } 197 | 198 | ofs << " case SB_NONE: return \"NONE\";\n"; 199 | ofs << " case SB_ERR: assert(0); return \"ERR\";\n"; 200 | ofs << " case SB_NUM_TYPES: assert(0); return \"ERR\";\n"; 201 | ofs << " default: assert(0); return \"DEFAULT\";\n"; 202 | ofs << " }\n\n"; 203 | ofs << "}\n\n"; 204 | 205 | //FUNCTION: inst_lat (this really should have just used an array...) 
206 | ofs << 207 | "int SB_CONFIG::inst_lat(sb_inst_t inst) {\n" 208 | " switch(inst) {\n"; 209 | for(unsigned i = 0; i < _instList.size(); ++i) { 210 | ofs << " case " << "SB_" << _instList[i]->name() << ": return " << _instList[i]->latency() << ";\n"; 211 | } 212 | ofs << " default: return 1;\n"; 213 | ofs << " }\n\n"; 214 | ofs << "}\n\n"; 215 | 216 | // num_ops_array 217 | ofs << "int SB_CONFIG::num_ops[" << _instList.size()+2 << "]={0, 0\n"; 218 | ofs << "\t\t\t\t\t\t\t\t\t\t\t\t\t\t"; 219 | for(unsigned i = 0; i < _instList.size(); ++i) { 220 | ofs << ", " << _instList[i]->numOps(); 221 | if(i%16==15) { 222 | ofs << "\n"; 223 | ofs << "\t\t\t\t\t\t\t\t\t\t\t\t\t\t"; 224 | } 225 | } 226 | ofs << "};\n\n"; 227 | 228 | 229 | //FUNCTION: execute() 230 | ofs << 231 | "uint64_t SB_CONFIG::execute(sb_inst_t inst, std::vector& ops, uint64_t& accum) {\n"; 232 | 233 | ofs << //this is an implementation of pass through 234 | " assert(ops.size() <= 3); \n" 235 | " assert(ops.size() <= (unsigned)(num_ops[inst]+1)); \n" 236 | " if((ops.size() > (unsigned)num_ops[inst]) && (ops[ops.size()] == 0)) { \n" 237 | " return ops[0];\n" 238 | " }\n" 239 | 240 | " switch(inst) {\n"; 241 | for(unsigned i = 0; i < _instList.size(); ++i) { 242 | ofs << " case " << "SB_" << _instList[i]->name() << ": {"; 243 | string inst_code_name = "insts/" + _instList[i]->name() + ".h"; 244 | ifstream f(inst_code_name.c_str()); 245 | 246 | if (f.good()) { 247 | std::string line; 248 | ofs << "\n"; 249 | while (std::getline(f, line)) { 250 | ofs << " " << line << "\n"; 251 | } 252 | ofs << " };\n"; 253 | } else { 254 | ofs << "assert(0 && \"Instruction Not Implemented\");"; 255 | ofs << "};\n"; 256 | } 257 | } 258 | ofs << " default: assert(0); return 1;\n"; 259 | ofs << " }\n\n"; 260 | ofs << "}\n\n"; 261 | 262 | 263 | ofs.close(); 264 | 265 | } 266 | } 267 | 268 | int main(int argc, char** argv) 269 | { 270 | if(argc!=4) { 271 | std::cout << "Usage:\n inst_model [input file] [header file] [cpp file]\n"; 272 | return 1; 273 | } 274 | 275 | InstModel* instModel = new InstModel(argv[1]); 276 | instModel->printCFiles(argv[2],argv[3]); 277 | return 0; 278 | } 279 | 280 | -------------------------------------------------------------------------------- /softbrain-config/src/inst_model.h: -------------------------------------------------------------------------------- 1 | #ifndef __SB_INST_MODEL_H__ 2 | #define __SB_INST_MODEL_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | namespace SB_CONFIG { 10 | 11 | // SB Instruction Class 12 | // Stores attributes like it's name, latency, etc... 
13 | class SbInst { 14 | public: 15 | std::string name() { return _name; } 16 | void setName(std::string& name) { _name=name; } 17 | 18 | std::string configName() { return _configname; } 19 | void setConfigName(std::string& name) { _configname=name; } 20 | 21 | int latency() { return _latency; } 22 | void setLatency(int lat) { _latency=lat; } 23 | 24 | int numOps() { return _num_ops; } 25 | void setNumOperands(int n_ops) { _num_ops=n_ops; } 26 | 27 | private: 28 | std::string _name; 29 | std::string _configname; 30 | int _latency; 31 | int _num_ops; 32 | }; 33 | 34 | class InstModel { 35 | public: 36 | InstModel(char* filename); //read the file and populate the instructions 37 | //DyInst* GetDyInstByName(std::string& name); 38 | 39 | void printCFiles(char* header, char* cpp); 40 | 41 | private: 42 | std::vector _instList; 43 | }; 44 | 45 | 46 | 47 | 48 | 49 | 50 | } 51 | 52 | #endif 53 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Abs16x4.h: -------------------------------------------------------------------------------- 1 | int16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | int16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | int16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | int16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | a0 = a0 >= 0 ? a0 : -a0; 6 | a1 = a1 >= 0 ? a1 : -a1; 7 | a2 = a2 >= 0 ? a2 : -a2; 8 | a3 = a3 >= 0 ? a3 : -a3; 9 | uint64_t c0 = (uint64_t)(a0)<<0; 10 | uint64_t c1 = (uint64_t)(a1)<<16; 11 | uint64_t c2 = (uint64_t)(a2)<<32; 12 | uint64_t c3 = (uint64_t)(a3)<<48; 13 | return c0 | c1 | c2 | c3; 14 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Acc16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | uint16_t b0 = (accum&0x000000000000FFFF)>>0; 6 | uint16_t b1 = (accum&0x00000000FFFF0000)>>16; 7 | uint16_t b2 = (accum&0x0000FFFF00000000)>>32; 8 | uint16_t b3 = (accum&0xFFFF000000000000)>>48; 9 | a0+=b0; 10 | a1+=b1; 11 | a2+=b2; 12 | a3+=b3; 13 | uint64_t c0 = (uint64_t)(a0)<<0; 14 | uint64_t c1 = (uint64_t)(a1)<<16; 15 | uint64_t c2 = (uint64_t)(a2)<<32; 16 | uint64_t c3 = (uint64_t)(a3)<<48; 17 | 18 | accum = c0 | c1 | c2 | c3; 19 | 20 | uint64_t ret = accum; 21 | 22 | if(ops[1]) { 23 | accum=0; 24 | } 25 | 26 | return ret; 27 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Acc64.h: -------------------------------------------------------------------------------- 1 | accum+=ops[0]; 2 | 3 | uint64_t ret = accum; 4 | 5 | if(ops[1]) { 6 | accum=0; 7 | } 8 | 9 | return ret; 10 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Add16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | uint16_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | uint16_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | uint16_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | uint16_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | a0+=b0; 10 | a1+=b1; 11 | a2+=b2; 12 | a3+=b3; 13 | uint64_t c0 = 
(uint64_t)(a0)<<0; 14 | uint64_t c1 = (uint64_t)(a1)<<16; 15 | uint64_t c2 = (uint64_t)(a2)<<32; 16 | uint64_t c3 = (uint64_t)(a3)<<48; 17 | return c0 | c1 | c2 | c3; 18 | //return (uint64_t) _mm_adds_pu16((__m64)ops[0], (__m64)ops[1]); 19 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Add32x2.h: -------------------------------------------------------------------------------- 1 | uint32_t a0 = (ops[0]&0x00000000FFFFFFFF)>>0; 2 | uint32_t a1 = (ops[0]&0xFFFFFFFF00000000)>>32; 3 | uint32_t b0 = (ops[1]&0x00000000FFFFFFFF)>>0; 4 | uint32_t b1 = (ops[1]&0xFFFFFFFF00000000)>>32; 5 | a0+=b0; 6 | a1+=b1; 7 | uint64_t c0 = (uint64_t)(a0)<<0; 8 | uint64_t c1 = (uint64_t)(a1)<<32; 9 | return c0 | c1; 10 | //return (uint64_t) _mm_adds_pu16((__m64)ops[0], (__m64)ops[1]); 11 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Add64.h: -------------------------------------------------------------------------------- 1 | return ops[0] + ops[1]; 2 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/And.h: -------------------------------------------------------------------------------- 1 | return ops[0] & ops[1]; 2 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Copy.h: -------------------------------------------------------------------------------- 1 | return ops[0]; 2 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Div16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | uint16_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | uint16_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | uint16_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | uint16_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | a0/=b0; 10 | a1/=b1; 11 | a2/=b2; 12 | a3/=b3; 13 | uint64_t c0 = (uint64_t)(a0)<<0; 14 | uint64_t c1 = (uint64_t)(a1)<<16; 15 | uint64_t c2 = (uint64_t)(a2)<<32; 16 | uint64_t c3 = (uint64_t)(a3)<<48; 17 | return c0 | c1 | c2 | c3; 18 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FAdd32x2.h: -------------------------------------------------------------------------------- 1 | uint32_t t_a0 = (ops[0]&0x00000000FFFFFFFF)>>0; 2 | uint32_t t_a1 = (ops[0]&0xFFFFFFFF00000000)>>32; 3 | uint32_t t_b0 = (ops[1]&0x00000000FFFFFFFF)>>0; 4 | uint32_t t_b1 = (ops[1]&0xFFFFFFFF00000000)>>32; 5 | 6 | float a0=as_float(t_a0); 7 | float a1=as_float(t_a1); 8 | float b0=as_float(t_b0); 9 | float b1=as_float(t_b1); 10 | 11 | a0+=b0; 12 | a1+=b1; 13 | 14 | uint64_t c0 = (uint64_t)(as_uint32(a0))<<0; 15 | uint64_t c1 = (uint64_t)(as_uint32(a1))<<32; 16 | return c0 | c1; 17 | //return (uint64_t) _mm_adds_pu16((__m64)ops[0], (__m64)ops[1]); 18 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FAdd64.h: -------------------------------------------------------------------------------- 1 | double a = as_double(ops[0]); 2 | double b = as_double(ops[1]); 3 | double c = a+b; 4 | return as_uint64(c); 5 | -------------------------------------------------------------------------------- 
/softbrain-config/src/insts/FMul32x2.h: -------------------------------------------------------------------------------- 1 | uint32_t t_a0 = (ops[0]&0x00000000FFFFFFFF)>>0; 2 | uint32_t t_a1 = (ops[0]&0xFFFFFFFF00000000)>>32; 3 | uint32_t t_b0 = (ops[1]&0x00000000FFFFFFFF)>>0; 4 | uint32_t t_b1 = (ops[1]&0xFFFFFFFF00000000)>>32; 5 | 6 | float a0=as_float(t_a0); 7 | float a1=as_float(t_a1); 8 | float b0=as_float(t_b0); 9 | float b1=as_float(t_b1); 10 | 11 | a0*=b0; 12 | a1*=b1; 13 | 14 | uint64_t c0 = (uint64_t)(as_uint32(a0))<<0; 15 | uint64_t c1 = (uint64_t)(as_uint32(a1))<<32; 16 | return c0 | c1; 17 | 18 | //return (uint64_t) _mm_mullo_pi32((__m64)ops[0], (__m64)ops[1]); -- mullo_pi32 doesnt exisit in mm intrinsics 19 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FMul64.h: -------------------------------------------------------------------------------- 1 | double a = as_double(ops[0]); 2 | double b = as_double(ops[1]); 3 | double c = a*b; 4 | return as_uint64(c); 5 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FRed32x2.h: -------------------------------------------------------------------------------- 1 | uint32_t t_r0 = (ops[0]&0x00000000FFFFFFFF)>>0; 2 | uint32_t t_r1 = (ops[0]&0xFFFFFFFF00000000)>>32; 3 | 4 | float r0=as_float(t_r0); 5 | float r1=as_float(t_r1); 6 | 7 | float result; 8 | if(ops.size() > 1) { //additional op is acc 9 | result = r0 + r1 + as_float((uint32_t)ops[1]); 10 | } else { 11 | result = r0 + r1; 12 | } 13 | return (uint64_t)(as_uint32(result)); 14 | 15 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FxAdd16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | uint16_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | uint16_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | uint16_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | uint16_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | 10 | uint16_t sum = a0 + b0; 11 | 12 | if (!((a0 ^ b0) & 0x8000) && ((a0 ^ sum) & 0x8000) && !(a0 & 0x8000)) 13 | a0 = 0x7FFF; 14 | else if (!((a0 ^ b0) & 0x8000) && ((a0 ^ sum) & 0x8000) && (a0 & 0x8000)) 15 | a0 = 0x8001; 16 | else 17 | a0 = sum; 18 | 19 | sum = a1 + b1; 20 | 21 | if (!((a1 ^ b1) & 0x8000) && ((a1 ^ sum) & 0x8000) && !(a1 & 0x8000)) 22 | a1 = 0x7FFF; 23 | else if (!((a1 ^ b1) & 0x8000) && ((a1 ^ sum) & 0x8000) && (a1 & 0x8000)) 24 | a1 = 0x8001; 25 | else 26 | a1 = sum; 27 | 28 | sum = a2 + b2; 29 | 30 | if (!((a2 ^ b2) & 0x8000) && ((a2 ^ sum) & 0x8000) && !(a2 & 0x8000)) 31 | a2 = 0x7FFF; 32 | else if (!((a2 ^ b2) & 0x8000) && ((a2 ^ sum) & 0x8000) && (a2 & 0x8000)) 33 | a2 = 0x8001; 34 | else 35 | a2 = sum; 36 | 37 | sum = a3 + b3; 38 | 39 | if (!((a3 ^ b3) & 0x8000) && ((a3 ^ sum) & 0x8000) && !(a3 & 0x8000)) 40 | a3 = 0x7FFF; 41 | else if (!((a3 ^ b3) & 0x8000) && ((a3 ^ sum) & 0x8000) && (a3 & 0x8000)) 42 | a3 = 0x8001; 43 | else 44 | a3 = sum; 45 | 46 | uint64_t c0 = (uint64_t)(a0)<<0; 47 | uint64_t c1 = (uint64_t)(a1)<<16; 48 | uint64_t c2 = (uint64_t)(a2)<<32; 49 | uint64_t c3 = (uint64_t)(a3)<<48; 50 | return c0 | c1 | c2 | c3; 51 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FxAdd32x2.h: 
-------------------------------------------------------------------------------- 1 | uint32_t a0 = (ops[0]&0x00000000FFFFFFFF)>>0; 2 | uint32_t a1 = (ops[0]&0xFFFFFFFF00000000)>>32; 3 | uint32_t b0 = (ops[1]&0x00000000FFFFFFFF)>>0; 4 | uint32_t b1 = (ops[1]&0xFFFFFFFF00000000)>>32; 5 | 6 | uint32_t sum; 7 | 8 | sum = a0 + b0; 9 | if (!((a0 ^ b0) & 0x80000000) && ((a0 ^ sum) & 0x80000000) && !(a0 & 0x80000000)) 10 | a0 = 0x7FFFFFFF; 11 | else if (!((a0 ^ b0) & 0x80000000) && ((a0 ^ sum) & 0x80000000) && (a0 & 0x80000000)) 12 | a0 = 0x80000001; 13 | else 14 | a0 = sum; 15 | 16 | sum = a1 + b1; 17 | if (!((a1 ^ b1) & 0x80000000) && ((a1 ^ sum) & 0x80000000) && !(a1 & 0x80000000)) 18 | a1 = 0x7FFFFFFF; 19 | else if (!((a1 ^ b1) & 0x80000000) && ((a1 ^ sum) & 0x80000000) && (a1 & 0x80000000)) 20 | a1 = 0x80000001; 21 | else 22 | a1 = sum; 23 | 24 | uint64_t c0 = ((uint64_t)(a0))<<0; 25 | uint64_t c1 = ((uint64_t)(a1))<<32; 26 | return c0 | c1; 27 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FxExp16x4.h: -------------------------------------------------------------------------------- 1 | int16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | int16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | int16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | int16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | if ((ops.size() > 1) && (ops[1] == 0)) 7 | return ops[0]; 8 | 9 | double d0 = FIX_TO_DOUBLE(a0); 10 | double d1 = FIX_TO_DOUBLE(a1); 11 | double d2 = FIX_TO_DOUBLE(a2); 12 | double d3 = FIX_TO_DOUBLE(a3); 13 | 14 | d0 = exp(d0); 15 | d1 = exp(d1); 16 | d2 = exp(d2); 17 | d3 = exp(d3); 18 | 19 | int16_t b0 = DOUBLE_TO_FIX(d0); 20 | int16_t b1 = DOUBLE_TO_FIX(d1); 21 | int16_t b2 = DOUBLE_TO_FIX(d2); 22 | int16_t b3 = DOUBLE_TO_FIX(d3); 23 | 24 | uint64_t c0 = ((uint64_t)(b0)<<0)&0x000000000000FFFF; 25 | uint64_t c1 = ((uint64_t)(b1)<<16)&0x00000000FFFF0000; 26 | uint64_t c2 = ((uint64_t)(b2)<<32)&0x0000FFFF00000000; 27 | uint64_t c3 = ((uint64_t)(b3)<<48)&0xFFFF000000000000; 28 | 29 | return c0 | c1 | c2 | c3; 30 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FxMul16x4.h: -------------------------------------------------------------------------------- 1 | int16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | int16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | int16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | int16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | int16_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | int16_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | int16_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | int16_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | 10 | int32_t im0 = ((int32_t)a0 * (int32_t)b0) >> 11; 11 | int16_t m0 = im0 > (int16_t)0x7FFF ? (int16_t)0x7FFF : (im0 < (int32_t)0xFFFF8001 ? (int32_t)0xFFFF8001 : im0); 12 | 13 | int32_t im1 = ((int32_t)a1 * (int32_t)b1) >> 11; 14 | int16_t m1 = im1 > 0x7FFF ? 0x7FFF : (im1 < (int32_t)(int16_t)0xFFFF8001 ? (int32_t)(int16_t)0xFFFF8001 : im1); 15 | 16 | int32_t im2 = ((int32_t)a2 * (int32_t)b2) >> 11; 17 | int16_t m2 = im2 > (int16_t)0x7FFF ? (int16_t)0x7FFF : (im2 < (int32_t)0xFFFF8001 ? (int32_t)0xFFFF8001 : im2); 18 | 19 | int32_t im3 = ((int32_t)a3 * (int32_t)b3) >> 11; 20 | int16_t m3 = im3 > (int16_t)0x7FFF ? (int16_t)0x7FFF : (im3 < (int32_t)0xFFFF8001 ? 
(int32_t)0xFFFF8001 : im3); 21 | 22 | uint64_t c0 = ((uint64_t)(m0)<<0)&0x000000000000FFFF; 23 | uint64_t c1 = ((uint64_t)(m1)<<16)&0x00000000FFFF0000; 24 | uint64_t c2 = ((uint64_t)(m2)<<32)&0x0000FFFF00000000; 25 | uint64_t c3 = ((uint64_t)(m3)<<48)&0xFFFF000000000000; 26 | 27 | return c0 | c1 | c2 | c3; 28 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FxMul32x2.h: -------------------------------------------------------------------------------- 1 | int32_t a0 = (ops[0]&0x00000000FFFFFFFF)>>0; 2 | int32_t a1 = (ops[0]&0xFFFFFFFF00000000)>>32; 3 | int32_t b0 = (ops[1]&0x00000000FFFFFFFF)>>0; 4 | int32_t b1 = (ops[1]&0xFFFFFFFF00000000)>>32; 5 | 6 | int64_t im0 = ((int64_t)a0 * (int64_t)b0) >> 14; // 14 fractional bits 7 | int32_t m0 = im0 > (int64_t)0x000000007FFFFFFF ? (int32_t)0x7FFFFFFF : (im0 < (int64_t)0xFFFFFFFF80000001 ? (int32_t)0x80000001 : im0); 8 | 9 | int64_t im1 = ((int64_t)a1 * (int64_t)b1) >> 14; 10 | int32_t m1 = im1 > (int64_t)0x000000007FFFFFFF ? (int32_t)0x7FFFFFFF : (im1 < (int64_t)0xFFFFFFFF80000001 ? (int32_t)0x80000001 : im1); 11 | 12 | uint64_t c0 = ((uint64_t)(m0)<<0)&0x00000000FFFFFFFF; 13 | uint64_t c1 = ((uint64_t)(m1)<<32)&0xFFFFFFFF00000000; 14 | 15 | return c0 | c1; 16 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FxRed16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t r0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t r1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t r2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t r3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | uint16_t sum; 7 | 8 | uint16_t sum0 = r0 + r1; 9 | if (!((r0 ^ r1) & 0x8000) && ((r0 ^ sum0) & 0x8000) && !(r0 & 0x8000)) 10 | sum0 = 0x7FFF; 11 | else if (!((r0 ^ r1) & 0x8000) && ((r0 ^ sum0) & 0x8000) && (r0 & 0x8000)) 12 | sum0 = 0x8001; 13 | 14 | uint16_t sum1 = r2 + r3; 15 | if (!((r2 ^ r3) & 0x8000) && ((r2 ^ sum1) & 0x8000) && !(r2 & 0x8000)) 16 | sum1 = 0x7FFF; 17 | else if (!((r2 ^ r3) & 0x8000) && ((r2 ^ sum1) & 0x8000) && (r2 & 0x8000)) 18 | sum1 = 0x8001; 19 | 20 | uint16_t sum2 = sum0 + sum1; 21 | if (!((sum0 ^ sum1) & 0x8000) && ((sum0 ^ sum2) & 0x8000) && !(sum0 & 0x8000)) 22 | sum2 = 0x7FFF; 23 | else if (!((sum0 ^ sum1) & 0x8000) && ((sum0 ^ sum2) & 0x8000) && (sum0 & 0x8000)) 24 | sum2 = 0x8001; 25 | 26 | if(ops.size() > 1) { //additional op is acc 27 | sum = sum2 + (uint16_t)ops[1]; 28 | if (!((sum2 ^ (uint16_t)ops[1]) & 0x8000) && ((sum2 ^ sum) & 0x8000) && !(sum2 & 0x8000)) 29 | sum = 0x7FFF; 30 | else if (!((sum2 ^ (uint16_t)ops[1]) & 0x8000) && ((sum2 ^ sum) & 0x8000) && (sum2 & 0x8000)) 31 | sum = 0x8001; 32 | } else { 33 | sum = sum2; 34 | } 35 | 36 | return sum; 37 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FxRed32x2.h: -------------------------------------------------------------------------------- 1 | uint32_t r0 = (ops[0]&0x00000000FFFFFFFF)>>0; 2 | uint32_t r1 = (ops[0]&0xFFFFFFFF00000000)>>32; 3 | 4 | uint32_t sum; 5 | 6 | uint32_t sum0 = r0 + r1; 7 | if (!((r0 ^ r1) & 0x80000000) && ((r0 ^ sum0) & 0x80000000) && !(r0 & 0x80000000)) 8 | sum0 = 0x7FFFFFFF; 9 | else if (!((r0 ^ r1) & 0x80000000) && ((r0 ^ sum0) & 0x80000000) && (r0 & 0x80000000)) 10 | sum0 = 0x80000001; 11 | 12 | if(ops.size() > 1) { //additional op is acc 13 | sum = sum0 + (uint32_t)ops[1]; 14 | if (!((sum0 ^ (uint32_t)ops[1]) & 0x80000000) && ((sum0 ^ sum) & 
0x80000000) && !(sum0 & 0x80000000)) 15 | sum = 0x7FFFFFFF; 16 | else if (!((sum0 ^ (uint32_t)ops[1]) & 0x80000000) && ((sum0 ^ sum) & 0x80000000) && (sum0 & 0x80000000)) 17 | sum = 0x80000001; 18 | } else { 19 | sum = sum0; 20 | } 21 | 22 | return sum; 23 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FxRelu16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t i1 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t i2 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t i3 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t i4 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | if ((ops.size() > 1) && (ops[1] == 0)) 7 | return ops[0]; 8 | 9 | if (i1 & 0x8000) 10 | i1 = 0; 11 | 12 | if (i2 & 0x8000) 13 | i2 = 0; 14 | 15 | if (i3 & 0x8000) 16 | i3 = 0; 17 | 18 | if (i4 & 0x8000) 19 | i4 = 0; 20 | 21 | uint64_t o1 = (uint64_t)(i1)<<0; 22 | uint64_t o2 = (uint64_t)(i2)<<16; 23 | uint64_t o3 = (uint64_t)(i3)<<32; 24 | uint64_t o4 = (uint64_t)(i4)<<48; 25 | 26 | return o1 | o2 | o3 | o4; 27 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FxSig16x4.h: -------------------------------------------------------------------------------- 1 | int16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | int16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | int16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | int16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | if ((ops.size() > 1) && (ops[1] == 0)) 7 | return ops[0]; 8 | 9 | double d0 = FIX_TO_DOUBLE(a0); 10 | double d1 = FIX_TO_DOUBLE(a1); 11 | double d2 = FIX_TO_DOUBLE(a2); 12 | double d3 = FIX_TO_DOUBLE(a3); 13 | 14 | d0 = 1 / (1 + exp(-d0)); 15 | d1 = 1 / (1 + exp(-d1)); 16 | d2 = 1 / (1 + exp(-d2)); 17 | d3 = 1 / (1 + exp(-d3)); 18 | 19 | int16_t b0 = DOUBLE_TO_FIX(d0); 20 | int16_t b1 = DOUBLE_TO_FIX(d1); 21 | int16_t b2 = DOUBLE_TO_FIX(d2); 22 | int16_t b3 = DOUBLE_TO_FIX(d3); 23 | 24 | uint64_t c0 = ((uint64_t)(b0)<<0)&0x000000000000FFFF; 25 | uint64_t c1 = ((uint64_t)(b1)<<16)&0x00000000FFFF0000; 26 | uint64_t c2 = ((uint64_t)(b2)<<32)&0x0000FFFF00000000; 27 | uint64_t c3 = ((uint64_t)(b3)<<48)&0xFFFF000000000000; 28 | 29 | return c0 | c1 | c2 | c3; 30 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/FxTanh16x4.h: -------------------------------------------------------------------------------- 1 | int16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | int16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | int16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | int16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | if ((ops.size() > 1) && (ops[1] == 0)) 7 | return ops[0]; 8 | 9 | double d0 = FIX_TO_DOUBLE(a0); 10 | double d1 = FIX_TO_DOUBLE(a1); 11 | double d2 = FIX_TO_DOUBLE(a2); 12 | double d3 = FIX_TO_DOUBLE(a3); 13 | 14 | d0 = tanh(d0); 15 | d1 = tanh(d1); 16 | d2 = tanh(d2); 17 | d3 = tanh(d3); 18 | 19 | int16_t b0 = DOUBLE_TO_FIX(d0); 20 | int16_t b1 = DOUBLE_TO_FIX(d1); 21 | int16_t b2 = DOUBLE_TO_FIX(d2); 22 | int16_t b3 = DOUBLE_TO_FIX(d3); 23 | 24 | uint64_t c0 = ((uint64_t)(b0)<<0)&0x000000000000FFFF; 25 | uint64_t c1 = ((uint64_t)(b1)<<16)&0x00000000FFFF0000; 26 | uint64_t c2 = ((uint64_t)(b2)<<32)&0x0000FFFF00000000; 27 | uint64_t c3 = ((uint64_t)(b3)<<48)&0xFFFF000000000000; 28 | 29 | return c0 | c1 | c2 | c3; 30 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/HAdd16x4.h: 
-------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0xFFFF000000000000)>>48; 2 | uint16_t a1 = (ops[0]&0x0000FFFF00000000)>>32; 3 | uint16_t a2 = (ops[0]&0x00000000FFFF0000)>>16; 4 | uint16_t a3 = (ops[0]&0x000000000000FFFF)>>0; 5 | 6 | uint16_t b0 = (ops[1]&0xFFFF000000000000)>>48; 7 | //uint16_t b1 = (ops[1]&0x0000FFFF00000000)>>32; 8 | //uint16_t b2 = (ops[1]&0x00000000FFFF0000)>>16; 9 | //uint16_t b3 = (ops[1]&0x000000000000FFFF)>>0; 10 | 11 | uint64_t c0 = (uint64_t)(a0+a1)<<48; 12 | uint64_t c1 = (uint64_t)(a1+a2)<<32; 13 | uint64_t c2 = (uint64_t)(a2+a3)<<16; 14 | uint64_t c3 = (uint64_t)(a3+b0)<<0; 15 | 16 | return c0 | c1 | c2 | c3; 17 | //return (uint64_t) _mm_adds_pu16((__m64)ops[0], (__m64)ops[1]); 18 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/ICmpEQ.h: -------------------------------------------------------------------------------- 1 | return ops[0] == ops[1]; 2 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/LShf64.h: -------------------------------------------------------------------------------- 1 | if(ops[1]==64) { 2 | return 0; 3 | } 4 | return ops[0] << ops[1]; 5 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Max16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | uint16_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | uint16_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | uint16_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | uint16_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | uint16_t t0 = a0 >= b0 ? a0 : b0; 10 | uint16_t t1 = a1 >= b1 ? a1 : b1; 11 | uint16_t t2 = a2 >= b2 ? a2 : b2; 12 | uint16_t t3 = a3 >= b3 ? a3 : b3; 13 | uint64_t c0 = (uint64_t)(t0)<<0; 14 | uint64_t c1 = (uint64_t)(t1)<<16; 15 | uint64_t c2 = (uint64_t)(t2)<<32; 16 | uint64_t c3 = (uint64_t)(t3)<<48; 17 | return c0 | c1 | c2 | c3; 18 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Min16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | uint16_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | uint16_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | uint16_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | uint16_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | uint16_t t0 = a0 <= b0 ? a0 : b0; 10 | uint16_t t1 = a1 <= b1 ? a1 : b1; 11 | uint16_t t2 = a2 <= b2 ? a2 : b2; 12 | uint16_t t3 = a3 <= b3 ? 
a3 : b3; 13 | uint64_t c0 = (uint64_t)(t0)<<0; 14 | uint64_t c1 = (uint64_t)(t1)<<16; 15 | uint64_t c2 = (uint64_t)(t2)<<32; 16 | uint64_t c3 = (uint64_t)(t3)<<48; 17 | return c0 | c1 | c2 | c3; 18 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Mul16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | uint16_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | uint16_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | uint16_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | uint16_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | a0*=b0; 10 | a1*=b1; 11 | a2*=b2; 12 | a3*=b3; 13 | uint64_t c0 = (uint64_t)(a0)<<0; 14 | uint64_t c1 = (uint64_t)(a1)<<16; 15 | uint64_t c2 = (uint64_t)(a2)<<32; 16 | uint64_t c3 = (uint64_t)(a3)<<48; 17 | return c0 | c1 | c2 | c3; 18 | //return (uint64_t) _mm_mullo_pi16((__m64)ops[0], (__m64)ops[1]); 19 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Mul32x2.h: -------------------------------------------------------------------------------- 1 | uint32_t a0 = (ops[0]&0x00000000FFFFFFFF)>>0; 2 | uint32_t a1 = (ops[0]&0xFFFFFFFF00000000)>>32; 3 | uint32_t b0 = (ops[1]&0x00000000FFFFFFFF)>>0; 4 | uint32_t b1 = (ops[1]&0xFFFFFFFF00000000)>>32; 5 | 6 | a0*=b0; 7 | a1*=b1; 8 | 9 | uint64_t c0 = (uint64_t)(a0)<<0; 10 | uint64_t c1 = (uint64_t)(a1)<<32; 11 | return c0 | c1; 12 | 13 | //return (uint64_t) _mm_mullo_pi32((__m64)ops[0], (__m64)ops[1]); -- mullo_pi32 doesnt exisit in mm intrinsics 14 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Mul64.h: -------------------------------------------------------------------------------- 1 | return ops[0] * ops[1]; 2 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Or.h: -------------------------------------------------------------------------------- 1 | return ops[0] | ops[1]; 2 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/RShf16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x00000000FFFFFFFF)>>0; 2 | uint16_t a1 = (ops[0]&0xFFFFFFFF00000000)>>32; 3 | 4 | uint64_t b = ops[1]; 5 | if(ops.size()==1) { 6 | b = 2; 7 | } 8 | uint64_t c0 = (uint64_t)(a0>>b)<<0; 9 | uint64_t c1 = (uint64_t)(a1>>b)<<32; 10 | return c0 | c1; 11 | //return (uint64_t) _mm_adds_pu16((__m64)ops[0], (__m64)ops[1]); 12 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/RShf2_16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | uint64_t b = ops[1]; 7 | if(ops.size()==1) { 8 | b = 2; 9 | } 10 | uint64_t c0 = (uint64_t)(a0>>b)<<0; 11 | uint64_t c1 = (uint64_t)(a1>>b)<<16; 12 | uint64_t c2 = (uint64_t)(a2>>b)<<32; 13 | uint64_t c3 = (uint64_t)(a3>>b)<<48; 14 | return c0 | c1 | c2 | c3; 15 | //return (uint64_t) _mm_adds_pu16((__m64)ops[0], (__m64)ops[1]); 16 | 
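The FxAdd16x4.h, FxAdd32x2.h, FxRed16x4.h and FxRed32x2.h bodies above implement signed saturating addition by hand: overflow is flagged when both operands share a sign bit but the sum's sign differs, and the result is clamped to 0x7FFF on the positive side or 0x8001 on the negative side (0x8001, not 0x8000, is the negative clamp used throughout these instruction files). A minimal standalone sketch of the same 16-bit idiom follows; the helper name sat_add16 and its packaging as a function are illustrative only, since the repo inlines this logic once per lane:

    #include <cstdint>

    // 16-bit signed saturating add, mirroring the overflow test in FxAdd16x4.h / FxRed16x4.h.
    static inline uint16_t sat_add16(uint16_t a, uint16_t b) {
      uint16_t sum = a + b;
      // Overflow only if a and b share a sign and the sum's sign differs from a's.
      if (!((a ^ b) & 0x8000) && ((a ^ sum) & 0x8000))
        return (a & 0x8000) ? 0x8001 : 0x7FFF;  // clamp, using the repo's 0x8001 negative bound
      return sum;
    }
    // e.g. sat_add16(0x7000, 0x2000) == 0x7FFF, sat_add16(0x9000, 0x9000) == 0x8001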
-------------------------------------------------------------------------------- /softbrain-config/src/insts/RShf32x2.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | uint64_t b = ops[1]; 7 | if(ops.size()==1) { 8 | b = 2; 9 | } 10 | uint64_t c0 = (uint64_t)(a0>>b)<<0; 11 | uint64_t c1 = (uint64_t)(a1>>b)<<16; 12 | uint64_t c2 = (uint64_t)(a2>>b)<<32; 13 | uint64_t c3 = (uint64_t)(a3>>b)<<48; 14 | return c0 | c1 | c2 | c3; 15 | //return (uint64_t) _mm_adds_pu16((__m64)ops[0], (__m64)ops[1]); 16 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/RShf4_16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | uint64_t b = ops[1]; 7 | if(ops.size()==1) { 8 | b = 4; 9 | } 10 | uint64_t c0 = (uint64_t)(a0>>b)<<0; 11 | uint64_t c1 = (uint64_t)(a1>>b)<<16; 12 | uint64_t c2 = (uint64_t)(a2>>b)<<32; 13 | uint64_t c3 = (uint64_t)(a3>>b)<<48; 14 | return c0 | c1 | c2 | c3; 15 | //return (uint64_t) _mm_adds_pu16((__m64)ops[0], (__m64)ops[1]); 16 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/RShf64.h: -------------------------------------------------------------------------------- 1 | if(ops[1]==64) { 2 | return 0; 3 | } 4 | return ops[0] >> ops[1]; 5 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Red16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t r0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t r1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t r2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t r3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | if(ops.size() > 1) { //additional op is acc 7 | return (r0+r1+r2+r3+((uint16_t)ops[1])); 8 | } 9 | return (r0+r1+r2+r3); 10 | 11 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Red32x2.h: -------------------------------------------------------------------------------- 1 | uint32_t r0 = (ops[0]&0x00000000FFFFFFFF)>>0; 2 | uint32_t r1 = (ops[0]&0xFFFFFFFF00000000)>>32; 3 | 4 | if(ops.size() > 1) { //additional op is acc 5 | return (r0+r1+((uint32_t)ops[1])); 6 | } 7 | return (r0+r1); 8 | 9 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/RedMax16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t r0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t r1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t r2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t r3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | uint16_t x = r0; 7 | if(r1 > x) {x=r1;} 8 | if(r2 > x) {x=r2;} 9 | if(r3 > x) {x=r3;} 10 | 11 | if(ops.size() > 1) { //additional op is acc 12 | uint16_t b = (uint16_t)ops[1]; 13 | if(b > x) {x=b;} 14 | } 15 | return x; 16 | 17 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/RedMin16x4.h: 
-------------------------------------------------------------------------------- 1 | uint16_t r0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t r1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t r2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t r3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | uint16_t x = r0; 7 | if(r1 < x) {x=r1;} 8 | if(r2 < x) {x=r2;} 9 | if(r3 < x) {x=r3;} 10 | 11 | if(ops.size() > 1) { //additional op is acc 12 | uint16_t b = (uint16_t)ops[1]; 13 | if(b < x) {x=b;} 14 | } 15 | return x; 16 | 17 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/RedSMax16x4.h: -------------------------------------------------------------------------------- 1 | int16_t r0 = (ops[0]&0x000000000000FFFF)>>0; 2 | int16_t r1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | int16_t r2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | int16_t r3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | int16_t x = r0; 7 | if(r1 > x) {x=r1;} 8 | if(r2 > x) {x=r2;} 9 | if(r3 > x) {x=r3;} 10 | 11 | if(ops.size() > 1) { //additional op is acc 12 | int16_t b = (int16_t)ops[1]; 13 | if(b > x) {x=b;} 14 | } 15 | return (uint64_t)x; 16 | 17 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/RedSMin16x4.h: -------------------------------------------------------------------------------- 1 | int16_t r0 = (ops[0]&0x000000000000FFFF)>>0; 2 | int16_t r1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | int16_t r2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | int16_t r3 = (ops[0]&0xFFFF000000000000)>>48; 5 | 6 | int16_t x = r0; 7 | if(r1 < x) {x=r1;} 8 | if(r2 < x) {x=r2;} 9 | if(r3 < x) {x=r3;} 10 | 11 | if(ops.size() > 1) { //additional op is acc 12 | int16_t b = (int16_t)ops[1]; 13 | if(b < x) {x=b;} 14 | } 15 | return (uint64_t)x; 16 | 17 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/SMax16x4.h: -------------------------------------------------------------------------------- 1 | int16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | int16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | int16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | int16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | int16_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | int16_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | int16_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | int16_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | int16_t t0 = a0 >= b0 ? a0 : b0; 10 | int16_t t1 = a1 >= b1 ? a1 : b1; 11 | int16_t t2 = a2 >= b2 ? a2 : b2; 12 | int16_t t3 = a3 >= b3 ? a3 : b3; 13 | uint64_t c0 = (uint64_t)(t0)<<0; 14 | uint64_t c1 = (uint64_t)(t1)<<16; 15 | uint64_t c2 = (uint64_t)(t2)<<32; 16 | uint64_t c3 = (uint64_t)(t3)<<48; 17 | return c0 | c1 | c2 | c3; 18 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/SMin16x4.h: -------------------------------------------------------------------------------- 1 | int16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | int16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | int16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | int16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | int16_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | int16_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | int16_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | int16_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | int16_t t0 = a0 <= b0 ? a0 : b0; 10 | int16_t t1 = a1 <= b1 ? a1 : b1; 11 | int16_t t2 = a2 <= b2 ? a2 : b2; 12 | int16_t t3 = a3 <= b3 ? 
a3 : b3; 13 | uint64_t c0 = (uint64_t)(t0)<<0; 14 | uint64_t c1 = (uint64_t)(t1)<<16; 15 | uint64_t c2 = (uint64_t)(t2)<<32; 16 | uint64_t c3 = (uint64_t)(t3)<<48; 17 | return c0 | c1 | c2 | c3; 18 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Select.h: -------------------------------------------------------------------------------- 1 | return ops[2]==0 ? ops[0] : ops[1]; 2 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Sig16.h: -------------------------------------------------------------------------------- 1 | #define SIG (op*1024/(1024+op)) 2 | //#define SIG op 3 | 4 | uint16_t op = (uint16_t)ops[0]; 5 | 6 | if(ops.size() > 1) { 7 | if(ops[1]) { 8 | return (uint64_t) SIG; 9 | } else { 10 | return ops[0]; 11 | } 12 | } 13 | return (uint64_t) SIG; 14 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Sub16x4.h: -------------------------------------------------------------------------------- 1 | uint16_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint16_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint16_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint16_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | uint16_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | uint16_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | uint16_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | uint16_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | a0-=b0; 10 | a1-=b1; 11 | a2-=b2; 12 | a3-=b3; 13 | uint64_t c0 = (uint64_t)(a0)<<0; 14 | uint64_t c1 = (uint64_t)(a1)<<16; 15 | uint64_t c2 = (uint64_t)(a2)<<32; 16 | uint64_t c3 = (uint64_t)(a3)<<48; 17 | return c0 | c1 | c2 | c3; 18 | //return (uint64_t) _mm_adds_pu16((__m64)ops[0], (__m64)ops[1]); 19 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Sub64.h: -------------------------------------------------------------------------------- 1 | return ops[0] - ops[1]; 2 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/TAdd16x4.h: -------------------------------------------------------------------------------- 1 | uint32_t a0 = (ops[0]&0x000000000000FFFF)>>0; 2 | uint32_t a1 = (ops[0]&0x00000000FFFF0000)>>16; 3 | uint32_t a2 = (ops[0]&0x0000FFFF00000000)>>32; 4 | uint32_t a3 = (ops[0]&0xFFFF000000000000)>>48; 5 | uint32_t b0 = (ops[1]&0x000000000000FFFF)>>0; 6 | uint32_t b1 = (ops[1]&0x00000000FFFF0000)>>16; 7 | uint32_t b2 = (ops[1]&0x0000FFFF00000000)>>32; 8 | uint32_t b3 = (ops[1]&0xFFFF000000000000)>>48; 9 | a0+=b0; 10 | a1+=b1; 11 | a2+=b2; 12 | a3+=b3; 13 | uint64_t c0 = (uint64_t)(a0&0x0000FFFF)<<0; 14 | uint64_t c1 = (uint64_t)(a1&0x0000FFFF)<<16; 15 | uint64_t c2 = (uint64_t)(a2&0x0000FFFF)<<32; 16 | uint64_t c3 = (uint64_t)(a3&0x0000FFFF)<<48; 17 | return c0 | c1 | c2 | c3; 18 | //return (uint64_t) _mm_adds_pu16((__m64)ops[0], (__m64)ops[1]); 19 | -------------------------------------------------------------------------------- /softbrain-config/src/insts/Xor.h: -------------------------------------------------------------------------------- 1 | return ops[0] ^ ops[1]; 2 | -------------------------------------------------------------------------------- /softbrain-config/src/model.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "model.h" 10 | #include 
"model_parsing.h" 11 | 12 | using namespace std; 13 | using namespace SB_CONFIG; 14 | 15 | void SbModel::printGamsKinds(ostream& os) { 16 | os << "set K \"Type of Node\" /Input,Output"; 17 | 18 | for(int i = 2; i < SB_NUM_TYPES; ++i) { 19 | os << "," << name_of_inst((sb_inst_t)i); 20 | } 21 | os << "/"; 22 | } 23 | 24 | SbModel::SbModel(SubModel* subModel, bool multi_config) { 25 | 26 | if (subModel) { 27 | _subModel = subModel; 28 | } else { 29 | _subModel = new SubModel(5, 5, SubModel::PortType::everysw, multi_config); 30 | } 31 | } 32 | 33 | SbModel::SbModel(bool multi_config) { 34 | _subModel = new SubModel(5, 5, SubModel::PortType::everysw, multi_config); 35 | } 36 | 37 | void SbModel::parse_exec(std::istream& istream) { 38 | string param,value; 39 | while(istream.good()) { 40 | if(istream.peek()=='[') break; //break out if done 41 | 42 | ModelParsing::ReadPair(istream,param,value); 43 | 44 | ModelParsing::trim(param); 45 | ModelParsing::trim(value); 46 | 47 | if(param.length()==0) { 48 | continue; 49 | } 50 | 51 | if(param == string("CMD_DISPATCH")) { 52 | if(value == string("INORDER")) { 53 | set_dispatch_inorder(true); 54 | } else if (value == string("OOO")) { 55 | set_dispatch_inorder(false); 56 | } else { 57 | assert(0 && "Dispatch was not INORDER or OOO"); 58 | } 59 | } else if(param == string("CMD_DISPATCH_WIDTH")) { 60 | istringstream(value) >> _dispatch_width; 61 | } 62 | 63 | } 64 | } 65 | 66 | //File constructor 67 | SbModel::SbModel(const char* filename, bool multi_config) 68 | { 69 | ifstream ifs(filename, ios::in); 70 | string param,value; 71 | 72 | if(ifs.fail()) 73 | { 74 | cerr << "Could Not Open: " << filename << "\n"; 75 | return; 76 | } 77 | 78 | char line[512]; 79 | 80 | while(ifs.good()) 81 | { 82 | ifs.getline(line,512); 83 | //string line; 84 | 85 | if(ModelParsing::StartsWith(line,"[exec-model]")) { 86 | parse_exec(ifs); 87 | } 88 | 89 | if(ModelParsing::StartsWith(line,"[fu-model]")){ 90 | _fuModel= new FuModel(ifs); 91 | } 92 | 93 | if(ModelParsing::StartsWith(line,"[sub-model]")){ 94 | if(_fuModel==NULL) { 95 | cerr<< "No Fu Model Specified\n"; 96 | exit(1); 97 | } 98 | _subModel=new SubModel(ifs, _fuModel, multi_config); 99 | } 100 | 101 | if(ModelParsing::StartsWith(line,"[io-model]")) { 102 | if(_subModel==NULL) { 103 | cerr<< "No Sub Model Specified\n"; 104 | exit(1); 105 | } 106 | 107 | _subModel->parse_io(ifs); 108 | } 109 | } 110 | } 111 | 112 | extern "C" void libsbconfig_is_present() {} 113 | 114 | -------------------------------------------------------------------------------- /softbrain-config/src/model.h: -------------------------------------------------------------------------------- 1 | #ifndef __SB_MODEL_H__ 2 | #define __SB_MODEL_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | //#include "inst_model.h" 11 | #include "fu_model.h" 12 | #include "sub_model.h" 13 | 14 | namespace SB_CONFIG { 15 | 16 | class SbModel { 17 | public: 18 | 19 | SbModel(bool multi=false); 20 | SbModel(const char* filename, bool multi=false); 21 | SbModel(SubModel* sub, bool multi=false); 22 | 23 | FuModel* fuModel() {return (_fuModel);} 24 | SubModel* subModel() {return (_subModel);} 25 | 26 | void printGamsKinds(std::ostream& os); 27 | 28 | void set_dispatch_inorder(bool d) { _dispatch_inorder = d; } 29 | bool dispatch_inorder() { return _dispatch_inorder; } 30 | 31 | void set_dispatch_width(int w) { _dispatch_width = w;} 32 | int dispatch_width() { return _dispatch_width; } 33 | 34 | private: 35 | //InstModel *instModel; 36 | 
FuModel *_fuModel; 37 | SubModel *_subModel; 38 | 39 | bool _dispatch_inorder = false; 40 | int _dispatch_width = 2; 41 | void parse_exec(std::istream& istream); 42 | }; 43 | 44 | } 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | #endif 54 | -------------------------------------------------------------------------------- /softbrain-config/src/model_parsing.cpp: -------------------------------------------------------------------------------- 1 | #include "model_parsing.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace SB_CONFIG; 9 | using namespace std; 10 | 11 | bool ModelParsing::StartsWith(const std::string& text,const std::string& token) { 12 | if(text.length() < token.length()) return false; 13 | return (text.compare(0, token.length(), token) == 0); 14 | } 15 | 16 | bool ModelParsing::StartsWith(const std::string& text,const char* token) { 17 | if(text.length() < strlen(token)) return false; 18 | return (text.compare(0, strlen(token), token) == 0); 19 | } 20 | 21 | void ModelParsing::trim_comments(std::string& s) { 22 | s = s.substr(0, s.find("#")); 23 | } 24 | 25 | 26 | //This function reads line from an ifstream, and gets a param and value, 27 | //seperated by a ":" 28 | bool ModelParsing::ReadPair(istream& is, string& param, string& value) 29 | { 30 | //char line[512]; 31 | //is.getline(line,512); 32 | 33 | string line; 34 | getline(is, line); 35 | 36 | if(is.fail()) { 37 | param=""; 38 | value=""; 39 | return false; 40 | } 41 | 42 | trim_comments(line); 43 | 44 | std::stringstream ss(line); 45 | getline(ss, param, ':'); 46 | getline(ss, value); 47 | return true; 48 | } 49 | 50 | 51 | // trim from start 52 | void ModelParsing::ltrim(std::string &s) { 53 | s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun(std::isspace)))); 54 | } 55 | 56 | // trim from end 57 | void ModelParsing::rtrim(std::string &s) { 58 | s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(std::isspace))).base(), s.end()); 59 | } 60 | 61 | // trim from both ends 62 | void ModelParsing::trim(std::string &s) { 63 | rtrim(s); 64 | ltrim(s); 65 | } 66 | 67 | bool ModelParsing::stricmp(const std::string& str1, const std::string& str2) { 68 | if (str1.size() != str2.size()) { 69 | return false; 70 | } 71 | for (std::string::const_iterator c1 = str1.begin(), c2 = str2.begin(); c1 != str1.end(); ++c1, ++c2) { 72 | if (tolower(*c1) != tolower(*c2)) { 73 | return false; 74 | } 75 | } 76 | return true; 77 | } 78 | 79 | void ModelParsing::split(const std::string &s, const char delim, std::vector &elems) { 80 | std::stringstream ss(s); 81 | std::string item; 82 | while(std::getline(ss, item, delim)) { 83 | elems.push_back(item); 84 | } 85 | } 86 | 87 | -------------------------------------------------------------------------------- /softbrain-config/src/model_parsing.h: -------------------------------------------------------------------------------- 1 | #ifndef __SB_MODEL_PARSING_H__ 2 | #define __SB_MODEL_PARSING_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | 9 | namespace SB_CONFIG { 10 | class ModelParsing { 11 | public: 12 | static bool StartsWith(const std::string& text,const std::string& token); 13 | static bool StartsWith(const std::string& text,const char* ); 14 | static bool ReadPair(std::istream& ifs, std::string& param, std::string& value); 15 | static void split(const std::string &s, const char delim, std::vector &elems); 16 | 17 | static void ltrim(std::string &s); 18 | static void rtrim(std::string &s); 19 | static void trim(std::string 
&s); 20 | static bool stricmp(const std::string& str1, const std::string& str2); 21 | static void trim_comments(std::string &s); 22 | }; 23 | 24 | 25 | } 26 | 27 | 28 | 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /softbrain-config/src/sub_model.h: -------------------------------------------------------------------------------- 1 | #ifndef __SB_SUB_MODEL_H__ 2 | #define __SB_SUB_MODEL_H__ 3 | 4 | #include "fu_model.h" 5 | #include "direction.h" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace SB_CONFIG { 16 | 17 | class sbnode; 18 | class sbinput; 19 | class sboutput; 20 | 21 | class sbio_interface { 22 | public: 23 | //interf_vec_port_num -> [cgra port_num -> vector_offset_elements] 24 | std::map > > > in_vports; 25 | std::map > > > out_vports; 26 | 27 | //intef_port_num -> possible_elements 28 | std::map > in_ports; 29 | std::map > out_ports; 30 | 31 | 32 | void sort_in_vports(std::vector>& portID2size) { 33 | sort(portID2size, in_vports); 34 | } 35 | 36 | void sort_out_vports(std::vector>& portID2size) { 37 | sort(portID2size, out_vports); 38 | } 39 | 40 | std::vector > >& getDesc_I(int id) { 41 | assert(in_vports.count(id) != 0); 42 | return in_vports[id]; 43 | } 44 | std::vector > >& getDesc_O(int id) { 45 | assert(out_vports.count(id) != 0); 46 | return out_vports[id]; 47 | } 48 | private: 49 | void sort(std::vector>& portID2size, std::map > > >& vports) { 50 | int index = 0; 51 | portID2size.resize(vports.size()); 52 | for(auto i : vports) { 53 | int id = i.first; 54 | int size = i.second.size(); 55 | portID2size[index++] = std::make_pair(id,size); 56 | } 57 | std::sort(portID2size.begin(), portID2size.end(), [](std::pair& left, std::pair& right){ 58 | return left.second < right.second; 59 | }); 60 | } 61 | 62 | 63 | }; 64 | 65 | class sblink { 66 | public: 67 | //enum linktype { input, output, inter, cross } 68 | 69 | sblink() : _ID(LINK_ID++) {} 70 | 71 | sbnode* orig() const {return _orig;} 72 | sbnode* dest() const {return _dest;} 73 | SbDIR::DIR dir() const {return _dir;} 74 | void setdir(SbDIR::DIR dir) { _dir=dir;} 75 | 76 | //Constructor 77 | sblink(sbnode* orig, sbnode* dest) { 78 | _orig=orig; 79 | _dest=dest; 80 | } 81 | 82 | std::string name() const; 83 | std::string gams_name(int config) const; 84 | std::string gams_name(int , int) const; 85 | 86 | protected: 87 | int _ID; 88 | sbnode* _orig; 89 | sbnode* _dest; 90 | SbDIR::DIR _dir; 91 | 92 | private: 93 | friend class SubModel; 94 | static int LINK_ID; 95 | }; 96 | 97 | 98 | class sbnode { 99 | public: 100 | sbnode() : _ID(NODE_ID++) {} 101 | 102 | sblink* add_link(sbnode* node) { 103 | sblink* link = new sblink(this, node); 104 | _out_links.push_back(link); 105 | node->add_back_link(link); 106 | return link; 107 | } 108 | 109 | void add_back_link(sblink* link) { 110 | _in_links.push_back(link); 111 | } 112 | 113 | virtual std::string name() const { 114 | return std::string("loadslice"); 115 | } 116 | virtual std::string gams_name(int config=0) const { 117 | return std::string("loadslice"); 118 | } 119 | 120 | typedef std::vector::const_iterator const_iterator; 121 | const_iterator ibegin() const {return _in_links.begin();} 122 | const_iterator iend() const {return _in_links.end();} 123 | const_iterator obegin() const {return _out_links.begin();} 124 | const_iterator oend() const {return _out_links.end();} 125 | 126 | sblink* getFirstOutLink() { 127 | if(_out_links.size()>0) { 128 | return 
_out_links[0]; 129 | } else { 130 | return NULL; 131 | } 132 | } 133 | 134 | sblink* getFirstInLink() { 135 | if(_in_links.size()>0) { 136 | return _in_links[0]; 137 | } else { 138 | return NULL; 139 | } 140 | } 141 | 142 | sblink* getInLink(SbDIR::DIR dir) { 143 | for(const_iterator I=ibegin(), E=iend();I!=E; ++I) { 144 | sblink* dlink= *I; 145 | if(dlink->dir() == dir) return dlink; 146 | } 147 | return NULL; 148 | } 149 | 150 | sblink* getOutLink(SbDIR::DIR dir) { 151 | for(const_iterator I=obegin(), E=oend();I!=E; ++I) { 152 | sblink* dlink= *I; 153 | if(dlink->dir() == dir) return dlink; 154 | } 155 | return NULL; 156 | } 157 | 158 | int id() {return _ID;} 159 | 160 | protected: 161 | int _ID; 162 | std::vector _in_links; 163 | std::vector _out_links; 164 | 165 | private: 166 | friend class SubModel; 167 | static int NODE_ID; 168 | }; 169 | 170 | class sbswitch : public sbnode { 171 | public: 172 | 173 | sbswitch() : sbnode() {} 174 | 175 | void setXY(int x,int y) {_x=x;_y=y;} 176 | int x() const {return _x;} 177 | int y() const {return _y;} 178 | 179 | std::string name() const { 180 | std::stringstream ss; 181 | ss << "SW" << "_" << _x << "_" << _y; 182 | return ss.str(); 183 | } 184 | 185 | std::string gams_name(int config) const { 186 | std::stringstream ss; 187 | if(config!=0) { 188 | ss << "Sw" << _x << _y << "c" << config; 189 | } else { 190 | ss << "Sw" << _x << _y; 191 | } 192 | return ss.str(); 193 | } 194 | 195 | sbinput* getInput(int i); 196 | 197 | sboutput* getOutput(int i); 198 | 199 | protected: 200 | int _x, _y; 201 | }; 202 | 203 | class sbfu : public sbnode { 204 | public: 205 | 206 | sbfu() : sbnode() {} 207 | 208 | void setFUDef(func_unit_def* fu_def) {_fu_def = fu_def;} 209 | void setXY(int x, int y) {_x=x;_y=y;} 210 | 211 | int x() const {return _x;} 212 | int y() const {return _y;} 213 | 214 | std::string name() const { 215 | std::stringstream ss; 216 | ss << "FU" << "_" << _x << "_" << _y; 217 | return ss.str(); 218 | } 219 | 220 | std::string gams_name(int config) const { 221 | std::stringstream ss; 222 | if(config!=0) { 223 | ss << "Fu" << _x << _y << "c" << config; 224 | } else { 225 | ss << "Fu" << _x << _y; 226 | } 227 | return ss.str(); 228 | } 229 | 230 | func_unit_def* fu_def() {return _fu_def;} 231 | 232 | protected: 233 | int _x, _y; 234 | func_unit_def* _fu_def; 235 | 236 | private: 237 | friend class SubModel; 238 | }; 239 | 240 | class sbinput : public sbnode { 241 | public: 242 | 243 | sbinput() : sbnode() {} 244 | 245 | void setPort(int port) {_port=port;} 246 | int port() const {return _port;} 247 | 248 | std::string name() const { 249 | std::stringstream ss; 250 | ss << "IP" << "_" << _port; 251 | return ss.str(); 252 | } 253 | std::string gams_name(int config) const { 254 | std::stringstream ss; 255 | if(config!=0) { 256 | ss << "I" << _port << "c" << config; 257 | } else { 258 | ss << "I" << _port; 259 | } 260 | return ss.str(); 261 | } 262 | 263 | protected: 264 | int _port; 265 | }; 266 | 267 | class sboutput : public sbnode { 268 | public: 269 | sboutput() : sbnode() {} 270 | 271 | void setPort(int port) {_port=port;} 272 | int port() const {return _port;} 273 | 274 | std::string name() const { 275 | std::stringstream ss; 276 | ss << "OP" << "_" << _port; 277 | return ss.str(); 278 | } 279 | 280 | std::string gams_name(int config) const { 281 | std::stringstream ss; 282 | if(config!=0) { 283 | ss << "O" << _port << "i" << config; 284 | } else { 285 | ss << "O" << _port; 286 | } 287 | return ss.str(); 288 | } 289 | 290 | protected: 291 | 
int _port; 292 | }; 293 | 294 | 295 | class SubModel { 296 | public: 297 | 298 | //Port type of the substrate nodes 299 | //opensp -- dyser opensplyser N + N -1 ips 300 | //three ins -- Softbrain 3 x N 301 | //everywitch -- all switches has ops and ips 302 | enum class PortType {opensp, everysw, threein}; 303 | 304 | typedef std::vector::const_iterator const_input_iterator; 305 | typedef std::vector::const_iterator const_output_iterator; 306 | 307 | SubModel(std::istream& istream, FuModel*, bool multi_config=true); 308 | SubModel(int x, int y, PortType pt=PortType::opensp, int ips=2, int ops=2, bool multi_config=true); 309 | 310 | void PrintGraphviz(std::ostream& ofs); 311 | void PrintGamsModel(std::ostream& ofs, 312 | std::unordered_map >&, 313 | std::unordered_map >&, 314 | std::unordered_map >&, 315 | std::unordered_map>&, /*isInput, port*/ 316 | int n_configs=1); 317 | 318 | int sizex() {return _sizex;} 319 | int sizey() {return _sizey;} 320 | 321 | sbfu* fuAt(int x, int y) {return &(_fus[x][y]);} 322 | sbswitch* switchAt(int x, int y) {return &(_switches[x][y]);} 323 | 324 | sbinput* get_input(int i) {return &(_inputs[i]); } 325 | sboutput* get_output(int i) {return &(_outputs[i]);} 326 | 327 | const_input_iterator input_begin() { return _inputs.begin();} 328 | const_input_iterator input_end() { return _inputs.end();} 329 | 330 | const_output_iterator output_begin() { return _outputs.begin();} 331 | const_output_iterator output_end() { return _outputs.end();} 332 | 333 | //const_output_iterator output_begin() { return _outputs.begin();} 334 | //const_output_iterator output_end() { return _outputs.end();} 335 | 336 | std::vector >& fus() {return _fus;} 337 | std::vector >& switches() {return _switches;} 338 | 339 | bool multi_config() { return _multi_config;} 340 | 341 | sbswitch* cross_switch() {return &_cross_switch;} 342 | sbnode* load_slice() {return &_load_slice;} 343 | 344 | int num_fu() {return NUM_FU;} 345 | 346 | void parse_io(std::istream& istream); 347 | sbio_interface& io_interf() {return _sbio_interf;} 348 | 349 | private: 350 | 351 | //void CreateFUArray(int,int); 352 | 353 | //void SetTotalFUByRatio(); 354 | //void RandDistributeFUs(); 355 | void build_substrate(int x, int y); 356 | void connect_substrate(int x, int y, PortType pt, int ips, int ops,bool multi_config); 357 | 358 | int _sizex, _sizey; //size of SB cgra 359 | bool _multi_config; 360 | std::vector _inputs; 361 | std::vector _outputs; 362 | std::vector > _fus; 363 | std::vector > _switches; 364 | 365 | sbswitch _cross_switch; 366 | sbnode _load_slice; 367 | sbio_interface _sbio_interf; 368 | 369 | int NUM_FU; 370 | }; 371 | 372 | } 373 | 374 | #endif 375 | -------------------------------------------------------------------------------- /softbrain-emu/Makefile: -------------------------------------------------------------------------------- 1 | #CPP=riscv32-unknown-elf-g++ 2 | ifndef SS_TOOLS 3 | $(error SS_TOOLS is undefined) 4 | endif 5 | 6 | 7 | prefix= $(SS_TOOLS) 8 | MKDIR_P = mkdir -p 9 | CPP=g++ 10 | 11 | CFLAGS= -c --std=c++11 -fPIC -g -gdwarf-3 #-DSB_DEBUG_MSG 12 | DFG_FLAGS= --std=c++11 -O3 13 | LIBFLAGS= -shared --std=c++11 14 | 15 | SRCDIR = src 16 | OBJDIR = obj 17 | BINDIR = lib 18 | INCLUDEDIR = include 19 | TARGET = libsoftbrain-emu 20 | 21 | SOURCES := $(wildcard $(SRCDIR)/*.C) 22 | INCLUDES := $(wildcard $(SRCDIR)/*.h) 23 | OBJECTS := $(SOURCES:$(SRCDIR)/%.C=$(OBJDIR)/%.o) 24 | 25 | all: mkdirs install install-lib 26 | create-insts: $(BINDIR)/compile-insts 27 | 28 | install-lib: 
$(BINDIR)/libsoftbrain-emu $(BINDIR)/libsoftbrain-emu-perf $(BINDIR)/libsoftbrain-emu-dbg 29 | 30 | mkdirs: 31 | ${MKDIR_P} ${OBJDIR} 32 | ${MKDIR_P} ${BINDIR} 33 | ${MKDIR_P} ${INCLUDEDIR} 34 | 35 | install: mkdirs install-lib 36 | ${MKDIR_P} ${prefix}/lib 37 | cp lib/* ${prefix}/lib 38 | ${MKDIR_P} ${prefix}/include/softbrain-lib 39 | cp include/* ${prefix}/include/softbrain-lib/ 40 | 41 | $(OBJDIR)/softbrain.o: $(SOURCES) $(INCLUDES) 42 | @echo "Building default library" 43 | $(CPP) $(CFLAGS) $(SOURCES) -o $@ 44 | 45 | $(OBJDIR)/softbrain-dbg.o: $(SOURCES) $(INCLUDES) 46 | @echo "Building debug feedback library." 47 | $(CPP) $(CFLAGS) $(SOURCES) -DSB_DEBUG_MSG -o $@ 48 | 49 | $(OBJDIR)/softbrain-perf.o: $(SOURCES) $(INCLUDES) 50 | @echo "Building performance feedback library." 51 | $(CPP) $(CFLAGS) $(SOURCES) -DSB_PERF_MSG -o $@ 52 | 53 | $(BINDIR)/libsoftbrain-emu: $(OBJDIR)/softbrain.o 54 | $(CPP) $(LIBFLAGS) -Wl,-soname,libsoftbrain-emu.so -o $@.so $^ 55 | cp -f $(SRCDIR)/*.h $(INCLUDEDIR)/ 56 | @echo "Build complete." 57 | 58 | $(BINDIR)/libsoftbrain-emu-perf: $(OBJDIR)/softbrain-perf.o 59 | $(CPP) $(LIBFLAGS) -Wl,-soname,libsoftbrain-emu-perf.so -o $@.so $^ 60 | cp -f $(SRCDIR)/*.h $(INCLUDEDIR)/ 61 | @echo "Build complete." 62 | 63 | $(BINDIR)/libsoftbrain-emu-dbg: $(OBJDIR)/softbrain-dbg.o 64 | $(CPP) $(LIBFLAGS) -Wl,-soname,libsoftbrain-emu-dbg.so -o $@.so $^ 65 | cp -f $(SRCDIR)/*.h $(INCLUDEDIR)/ 66 | @echo "Build complete." 67 | 68 | $(BINDIR)/compile-insts: $(SRCDIR)/create_insts.c 69 | $(CPP) $(DFG_FLAGS) $^ -o $@ 70 | @echo "Built compile insts." 71 | 72 | clean: 73 | @rm -rf $(OBJECTS) $(OBJDIR)/*.o $(BINDIR)/*.so $(BINDIR)/compile-insts 74 | @echo "Clean complete." 75 | -------------------------------------------------------------------------------- /softbrain-emu/src/.gitignore: -------------------------------------------------------------------------------- 1 | #ignore temp files 2 | *~ 3 | -------------------------------------------------------------------------------- /softbrain-emu/src/create_insts.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | std::vector open(std::string path = ".") { 8 | 9 | DIR* dir; 10 | dirent* pdir; 11 | std::vector files; 12 | 13 | dir = opendir(path.c_str()); 14 | while(pdir = readdir(dir)) { 15 | files.push_back(pdir->d_name); 16 | } 17 | 18 | return files; 19 | } 20 | 21 | static void usage(const char * program_name) { 22 | printf("Usage: %s [CONFIG_PATH] [INCLUDE_PATH]\n", program_name); 23 | 24 | fputs("\n\ 25 | CONFIG_PATH -- Path to instructions in Softbrain-Config \n\ 26 | INCLUDE_PATH -- Path where the sb_c_insts.h file needs to be generated \ 27 | \n", stdout); 28 | } 29 | 30 | 31 | int main(int argc, char* argv[]) { 32 | 33 | if (argc < 3){ 34 | usage(argv[0]); 35 | exit(0); 36 | } 37 | 38 | 39 | std::vector f; 40 | std::string path = "."; 41 | std::string includePath = "."; 42 | std::string exportName = "sb_c_insts.h"; 43 | if(argc > 2) { 44 | path = argv[1]; 45 | includePath = argv[2]; 46 | } 47 | 48 | f = open(path); 49 | path = path.append("/"); 50 | includePath = includePath.append("/"); 51 | std::string rawPath = path; 52 | std::ofstream instsHeader(includePath.append(exportName)); 53 | path = rawPath; 54 | std::string header = ".h"; 55 | instsHeader << "#ifndef _SB_EMU_INSTS" << std::endl; 56 | instsHeader << "#define _SB_EMU_INSTS" << std::endl; 57 | instsHeader << "#include " << std::endl; 58 | instsHeader << 
"#include " << std::endl; 59 | instsHeader << "#include " << std::endl; 60 | instsHeader << "#include \"sb_init.h\"" << std::endl; 61 | /* instsHeader << "float as_float(std::uint32_t ui);" << std::endl; */ 62 | /* instsHeader << "uint32_t as_uint32(float f);" << std::endl; */ 63 | /* instsHeader << "double as_double(std::uint64_t ui);" << std::endl; */ 64 | /* instsHeader << "uint64_t as_uint64(double f);" << std::endl << std::endl; */ 65 | /* instsHeader << "float as_float(uint32_t ui) {" << std::endl */ 66 | /* << " float f;" << std::endl */ 67 | /* << " std::memcpy(&f, &ui, sizeof(float));" << std::endl */ 68 | /* << " return f;" << std::endl */ 69 | /* << "}" << std::endl << std::endl; */ 70 | 71 | 72 | /* instsHeader << "uint32_t as_uint32(float f) {" << std::endl */ 73 | /* << " uint32_t ui;" << std::endl */ 74 | /* << " std::memcpy(&ui, &f, sizeof(uint32_t));" << std::endl */ 75 | /* << " return ui;" << std::endl */ 76 | /* << "}" << std::endl << std::endl; */ 77 | 78 | /* instsHeader << "double as_double(uint64_t ui) {" << std::endl */ 79 | /* << " double f;" << std::endl */ 80 | /* << " std::memcpy(&f, &ui, sizeof(double));" << std::endl */ 81 | /* << " return f;" << std::endl */ 82 | /* << "}" << std::endl << std::endl; */ 83 | 84 | /* instsHeader << "uint64_t as_uint64(double f) {" << std::endl */ 85 | /* << " uint64_t ui;" << std::endl */ 86 | /* << " std::memcpy(&ui, &f, sizeof(uint64_t));" << std::endl */ 87 | /* << " return ui;" << std::endl */ 88 | /* << "}" << std::endl << std::endl; */ 89 | 90 | for(auto iter = f.begin(); iter != f.end(); iter++) { 91 | if((*iter).length() > header.length()) { 92 | if((*iter).compare((*iter).length() - header.length(), header.length(), header) == 0) { 93 | //A valid file 94 | //Get header name as name of instruction 95 | instsHeader << "inline uint64_t "; 96 | instsHeader << (*iter).substr(0, (*iter).find_last_of(".")) << ("(std::array ops) {") << std::endl; 97 | //Done the header for the file. Now open and iterate through. 98 | std::ifstream newFile(path.append((*iter))); 99 | path = rawPath; 100 | std::string newLine; 101 | if(newFile.is_open()) { 102 | while(std::getline(newFile,newLine)) { 103 | instsHeader << "\t" << newLine << std::endl; 104 | } 105 | } else { 106 | std::cout << "Failed to open " << path.append((*iter)) << std::endl; 107 | path = rawPath; 108 | } 109 | newFile.close(); 110 | //Done iterating. 
Exit definition 111 | instsHeader << "}" << std::endl << std::endl; 112 | } 113 | } 114 | } 115 | instsHeader << "#endif" << std::endl; 116 | instsHeader.close(); 117 | } 118 | -------------------------------------------------------------------------------- /softbrain-emu/src/sb.h: -------------------------------------------------------------------------------- 1 | #ifndef SB_H 2 | #define SB_H 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | using namespace std; 12 | 13 | #if defined(SB_DEBUG_MSG) 14 | #define DEBUG_PRINTF(message, arg) \ 15 | printf(message, arg); 16 | #else 17 | #define DEBUG_PRINTF(message, arg) ; 18 | #endif 19 | 20 | #define SCRATCHPAD_SIZE 8192 21 | //Will need to read in the config file to set proper values for the 22 | //CGRA itself in terms of types, but will write that later 23 | struct sb_config { 24 | void (*dfg_func)(uint64_t**, uint64_t**); 25 | int num_inputs; 26 | int* input_widths; 27 | int num_outputs; 28 | int* output_widths; 29 | int work; 30 | int span; 31 | }; 32 | 33 | enum class InputMode {DATA, RECURRENCE}; 34 | enum class OutputMode {PTR_U, PTR_I, SHF16_U, SHF16_I, RECURRENCE, GARBAGE, SCRATCH}; 35 | enum class OutputType {ULL, ILL, UL, IL, U, I, UC, IC}; 36 | class SoftBrain { 37 | public: 38 | SoftBrain(); 39 | SoftBrain(sb_config mem_addr, long size); 40 | ~SoftBrain(); 41 | void dma_read(void* mem_addr, long stride, long access_size, long num_strides, int port); 42 | void dma_write(int port, long stride, long access_size, long num_strides, void* mem_addr); 43 | void dma_write_shf16(int port, long stride, long access_size, long num_strides, void* mem_addr); 44 | void dma_scratch_load(void* mem_addr, long stride, long access_size, long num_strides, int scratch_addr); 45 | void scr_port_stream(int scr_addr, long stride, long access_size, long num_strides, int port); 46 | void scratch_read(int scr_addr, long num_bytes, int port); 47 | void scratch_write(int port, long num_bytes, int scr_addr); 48 | template 49 | void sb_const(int port, T val, int num); 50 | void wait_all(); 51 | void recurrence(uint64_t output_port, int input_port, int num_strides); 52 | void garbage(int port, int num); 53 | private: 54 | void (*dfg_func)(uint64_t**, uint64_t**); 55 | void verify_lengths(); 56 | int execute_dfg(); 57 | void process_recurrence(uint64_t** outputs); 58 | 59 | uint64_t **inputs; 60 | uint64_t **outputs; 61 | uint64_t *input_temp; 62 | uint64_t *output_temp; 63 | 64 | int num_inputs; 65 | int num_outputs; 66 | int iterations; 67 | int executions; 68 | int aggregate_iterations; 69 | int aggregate_executions; 70 | int pipeline_fill; 71 | bool recurrence_check; 72 | int work; 73 | int span; 74 | long size; 75 | sb_config saved_config; 76 | //Unlike reading from memory, scratchpad MUST be able to be accessed at byte level 77 | uint8_t* scratchpad; 78 | int scratchpad_tail; 79 | //streams 80 | struct input_port_instance { 81 | InputMode mode; //Mode = 0 means second value is the value 82 | uint64_t* data; 83 | }; 84 | 85 | struct input_stream { 86 | int width; 87 | deque fifo; 88 | input_stream() { 89 | } 90 | }; 91 | 92 | struct output_port_instance { 93 | OutputMode mode; //Mode = 0 means second value is the value 94 | OutputType typing; //Which type to use for this output iteration in mem 95 | void** data; 96 | }; 97 | 98 | struct output_stream { 99 | int width; 100 | deque fifo; 101 | output_stream() { 102 | } 103 | }; 104 | 105 | input_stream* input_streams; 106 | output_stream* 
output_streams; 107 | }; 108 | 109 | template 110 | void SoftBrain::sb_const(int port, T val, int num) { 111 | assert(port < num_inputs); 112 | assert((num % input_streams[port].width) == 0); 113 | //Need to copy the contents of val into an iteration 114 | uint64_t* ullval = (uint64_t*) malloc(sizeof(uint64_t)); 115 | *ullval = 0; 116 | std::memcpy(ullval, &val, sizeof(T)); 117 | for(int str = 0; str < (num/input_streams[port].width); str++) { 118 | input_port_instance next_instance; 119 | next_instance.mode = InputMode::DATA; 120 | next_instance.data = (uint64_t*) malloc(sizeof(uint64_t)*input_streams[port].width); 121 | for(int j = 0; j < input_streams[port].width; j++) { 122 | next_instance.data[j] = *ullval; 123 | } 124 | input_streams[port].fifo.push_back(next_instance); 125 | } 126 | free(ullval); 127 | while(execute_dfg()); 128 | } 129 | 130 | #endif 131 | -------------------------------------------------------------------------------- /softbrain-emu/src/sb_emu.h: -------------------------------------------------------------------------------- 1 | #ifndef SB_EMU_H 2 | #define SB_EMU_H 3 | #ifndef _REENTRANT 4 | #include 5 | #include 6 | #include "sb.h" 7 | 8 | using namespace std; 9 | //Will need to read in the config file to set proper values for the 10 | //CGRA itself in terms of types, but will write that later 11 | 12 | //Goal here is to have each define as used by the SB map to a call 13 | //in a class file that does what the softbrain does. Hence, each define 14 | //maps to a matching call on the class SoftBrain. 15 | extern SoftBrain *sb_emu; 16 | 17 | //Stream in the Config 18 | #define SB_CONFIG(mem_addr, size) \ 19 | if(sb_emu == NULL) { \ 20 | sb_emu = new SoftBrain(mem_addr, size); \ 21 | } else { \ 22 | delete sb_emu; \ 23 | sb_emu = new SoftBrain(mem_addr, size); \ 24 | } 25 | 26 | //Fill the scratchpad from DMA (from memory or cache) 27 | //Note that scratch_addr will be written linearly 28 | #define SB_DMA_SCRATCH_LOAD(mem_addr, stride, access_size, num_strides, scratch_addr) \ 29 | sb_emu->dma_scratch_load(static_cast(mem_addr), stride, access_size, num_strides, scratch_addr); 30 | 31 | //Read from scratch into a cgra port 32 | #define SB_SCR_PORT_STREAM(scr_addr, stride, access_size, num_strides, port ) \ 33 | sb_emu->scr_port_stream(scr_addr, stride, access_size, num_strides, port); 34 | 35 | //A convienience CMD if you want to read linearly 36 | #define SB_SCRATCH_READ(scr_addr, num_bytes, port) \ 37 | sb_emu->scratch_read(scr_addr, num_bytes, port); 38 | 39 | //Read from DMA into a port 40 | #define SB_DMA_READ(mem_addr, stride, access_size, num_strides, port ) \ 41 | sb_emu->dma_read(static_cast(mem_addr), stride, access_size, num_strides, port); 42 | 43 | //Throw away some outputs. We will add a proper instruction for this at some point, rather then writing to memory 44 | #define SB_GARBAGE(output_port, num_elem) \ 45 | sb_emu->garbage(output_port, num_elem); 46 | 47 | //Write to DMA. 
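//Arguments mirror SoftBrain::dma_write(port, stride, access_size, num_strides, mem_addr) declared in sb.h;
//presumably num_strides chunks of access_size bytes are drained from the output port, with mem_addr
//advanced by stride between chunks (the same convention as SB_DMA_READ, read in reverse).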
48 | #define SB_DMA_WRITE(output_port, stride, access_size, num_strides, mem_addr) \ 49 | sb_emu->dma_write(output_port, stride, access_size, num_strides, static_cast(mem_addr)); 50 | 51 | //Write to DMA, but throw away all but the last 16-bits from each word 52 | #define SB_DMA_WRITE_SHF16(output_port, stride, access_size, num_strides, mem_addr) \ 53 | sb_emu->dma_write_shf16(output_port, stride, access_size, num_strides, static_cast(mem_addr)); 54 | 55 | //Write to DMA, but throw away all but the last 16-bits from each word 56 | //WARNING -- (NOT IMPLEMENTED IN SIMULTOR YET) 57 | //#define SB_DMA_WRITE_SHF32(output_port, stride, access_size, num_strides, mem_addr) \ 58 | __asm__ __volatile__("sb_stride %0, %1" : : "r"(stride), "r"(access_size)); \ 59 | __asm__ __volatile__("sb_wr_dma %0, %1, %2" : : "r"(mem_addr), "r"(num_stirides), "i"(output_port|0x80)); 60 | 61 | // __asm__ __volatile__("sb_dma_addr %0, %1" : : "r"(access_size), "r"(stride)); \ 62 | // __asm__ __volatile__("sb_wr %0 " : : "i"(output_port)); \ 63 | // __asm__ __volatile__("sb_stride %0, %1" : : "r"(mem_addr), "r"(stride)); \ 64 | // __asm__ __volatile__("sb_dma_addr_p %0, %1, " #output_port : : "r"(mem_addr), "r"(stride_size)); \ 65 | // __asm__ __volatile__("sb_dma_wr %0, " : : "r"(num_strides)); 66 | 67 | //Send a constant value, repetated num_elements times to a port 68 | #define SB_CONST(port, val, num_elements) \ 69 | sb_emu->sb_const(port, val, num_elements); 70 | 71 | //Write to Scratch from a CGRA output port. Note that only linear writes are currently allowed 72 | #define SB_SCRATCH_WRITE(output_port, num_bytes, scratch_addr) \ 73 | sb_emu->scratch_write(output_port, num_bytes, scratch_addr); 74 | 75 | //Write from output to input port 76 | #define SB_RECURRENCE(output_port, input_port, num_strides) \ 77 | sb_emu->recurrence(static_cast(output_port), input_port, num_strides); 78 | 79 | //Wait with custom bit vector -- probably don't need to use 80 | //#define SB_WAIT(bit_vec) \ 81 | __asm__ __volatile__("sb_wait t0, t0, " #bit_vec); \ 82 | 83 | //Wait for all softbrain commands to be done -- This will block the processor indefinately if there is 84 | //unbalanced commands 85 | #define SB_WAIT_ALL() \ 86 | sb_emu->wait_all(); 87 | 88 | //For now, cast wait to wait all 89 | #define SB_WAIT(wait_amt) \ 90 | ; 91 | 92 | //Wait for all prior scratch writes to be complete. 93 | #define SB_WAIT_SCR_WR() ; 94 | //Do nothing for a wait \ 95 | 96 | //wait for everything except outputs to be complete. (useful for debugging) 97 | #define SB_WAIT_COMPUTE() ; 98 | //__asm__ __volatile__("sb_wait t0, t0, 2"); \ 99 | 100 | //wait for all prior scratch reads to be complete (NOT IMPLEMENTED IN SIMULTOR YET) 101 | #define SB_WAIT_SCR_RD() ; 102 | //__asm__ __volatile__("sb_wait t0, t0, 4"); \ 103 | 104 | #endif 105 | 106 | #ifdef _REENTRANT 107 | #include 108 | #include 109 | #include "sb.h" 110 | #include 111 | #include 112 | 113 | 114 | using namespace std; 115 | //Will need to read in the config file to set proper values for the 116 | //CGRA itself in terms of types, but will write that later 117 | 118 | //Goal here is to have each define as used by the SB map to a call 119 | //in a class file that does what the softbrain does. Hence, each define 120 | //maps to a matching call on the class SoftBrain. 
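//In this _REENTRANT variant each pthread gets its own SoftBrain instance: SB_CONFIG creates or replaces
//the entry keyed by pthread_self() in the softbrains map below (guarded by configlock), and every macro
//then routes through softbrains->find(pthread_self())->second instead of the single global sb_emu above.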
121 | extern map* softbrains; 122 | extern pthread_mutex_t configlock; 123 | 124 | //Stream in the Config 125 | #define SB_CONFIG(mem_addr, size) \ 126 | pthread_mutex_lock(&configlock); \ 127 | if(softbrains == NULL) { \ 128 | softbrains = new map(); \ 129 | } \ 130 | auto sb = softbrains->find(pthread_self()); \ 131 | if(sb == softbrains->end()) { \ 132 | softbrains->insert(make_pair(pthread_self(), new SoftBrain(mem_addr, size))); \ 133 | } else { \ 134 | delete sb->second; \ 135 | sb->second = new SoftBrain(mem_addr, size); \ 136 | } \ 137 | pthread_mutex_unlock(&configlock); 138 | 139 | //Fill the scratchpad from DMA (from memory or cache) 140 | //Note that scratch_addr will be written linearly 141 | #define SB_DMA_SCRATCH_LOAD(mem_addr, stride, access_size, num_strides, scratch_addr) \ 142 | softbrains->find(pthread_self())->second->dma_scratch_load(static_cast(mem_addr), stride, access_size, num_strides, scratch_addr); 143 | 144 | //Read from scratch into a cgra port 145 | #define SB_SCR_PORT_STREAM(scr_addr, stride, access_size, num_strides, port ) \ 146 | softbrains->find(pthread_self())->second->scr_port_stream(scr_addr, stride, access_size, num_strides, port); 147 | 148 | //A convienience CMD if you want to read linearly 149 | #define SB_SCRATCH_READ(scr_addr, num_bytes, port) \ 150 | softbrains->find(pthread_self())->second->scratch_read(scr_addr, num_bytes, port); 151 | 152 | //Read from DMA into a port 153 | #define SB_DMA_READ(mem_addr, stride, access_size, num_strides, port ) \ 154 | softbrains->find(pthread_self())->second->dma_read(static_cast(mem_addr), stride, access_size, num_strides, port); 155 | 156 | //Throw away some outputs. We will add a proper instruction for this at some point, rather then writing to memory 157 | #define SB_GARBAGE(output_port, num_elem) \ 158 | softbrains->find(pthread_self())->second->garbage(output_port, num_elem); 159 | 160 | //Write to DMA. 
161 | #define SB_DMA_WRITE(output_port, stride, access_size, num_strides, mem_addr) \ 162 | softbrains->find(pthread_self())->second->dma_write(output_port, stride, access_size, num_strides, static_cast(mem_addr)); 163 | 164 | //Write to DMA, but throw away all but the last 16-bits from each word 165 | #define SB_DMA_WRITE_SHF16(output_port, stride, access_size, num_strides, mem_addr) \ 166 | softbrains->find(pthread_self())->second->dma_write_shf16(output_port, stride, access_size, num_strides, static_cast(mem_addr)); 167 | 168 | //Write to DMA, but throw away all but the last 16-bits from each word 169 | //WARNING -- (NOT IMPLEMENTED IN SIMULTOR YET) 170 | //#define SB_DMA_WRITE_SHF32(output_port, stride, access_size, num_strides, mem_addr) \ 171 | __asm__ __volatile__("sb_stride %0, %1" : : "r"(stride), "r"(access_size)); \ 172 | __asm__ __volatile__("sb_wr_dma %0, %1, %2" : : "r"(mem_addr), "r"(num_stirides), "i"(output_port|0x80)); 173 | 174 | // __asm__ __volatile__("sb_dma_addr %0, %1" : : "r"(access_size), "r"(stride)); \ 175 | // __asm__ __volatile__("sb_wr %0 " : : "i"(output_port)); \ 176 | // __asm__ __volatile__("sb_stride %0, %1" : : "r"(mem_addr), "r"(stride)); \ 177 | // __asm__ __volatile__("sb_dma_addr_p %0, %1, " #output_port : : "r"(mem_addr), "r"(stride_size)); \ 178 | // __asm__ __volatile__("sb_dma_wr %0, " : : "r"(num_strides)); 179 | 180 | //Send a constant value, repetated num_elements times to a port 181 | #define SB_CONST(port, val, num_elements) \ 182 | softbrains->find(pthread_self())->second->sb_const(port, val, num_elements); 183 | 184 | //Write to Scratch from a CGRA output port. Note that only linear writes are currently allowed 185 | #define SB_SCRATCH_WRITE(output_port, num_bytes, scratch_addr) \ 186 | softbrains->find(pthread_self())->second->scratch_write(output_port, num_bytes, scratch_addr); 187 | 188 | //Write from output to input port 189 | #define SB_RECURRENCE(output_port, input_port, num_strides) \ 190 | softbrains->find(pthread_self())->second->recurrence(static_cast(output_port), input_port, num_strides); 191 | 192 | //Wait with custom bit vector -- probably don't need to use 193 | //#define SB_WAIT(bit_vec) \ 194 | __asm__ __volatile__("sb_wait t0, t0, " #bit_vec); \ 195 | 196 | //Wait for all softbrain commands to be done -- This will block the processor indefinately if there is 197 | //unbalanced commands 198 | #define SB_WAIT_ALL() \ 199 | softbrains->find(pthread_self())->second->wait_all(); 200 | 201 | //For now, cast wait to wait all 202 | #define SB_WAIT(wait_amt) \ 203 | ; 204 | 205 | //Wait for all prior scratch writes to be complete. 206 | #define SB_WAIT_SCR_WR() ; 207 | //Do nothing for a wait \ 208 | 209 | //wait for everything except outputs to be complete. 
(useful for debugging) 210 | #define SB_WAIT_COMPUTE() ; 211 | //__asm__ __volatile__("sb_wait t0, t0, 2"); \ 212 | 213 | //wait for all prior scratch reads to be complete (NOT IMPLEMENTED IN SIMULTOR YET) 214 | #define SB_WAIT_SCR_RD() ; 215 | //__asm__ __volatile__("sb_wait t0, t0, 4"); \ 216 | 217 | #endif 218 | 219 | #endif 220 | -------------------------------------------------------------------------------- /softbrain-emu/src/sb_init.h: -------------------------------------------------------------------------------- 1 | #ifndef __SB_INIT__ 2 | #define __SB_INIT__ 3 | #include 4 | #include 5 | 6 | // NOTE: The macros below were copied from cambricon/include/fix_common.h 7 | // in the softbrain-workloads repository 8 | 9 | // In 16-bit integer representation, 10 | // one bit is reserved for sign, 11 | // the maximum supported number is 32767, 12 | // the minimum supported number is -32768. 13 | // Here FIX_MAX = 32767, and FIX_MIN is chosen to be negative 14 | // of FIX_MAX instead of -32768 to keep the symmetry. 15 | // 16 | // FIX_TRUNC is to keep the number falling within the range 17 | // between FIX_MIN and FIX_MAX (both inclusively) 18 | #define FIX_MAX ((1 << 15) - 1) 19 | #define FIX_MIN (-FIX_MAX) 20 | #define FIX_TRUNC(x) (x > FIX_MAX ? FIX_MAX : (x < FIX_MIN ? FIX_MIN : x) ) 21 | 22 | // FRAC_BITS is the number of bits reserved for fractional parts. 23 | // So the integer part has 15 - FRAC_BITS bits. 24 | // 25 | // DELTA is the minimum positive amount that can be represented in this number system. 26 | // 27 | // FLOAT_MAX is the largest real value that can be represented in this number system. 28 | // FLOAT_MIN is the smallest real value that can be represented in this number system. 29 | // 30 | // FLOAT_TRUNC is to keep numbers within the range 31 | // between FLOAT_MIN and FLOAT_MAX (both inclusively) 32 | #define FRAC_BITS 11 // 11 or 12 is recommended 33 | #define DELTA (((double)1.0)/(1 << FRAC_BITS)) 34 | #define FLOAT_MAX (FIX_MAX * DELTA) 35 | #define FLOAT_MIN (FIX_MIN * DELTA) 36 | #define FLOAT_TRUNC(x) (x > FLOAT_MAX ? FLOAT_MAX : (x < FLOAT_MIN ? FLOAT_MIN : x) ) 37 | 38 | // DOUBLE_TO_FIX converts a double number to integer in our fixed representation. 39 | // FIX_TO_DOUBLE converts a integer number to double in our fixed representation. 40 | #define DOUBLE_TO_FIX(x) ( (int)(FLOAT_TRUNC(x) / DELTA) ) 41 | #define FIX_TO_DOUBLE(x) (x * DELTA) 42 | 43 | // FIX_ADD fixed addition. 44 | // FIX_MINUS fixed subtraction. 45 | // FIX_MUL fixed multiplication. 
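//
// For example, with the default FRAC_BITS = 11 above (DELTA = 1/2048):
//   DOUBLE_TO_FIX(1.5)  = 3072,  DOUBLE_TO_FIX(0.75) = 1536,
//   FIX_MUL(3072, 1536) = (3072 * 1536) >> 11 = 2304,
//   FIX_TO_DOUBLE(2304) = 1.125  (= 1.5 * 0.75).
//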
46 | // FIX_TAN_H fixed tanh, but is right now using tanh from math.h 47 | #define FIX_ADD(a, b) ( FIX_TRUNC( (int)a + (int)b ) ) 48 | #define FIX_MINUS(a, b) ( FIX_ADD(a, -b) ) 49 | #define FIX_MUL(a, b) ( FIX_TRUNC( ((int)a * (int)b) >> FRAC_BITS ) ) 50 | #define FIX_TAN_H(x) ( DOUBLE_TO_FIX(tanh(FIX_TO_DOUBLE(x))) ) 51 | 52 | extern uint64_t accum; 53 | 54 | inline float as_float(std::uint32_t ui) { 55 | float f; 56 | std::memcpy(&f, &ui, sizeof(float)); 57 | return f; 58 | } 59 | 60 | inline uint32_t as_uint32(float f) { 61 | uint32_t ui; 62 | std::memcpy(&ui, &f, sizeof(uint32_t)); 63 | return ui; 64 | } 65 | 66 | inline double as_double(std::uint64_t ui) { 67 | double f; 68 | std::memcpy(&f, &ui, sizeof(double)); 69 | return f; 70 | } 71 | 72 | inline uint64_t as_uint64(double f) { 73 | uint64_t ui; 74 | std::memcpy(&ui, &f, sizeof(uint64_t)); 75 | return ui; 76 | } 77 | #endif 78 | -------------------------------------------------------------------------------- /softbrain-scheduler/.gitignore: -------------------------------------------------------------------------------- 1 | *.d 2 | src/gams_models/*.h 3 | build/ 4 | *.swo 5 | *.swp 6 | *.swn 7 | drivers/sb_dfg_emu 8 | drivers/sb_sched 9 | drivers/stat-config 10 | dfgs/*/*.h 11 | gams/ 12 | verif/ 13 | viz/ 14 | remap.dot 15 | -------------------------------------------------------------------------------- /softbrain-scheduler/Makefile: -------------------------------------------------------------------------------- 1 | include $(SS_STACK)/msg.mk 2 | prefix:=$(SS_TOOLS) 3 | 4 | 5 | level=./ 6 | include make.config 7 | 8 | all: directories program make_drivers 9 | 10 | include make.rules 11 | 12 | program: 13 | +make -C src 14 | 15 | make_drivers: program 16 | make -C drivers 17 | 18 | install: directories install_headers install_program install_drivers 19 | 20 | 21 | install_headers: 22 | ${MKDIR_P} ${prefix}/include/softbrain-scheduler 23 | cp src/*.h ${prefix}/include/softbrain-scheduler/ 24 | 25 | install_drivers: make_drivers 26 | ${MKDIR_P} ${prefix}/bin 27 | cp drivers/sb_dfg_emu ${prefix}/bin 28 | 29 | 30 | install_program: program 31 | ${MKDIR_P} ${prefix}/lib 32 | cp ${build}/lib/* ${prefix}/lib 33 | 34 | clean: 35 | make -C src clean 36 | make -C drivers clean 37 | 38 | 39 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/bfs.dfg: -------------------------------------------------------------------------------- 1 | Input: H 2 | Input: L 3 | Input: ONE 4 | Input: reset 5 | 6 | IncH = Add64(H,ONE) 7 | Cond = ICmpEQ(L, H) 8 | 9 | NewL = Select(L,IncH,Cond) 10 | CNT = Acc64(ONE,reset) 11 | 12 | Output: NewL 13 | Output: CNT 14 | 15 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/dot.dfg: -------------------------------------------------------------------------------- 1 | #R = A . 
B + carry 2 | 3 | Input: A [4] 4 | Input: B [4] 5 | Input: carry 6 | 7 | M0 = Mul16x4(A0 , B0 ) 8 | M1 = Mul16x4(A1 , B1 ) 9 | M2 = Mul16x4(A2 , B2 ) 10 | M3 = Mul16x4(A3 , B3 ) 11 | 12 | T0 = Add16x4(M0 , M1 ) 13 | T1 = Add16x4(M2 , M3 ) 14 | 15 | T2 = Add16x4(T0 , T1 ) 16 | 17 | R = Red16x4(T2, carry) 18 | 19 | Output: R 20 | 21 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/long.dfg: -------------------------------------------------------------------------------- 1 | Input: I [8] # Original Image 2 | Input: F [4] # Filter 3 | Input: C [8] # Carry 4 | 5 | 6 | M0 = Mul64(I0, F0) 7 | M1 = Mul64(I1, F0) 8 | M2 = Mul64(I2, F1) 9 | M3 = Mul64(I3, F1) 10 | M4 = Mul64(I4, F2) 11 | M5 = Mul64(I5, F2) 12 | M6 = Mul64(I6, F3) 13 | M7 = Mul64(I7, F3) 14 | 15 | O0 = Add64(M0, C0) 16 | O1 = Add64(M1, C1) 17 | O2 = Add64(M2, C2) 18 | O3 = Add64(M3, C3) 19 | O4 = Add64(M4, C4) 20 | O5 = Add64(M5, C5) 21 | O6 = Add64(M6, C6) 22 | O7 = Add64(M7, C7) 23 | 24 | Output: O [8] 25 | 26 | 27 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/medium.dfg: -------------------------------------------------------------------------------- 1 | 2 | Input: I [8] 3 | Input: A 4 | 5 | O0 =Add64(A,I0) 6 | O1 =Add64(A,I1) 7 | O2 =Add64(I2,A) 8 | O3 =Add64(I3,A) 9 | O4 =Add64(I4,A) 10 | O5 =Add64(I5,A) 11 | O6 =Add64(I6,A) 12 | O7 =Add64(I7,A) 13 | 14 | Output: O [8] 15 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/medium_short.dfg: -------------------------------------------------------------------------------- 1 | 2 | Input: I [4] 3 | Input: A 4 | 5 | O0 =Add64(A,I0) 6 | O1 =Add64(A,I1) 7 | O2 =Add64(I2,A) 8 | O3 =Add64(I3,A) 9 | 10 | Output: O [4] 11 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/mm_sb.dfg: -------------------------------------------------------------------------------- 1 | Input: A 2 | Input: B [8] 3 | Input: reset 4 | 5 | M0 = Mul64(B0, A) 6 | M1 = Mul64(B1, A) 7 | M2 = Mul64(B2, A) 8 | M3 = Mul64(B3, A) 9 | M4 = Mul64(B4, A) 10 | M5 = Mul64(B5, A) 11 | M6 = Mul64(B6, A) 12 | M7 = Mul64(B7, A) 13 | 14 | R0 = Acc64(M0, reset) 15 | R1 = Acc64(M1, reset) 16 | R2 = Acc64(M2, reset) 17 | R3 = Acc64(M3, reset) 18 | R4 = Acc64(M4, reset) 19 | R5 = Acc64(M5, reset) 20 | R6 = Acc64(M6, reset) 21 | R7 = Acc64(M7, reset) 22 | 23 | Output: R [8] 24 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/out.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PolyArch/stream-dataflow/e9a40c04268501202f4591d914ed69c46881baa3/softbrain-scheduler/dfgs/5x4/out.txt -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/pool2x2l4avg.dfg: -------------------------------------------------------------------------------- 1 | Input: R0 2 | Input: R1 3 | Input: R2 4 | Input: R3 5 | Input: R4 6 | 7 | Input: P [4] 8 | 9 | I0 =Add16x4(R0, R1) 10 | I1 =Add16x4(R1, R2) 11 | I2 =Add16x4(R2, R3) 12 | I3 =Add16x4(R3, R4) 13 | 14 | H0 =Add16x4(P0, I0) 15 | H1 =Add16x4(P1, I1) 16 | H2 =Add16x4(P2, I2) 17 | H3 =Add16x4(P3, I3) 18 | 19 | O0=RShf16x4(H0,2) 20 | O1=RShf16x4(H1,2) 21 | O2=RShf16x4(H2,2) 22 | O3=RShf16x4(H3,2) 23 | 24 | Output: I [4] 25 | 26 | Output: O0 27 | Output: O1 28 | Output: O2 29 | 
Output: O3 30 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/pool4x4l2avg.dfg: -------------------------------------------------------------------------------- 1 | Input: R0 2 | Input: R1 3 | Input: R2 4 | Input: R3 5 | Input: R4 6 | 7 | Input: Xa [2] 8 | Input: Xb [2] 9 | Input: Xc [2] 10 | 11 | R12=Add16x4(R1, R2) 12 | R123=Add16x4(R12, R3) 13 | R0123=Add16x4(R0,R123) 14 | R1234=Add16x4(R123, R4) 15 | 16 | Xd0=R0123 17 | Xd1=R1234 18 | 19 | Xcd0=Add16x4(Xc0,Xd0) 20 | Xcd1=Add16x4(Xc1,Xd1) 21 | 22 | Xab0=Add16x4(Xa0,Xb0) 23 | Xab1=Add16x4(Xa1,Xb1) 24 | 25 | O0_p=Add16x4(Xab0,Xcd0) 26 | O1_p=Add16x4(Xab1,Xcd1) 27 | 28 | O0=RShf16x4(O0_p,4) 29 | O1=RShf16x4(O1_p,4) 30 | 31 | Oa0=Xb0 32 | Oa1=Xb1 33 | Ob0=Xc0 34 | Ob1=Xc1 35 | Oc0=R0123 36 | Oc1=R1234 37 | 38 | Output: Oa [2] 39 | Output: Ob [2] 40 | Output: Oc [2] 41 | 42 | Output: O0 43 | Output: O1 44 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/pool_simple.dfg: -------------------------------------------------------------------------------- 1 | Input: in [8] 2 | Input: acc [8] 3 | 4 | out0=Add16x4(in0, acc0) 5 | out1=Add16x4(in1, acc1) 6 | out2=Add16x4(in2, acc2) 7 | out3=Add16x4(in3, acc3) 8 | out4=Add16x4(in4, acc4) 9 | out5=Add16x4(in5, acc5) 10 | out6=Add16x4(in6, acc6) 11 | out7=Add16x4(in7, acc7) 12 | 13 | Output: out [8] 14 | 15 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/red16to1sig.dfg: -------------------------------------------------------------------------------- 1 | Input: N [4] 2 | Input: S [8] 3 | Input: acc [2] 4 | Input: pred 5 | 6 | #compute lanes "A" and "B" 7 | 8 | AM0 =Mul16x4(N0, S0) 9 | AM1 =Mul16x4(N1, S1) 10 | AM2 =Mul16x4(N2, S2) 11 | AM3 =Mul16x4(N3, S3) 12 | 13 | AS0 =Add16x4(AM0, AM1) 14 | AS1 =Add16x4(AM2, AM3) 15 | 16 | AS2 =Add16x4(AS0, AS1) 17 | 18 | AR = Red16x4(AS2, acc0) 19 | 20 | out0 = Sig16(AR, pred) 21 | 22 | 23 | 24 | BM0 =Mul16x4(N0, S4) 25 | BM1 =Mul16x4(N1, S5) 26 | BM2 =Mul16x4(N2, S6) 27 | BM3 =Mul16x4(N3, S7) 28 | 29 | BS0 =Add16x4(BM0, BM1) 30 | BS1 =Add16x4(BM2, BM3) 31 | 32 | BS2 =Add16x4(BS0, BS1) 33 | 34 | BR = Red16x4(BS2, acc1) 35 | 36 | out1 = Sig16(BR, pred) 37 | 38 | Output: out [2] 39 | 40 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/red16to1sigx2-simple.dfg: -------------------------------------------------------------------------------- 1 | Input: NA [4] 2 | Input: SA [4] 3 | Input: NB [4] 4 | Input: SB [4] 5 | 6 | #compute lanes "A" and "B" 7 | 8 | MA0 =Mul16x4(NA0, SA0) 9 | MA1 =Mul16x4(NA1, SA1) 10 | MA2 =Mul16x4(NA2, SA2) 11 | MA3 =Mul16x4(NA3, SA3) 12 | 13 | SA0 =Add16x4(MA0, MA1) 14 | SA1 =Add16x4(MA2, MA3) 15 | 16 | SA2 =Add16x4(SA0, SA1) 17 | 18 | RA = Add16x4(SA2,0) 19 | 20 | out0 = Sig16(RA,0) 21 | 22 | 23 | 24 | MB0 =Mul16x4(NB0, SB0) 25 | MB1 =Mul16x4(NB1, SB1) 26 | MB2 =Mul16x4(NB2, SB2) 27 | MB3 =Mul16x4(NB3, SB3) 28 | 29 | SB0 =Add16x4(MB0, MB1) 30 | SB1 =Add16x4(MB2, MB3) 31 | 32 | SB2 =Add16x4(SB0, SB1) 33 | 34 | RB = Add16x4(SB2,0) 35 | 36 | out1 = Sig16(RB,0) 37 | 38 | Output: out [2] 39 | 40 | 41 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/red16to1sigx2.dfg: -------------------------------------------------------------------------------- 1 | Input: NA [4] 2 | Input: SA [4] 3 | Input: NB [4] 4 | Input: SB [4] 5 | 6 | Input: 
acc [2] 7 | Input: pred [2] 8 | 9 | #compute lanes "A" and "B" 10 | 11 | MA0 =Mul16x4(NA0, SA0) 12 | MA1 =Mul16x4(NA1, SA1) 13 | MA2 =Mul16x4(NA2, SA2) 14 | MA3 =Mul16x4(NA3, SA3) 15 | 16 | SA0 =Add16x4(MA0, MA1) 17 | SA1 =Add16x4(MA2, MA3) 18 | 19 | SA2 =Add16x4(SA0, SA1) 20 | 21 | RA = Red16x4(SA2, acc0) 22 | 23 | out0 = Sig16(RA,pred0) 24 | 25 | 26 | 27 | MB0 =Mul16x4(NB0, SB0) 28 | MB1 =Mul16x4(NB1, SB1) 29 | MB2 =Mul16x4(NB2, SB2) 30 | MB3 =Mul16x4(NB3, SB3) 31 | 32 | SB0 =Add16x4(MB0, MB1) 33 | SB1 =Add16x4(MB2, MB3) 34 | 35 | SB2 =Add16x4(SB0, SB1) 36 | 37 | RB = Red16x4(SB2, acc1) 38 | 39 | out1 = Sig16(RB,pred1) 40 | 41 | Output: out [2] 42 | 43 | 44 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/red32to1sig.dfg: -------------------------------------------------------------------------------- 1 | Input: N [8] # 8Wide 1Deep 2 | Input: S [8] # 8Wide 1Deep 3 | Input: acc 4 | Input: pred 5 | 6 | M0 =Mul16x4(N0, S0) 7 | M1 =Mul16x4(N1, S1) 8 | M2 =Mul16x4(N2, S2) 9 | M3 =Mul16x4(N3, S3) 10 | M4 =Mul16x4(N4, S4) 11 | M5 =Mul16x4(N5, S5) 12 | M6 =Mul16x4(N6, S6) 13 | M7 =Mul16x4(N7, S7) 14 | 15 | A0 =Add16x4(M0, M1) 16 | A1 =Add16x4(M2, M3) 17 | A2 =Add16x4(M4, M5) 18 | A3 =Add16x4(M6, M7) 19 | 20 | A8 =Add16x4(A0, A1) 21 | A9 =Add16x4(A2, A3) 22 | 23 | A10 = Add16x4(A8, A9) 24 | 25 | R = Red16x4(A10, acc) 26 | 27 | out=Sig16(R, pred) 28 | 29 | Output: out 30 | 31 | 32 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/red8to1sig.dfg: -------------------------------------------------------------------------------- 1 | Input: N [2] 2 | Input: S [4] 3 | Input: acc [2] 4 | Input: pred 5 | 6 | #compute lanes "A" and "B" 7 | 8 | AM0 =Mul16x4(N0, S0) 9 | AM1 =Mul16x4(N1, S1) 10 | 11 | AS0 =Add16x4(AM0, AM1) 12 | 13 | AR = Red16x4(AS0, acc0) 14 | 15 | out0 = Sig16(AR, pred) 16 | 17 | 18 | 19 | BM0 =Mul16x4(N0, S2) 20 | BM1 =Mul16x4(N1, S3) 21 | 22 | BS0 =Add16x4(BM0, BM1) 23 | 24 | BR = Red16x4(BS0, acc1) 25 | 26 | out1 = Sig16(BR, pred) 27 | 28 | Output: out [2] 29 | 30 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/run-all-sched.sh: -------------------------------------------------------------------------------- 1 | #subalg="M.R.T M.RT MR.T MR.RT MR'.RT MRT'.RT MRT" 2 | 3 | #subalg="MR.RT MRT'.RT MR'.RT MR' MRT' MRT" 4 | subalg="MRT' MRT'.RT" 5 | #subalg=MRT 6 | #subalg="MR'.RT MRT'.RT" 7 | 8 | 9 | logfile=log.txt 10 | sum=summary.txt 11 | 12 | 13 | for ed in 15 7 3; do 14 | 15 | echo "ed = $ed" | tee -a $sum 16 | 17 | for i in $subalg; do 18 | echo $i | tee -a $sum 19 | 20 | echo -e "\n\n\n\n\n\n********** $i *********" >> $logfile 21 | run-sched.sh gams $i $ed >> $logfile 22 | 23 | cat sum.txt | tee -a $sum 24 | done 25 | 26 | done 27 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/run-sched.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | mkdir -p output 3 | 4 | lat="" 5 | lat_eq="" 6 | 7 | lat_eq="0" 8 | time_eq="0" 9 | 10 | if [ -z "$1" ]; then 11 | alg="sg" 12 | else 13 | alg=$1 14 | fi 15 | 16 | if [ -z "$2" ]; then 17 | subalg="MR.RT" 18 | else 19 | subalg=$2 20 | fi 21 | 22 | if [ -z "$3" ]; then 23 | ed=15 24 | else 25 | ed=$3 26 | fi 27 | 28 | 29 | bench="" 30 | 31 | for i in *.dfg; do 32 | echo "************ $i *************"; 33 | cmd="$SS_TOOLS/bin/sb_sched 
$SS_TOOLS/configs/softbrain_5x4.sbmodel $i --verbose --algorithm $alg --sub-alg $subalg --show-gams --mipstart --max-edge-delay=$ed --timeout=3600"; 34 | 35 | echo $cmd 36 | $cmd | tee out.txt 37 | #$cmd > out.txt 38 | bench="$bench $i" 39 | lat="$lat `grep "latency:" out.txt | cut -d" " -f 2`" 40 | time="$time `grep "sched_time:" out.txt | cut -d" " -f 2`" 41 | lat_eq="$lat_eq+`grep "latency:" out.txt | cut -d" " -f 2`" 42 | time_eq="$time_eq+`grep "sched_time:" out.txt | cut -d" " -f 2`" 43 | done 44 | 45 | echo $bench | tee sum.txt 46 | echo $lat | tee -a sum.txt 47 | echo $time | tee -a sum.txt 48 | 49 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/spmv.dfg: -------------------------------------------------------------------------------- 1 | Input: Val 2 | Input: Vec 3 | Input: reset 4 | 5 | M0 = Mul64(Val, Vec) 6 | 7 | O = Acc64(M0, reset) 8 | 9 | Output: O 10 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/stencil.dfg: -------------------------------------------------------------------------------- 1 | Input: M 2 | Input: MIP 3 | Input: MIM 4 | Input: MJP 5 | Input: MJM 6 | Input: MKP 7 | Input: MKM 8 | Input: C0 9 | Input: C1 10 | 11 | AI = Add64(MIP, MIM) 12 | AJ = Add64(MJP, MJM) 13 | AK = Add64(MKP, MKM) 14 | 15 | AIJ = Add64(AI, AJ) 16 | AIJK = Add64(AIJ, AK) 17 | 18 | P0 = Mul64(AIJK,C1) 19 | P1 = Mul64(M,C0) 20 | 21 | R = Add64(P0, P1) 22 | 23 | Output: R 24 | 25 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/sum.txt: -------------------------------------------------------------------------------- 1 | bfs.dfg dot.dfg long.dfg medium.dfg medium_short.dfg mm_sb.dfg pool2x2l4avg.dfg pool4x4l2avg.dfg pool_simple.dfg red16to1sig.dfg red16to1sigx2.dfg red16to1sigx2-simple.dfg red32to1sig.dfg red8to1sig.dfg spmv.dfg stencil.dfg vadd4.dfg vadd5.dfg vadd6.dfg vadd.dfg viterbi.dfg 2 | 1.07 1.49 1.29 1.79 0.86 2.27 10.24 21.27 1.56 2.58 2.38 1.64 1.88 1.40 0.48 1.93 1.29 1.16 1.66 1.54 2.34 | 62.12 3 | 1.07 1.49 1.29 1.79 0.86 2.27 10.24 21.27 1.56 2.58 2.38 1.64 1.88 1.40 0.48 1.93 1.29 1.16 1.66 1.54 2.34 | 62.12 4 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/vadd.dfg: -------------------------------------------------------------------------------- 1 | 2 | Input: I [8] 3 | Input: A [8] 4 | 5 | O0 =Add64(A0,I0) 6 | O1 =Add64(A1,I1) 7 | O2 =Add64(A2,I2) 8 | O3 =Add64(A3,I3) 9 | O4 =Add64(A4,I4) 10 | O5 =Add64(A5,I5) 11 | O6 =Add64(A6,I6) 12 | O7 =Add64(A7,I7) 13 | 14 | Output: O [8] 15 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/vadd4.dfg: -------------------------------------------------------------------------------- 1 | 2 | Input: I [4] 3 | Input: A [4] 4 | 5 | O0 =Add64(A0,I0) 6 | O1 =Add64(A1,I1) 7 | O2 =Add64(A2,I2) 8 | O3 =Add64(A3,I3) 9 | 10 | Output: O [4] 11 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/vadd5.dfg: -------------------------------------------------------------------------------- 1 | 2 | Input: I [5] 3 | Input: A [5] 4 | 5 | O0 =Add64(A0,I0) 6 | O1 =Add64(A1,I1) 7 | O2 =Add64(A2,I2) 8 | O3 =Add64(A3,I3) 9 | O4 =Add64(A4,I4) 10 | 11 | Output: O [5] 12 | -------------------------------------------------------------------------------- 
/softbrain-scheduler/dfgs/5x4/vadd6.dfg: -------------------------------------------------------------------------------- 1 | 2 | Input: I [6] 3 | Input: A [6] 4 | 5 | O0 =Add64(A0,I0) 6 | O1 =Add64(A1,I1) 7 | O2 =Add64(A2,I2) 8 | O3 =Add64(A3,I3) 9 | O4 =Add64(A4,I4) 10 | O5 =Add64(A5,I5) 11 | 12 | Output: O [6] 13 | -------------------------------------------------------------------------------- /softbrain-scheduler/dfgs/5x4/viterbi.dfg: -------------------------------------------------------------------------------- 1 | Input: llike [4] 2 | Input: trans [4] 3 | Input: reset 4 | Input: emission 5 | 6 | S0 = Add64(llike0,trans0) 7 | S1 = Add64(llike1,trans1) 8 | S2 = Add64(llike2,trans2) 9 | S3 = Add64(llike3,trans3) 10 | 11 | #these should be mins 12 | M01 = Add64(S0,S1) 13 | M12 = Add64(S2,S3) 14 | 15 | M = Add64(M01,M12) 16 | ME = Add64(M,emission) 17 | 18 | MR = Acc64(ME,reset) 19 | 20 | Output: MR 21 | 22 | -------------------------------------------------------------------------------- /softbrain-scheduler/drivers/Makefile: -------------------------------------------------------------------------------- 1 | include $(SS_STACK)/msg.mk 2 | 3 | level=../ 4 | include ../make.config 5 | 6 | SYS = $(shell sys) 7 | CXX = g++ 8 | 9 | CXXFLAGS := -Wall -g -std=c++11 10 | 11 | LIB_PATH=$(SS_TOOLS)/lib 12 | INC_SBMODEL_PATH=$(SS_TOOLS)/include/softbrain-config 13 | INC_SBSCHED_PATH=$(SS_TOOLS)/include/softbrain-scheduler 14 | 15 | CXXFLAGS += -I$(INC_SBMODEL_PATH) -I$(INC_SBSCHED_PATH) -Wl,-rpath,$(LIB_PATH) 16 | 17 | #all: reschedule stat-config 18 | 19 | all: sb_dfg_emu 20 | #reschedul 21 | 22 | 23 | sb_dfg_emu : sb_dfg_emu.cpp 24 | $(CXX) $(CXXFLAGS) -MD -o $@ $< -L$(LIB_PATH) -lsbscheduler -lsbconfig -Wl,-rpath,${SS_TOOLS}/lib 25 | 26 | 27 | clean: 28 | rm -rf *.o sb_dfg_emu sb_sched stat-config reschedule *.d 29 | 30 | include ../make.rules 31 | 32 | -------------------------------------------------------------------------------- /softbrain-scheduler/drivers/sb_dfg_emu.cpp: -------------------------------------------------------------------------------- 1 | #include "model.h" 2 | #include 3 | #include 4 | #include 5 | 6 | #include "sbpdg.h" 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | using namespace SB_CONFIG; 12 | 13 | std::string basename(std::string& filename) { 14 | size_t lastindex = filename.find_last_of("."); 15 | string basename = filename.substr(0, lastindex); 16 | 17 | lastindex = filename.find_last_of("\\/"); 18 | if(lastindex != string::npos) { 19 | basename = basename.substr(lastindex+1); 20 | } 21 | return basename; 22 | } 23 | 24 | std::string basedir(std::string& filename) { 25 | size_t lastindex = filename.find_last_of("\\/"); 26 | if(lastindex == string::npos) { 27 | return std::string("./"); 28 | } 29 | return filename.substr(0, lastindex); 30 | } 31 | 32 | 33 | 34 | int main(int argc, char* argv[]) 35 | { 36 | 37 | if(argc<2) { 38 | cerr << "Usage: sb_dfg_emu \n"; 39 | exit(1); 40 | } 41 | 42 | 43 | std::string pdg_filename=argv[2]; 44 | 45 | int lastindex = pdg_filename.find_last_of("."); 46 | string pdg_rawname = pdg_filename.substr(0, lastindex); 47 | string dfg_rawname = pdg_rawname; 48 | if(dfg_rawname.find_last_of("/") < dfg_rawname.length()) { 49 | dfg_rawname = dfg_rawname.substr(dfg_rawname.find_last_of("/")+1,dfg_rawname.length()); 50 | } 51 | //sbpdg object based on the dfg 52 | SbPDG sbpdg(pdg_filename); 53 | 54 | 55 | std::string dfg_emu_header=pdg_rawname+string(".h"); 56 | std::ofstream out_file(dfg_emu_header); 57 | 
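 //The emitted <dfg>.h is what the diannao workloads #include (e.g. red32to1sig.h);
 //judging by their usage it provides the <dfg>_config configuration data, <dfg>_size,
 //and the P_<dfg>_<port> identifiers consumed by the SB_* macros.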
assert(out_file.good()); 58 | sbpdg.printEmuDFG(out_file, dfg_rawname); 59 | } 60 | 61 | -------------------------------------------------------------------------------- /softbrain-scheduler/make.config: -------------------------------------------------------------------------------- 1 | MKDIR_P = mkdir -p 2 | 3 | SYS = $(shell sys) 4 | CXX = g++ 5 | CXXFLAGS := -Wall -g -std=c++11 -O3 6 | 7 | build ?= $(shell pwd)/${level}/build 8 | prefix ?= $(shell pwd) 9 | boost ?= /usr/lib64 10 | -------------------------------------------------------------------------------- /softbrain-scheduler/make.rules: -------------------------------------------------------------------------------- 1 | .PHONY: directories 2 | 3 | directories: 4 | ${MKDIR_P} ${build}/obj 5 | ${MKDIR_P} ${build}/lib 6 | -------------------------------------------------------------------------------- /softbrain-scheduler/src/Makefile: -------------------------------------------------------------------------------- 1 | include $(SS_STACK)/msg.mk 2 | level=../ 3 | include ../make.config 4 | 5 | 6 | SYS = $(shell sys) 7 | CXX = g++ 8 | 9 | OPT = -O3 10 | #OPT = -Og 11 | #OPT = -O0 12 | 13 | CXXFLAGS := -Wall -g -std=c++11 $(OPT) -ggdb -gdwarf-3 -lm $(FLAGS) 14 | SOURCES= sbpdg.cpp 15 | 16 | INCLUDE_DEST=../src 17 | LIB_DEST=${build}/lib 18 | OBJ_DEST=${build}/obj 19 | 20 | 21 | PRE_OBJECTS=$(SOURCES:.cpp=.o) 22 | OBJECTS = $(patsubst %,$(OBJ_DEST)/%,$(PRE_OBJECTS)) 23 | 24 | BOOST_PATH=${boost} 25 | INC_SBMODEL_PATH=${SS_TOOLS}/include/softbrain-config/ 26 | 27 | all: $(LIB_DEST)/libsbscheduler.a $(LIB_DEST)/libsbscheduler.so 28 | 29 | CXXFLAGS += -I$(INC_SBMODEL_PATH) 30 | 31 | CXXFLAGS += -I$(INCLUDE_DEST) -L$(BOOST_PATH) -fPIC -lboost_regex 32 | 33 | $(LIB_DEST)/libsbscheduler.a: $(OBJECTS) 34 | ar crs $@ $^ 35 | 36 | $(LIB_DEST)/libsbscheduler.so: $(OBJECTS) 37 | $(CXX) $(CXXFLAGS) -MD -shared -o $@ $^ 38 | 39 | 40 | 41 | $(OBJ_DEST)/%.o: %.cpp $(INCLUDE_DEST)/%.h 42 | $(CXX) $(CXXFLAGS) -MD -c -o $@ $< 43 | 44 | 45 | 46 | .phony: clean 47 | 48 | clean: 49 | -rm -Rf $(LIB_DEST)/*.so $(LIB_DEST)/*.a *.o $(OBJ_DEST)/*.o *.d $(OBJ_DEST)/*.d $(GAMS_INC) 50 | 51 | include ../make.rules 52 | 53 | -------------------------------------------------------------------------------- /softbrain-scheduler/src/sbpdg.h: -------------------------------------------------------------------------------- 1 | #ifndef __SBPDG_H__ 2 | #define __SBPDG_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "sbinst.h" 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | #include "model.h" 17 | 18 | class SbPDG_Node; 19 | 20 | class SbPDG_Edge { 21 | public: 22 | enum EdgeType { data, ctrl_true, ctrl_false }; 23 | 24 | EdgeType etype() {return _etype;} 25 | 26 | SbPDG_Edge(SbPDG_Node* def, SbPDG_Node* use, EdgeType etype) { 27 | _def=def; 28 | _use=use; 29 | _etype=etype; 30 | _ID=ID_SOURCE++; 31 | } 32 | 33 | SbPDG_Node* def() const {return _def;} 34 | SbPDG_Node* use() const {return _use;} 35 | 36 | std::string gamsName(); 37 | std::string name(); 38 | 39 | void set_delay(int d) {_delay=d;} 40 | int delay() {return _delay;} 41 | private: 42 | int _ID; 43 | SbPDG_Node *_def, *_use; 44 | EdgeType _etype; 45 | 46 | int _delay =0; 47 | 48 | 49 | 50 | private: 51 | static int ID_SOURCE; 52 | }; 53 | 54 | //PDG Node -- abstract base class 55 | class SbPDG_Node { 56 | public: 57 | virtual void printGraphviz(std::ostream& os); 58 | virtual void printEmuDFG(std::ostream& os, std::string dfg_name); 59 | void 
setScalar() {_scalar = true;}; 60 | bool getScalar() {return _scalar;}; 61 | int findDepth(std::ostream& os, std::string dfg_name, int level); 62 | SbPDG_Node() { 63 | _ID=ID_SOURCE++; 64 | } 65 | 66 | typedef std::vector::const_iterator const_edge_iterator; 67 | 68 | void addIncEdge(unsigned pos, SbPDG_Edge *edge) { 69 | assert(pos <=4); 70 | if(_ops.size()<=pos) { 71 | _ops.resize(pos+1,NULL); 72 | } 73 | 74 | if(_ops[pos]) { 75 | std::cerr << "ERROR: overwriting op at pos" << pos 76 | << " name:" << _ops[pos]->def()->name() << "\n"; 77 | assert(0); 78 | } 79 | _ops[pos]=edge; 80 | } 81 | 82 | void addOutEdge(unsigned pos, SbPDG_Edge *edge) { 83 | assert(pos <= 64 && "more than 64 users, check this! (may be okay if really large grid\n"); 84 | if(_uses.size()<=pos) { 85 | _uses.resize(pos+1,NULL); 86 | } 87 | 88 | if(_uses[pos]) { 89 | std::cerr << "ERROR: overwriting use at pos" << pos 90 | << " name: " << _uses[pos]->use()->name() << "\n"; 91 | assert(0); 92 | } 93 | 94 | _uses[pos]=edge; 95 | } 96 | 97 | SbPDG_Edge* getLinkTowards(SbPDG_Node* to) { 98 | for(unsigned i = 0; i < _uses.size(); ++ i) { 99 | if(_uses[i] && _uses[i]->use()==to) { 100 | return _uses[i]; 101 | } 102 | } 103 | return NULL; 104 | } 105 | 106 | int num_inc() const { return _ops.size(); } 107 | int num_out() const { return _uses.size(); } 108 | 109 | virtual std::string name() = 0; //pure func 110 | void setName(std::string& name) {_name = name;} 111 | virtual std::string gamsName() = 0; 112 | 113 | const_edge_iterator ops_begin() const {return _ops.begin();} 114 | const_edge_iterator ops_end() const {return _ops.end();} 115 | const_edge_iterator uses_begin() const {return _uses.begin();} 116 | const_edge_iterator uses_end() const {return _uses.end();} 117 | 118 | int id() {return _ID;} 119 | 120 | void set_value(uint64_t v) {_val=v;} 121 | uint64_t get_value() {return _val;} 122 | bool input = false; 123 | bool output = false; 124 | int _iter; 125 | 126 | protected: 127 | uint64_t _val; 128 | int _ID; 129 | std::string _name; 130 | std::vector _ops; //in edges 131 | std::vector _uses; //out edges 132 | bool _scalar = false; 133 | 134 | private: 135 | static int ID_SOURCE; 136 | }; 137 | 138 | 139 | class SbPDG_IO : public SbPDG_Node { 140 | public: 141 | void setVPort(int vport) { _vport = vport; } 142 | int vport() {return _vport;} 143 | 144 | protected: 145 | int _vport; 146 | }; 147 | 148 | //Instruction 149 | class SbPDG_Inst : public SbPDG_Node { 150 | public: 151 | SbPDG_Inst() : SbPDG_Node(), _predInv(false), _imm_slot(-1), _subFunc(0),_accum(0){ 152 | } 153 | 154 | void printGraphviz(std::ostream& os); 155 | void printEmuDFG(std::ostream& os, std::string dfg_name); 156 | 157 | void setImm( uint64_t val ) { _imm=val; } 158 | // void setImm( float val ) { _imm=*reinterpret_cast(&val); } 159 | 160 | // float getImmFloat() { return *reinterpret_cast(&_imm); } 161 | int getImmInt() { return _imm; } 162 | 163 | uint64_t imm() { return _imm; } 164 | 165 | void setPredInv(bool predInv) { _predInv=predInv;} 166 | bool predInv() {return _predInv;} 167 | 168 | void setInst(SB_CONFIG::sb_inst_t sbinst) { _sbinst=sbinst; } 169 | SB_CONFIG::sb_inst_t inst() { return _sbinst; } 170 | 171 | std::string name() { 172 | std::stringstream ss; 173 | ss << _name << ":"; 174 | ss << SB_CONFIG::name_of_inst(_sbinst); 175 | if(_imm_slot!=-1) { 176 | ss<<" Imm:"<<_imm; 177 | } 178 | return ss.str(); 179 | } 180 | 181 | std::string gamsName(); 182 | 183 | void setImmSlot(int i); 184 | int immSlot() const { return _imm_slot; } 185 | 
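 //An _imm_slot of -1 (the constructor default) marks an instruction with no
 //immediate; otherwise it presumably names the operand position fed from _imm
 //instead of an incoming edge.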
186 | void setSubFunc(int i) {_subFunc=i;} 187 | int subFunc() const {return _subFunc;} 188 | 189 | void compute(bool print, bool verif); 190 | 191 | void set_verif_id(std::string s) {_verif_id = s;} 192 | 193 | private: 194 | std::ofstream _verif_stream; 195 | std::string _verif_id; 196 | std::vector _input_vals; 197 | bool _predInv; 198 | int _imm_slot; 199 | int _subFunc; 200 | uint64_t _accum; 201 | uint64_t _imm; 202 | SB_CONFIG::sb_inst_t _sbinst; 203 | }; 204 | 205 | class SbPDG_Input : public SbPDG_IO { //inturn inherits sbnode 206 | public: 207 | void printGraphviz(std::ostream& os); 208 | void printEmuDFG(std::ostream& os, std::string dfg_name, std::string* realName, int* iter, std::vector* input_sizes); 209 | 210 | std::string name() { 211 | std::stringstream ss; 212 | ss << _name << ":"; 213 | ss << "I" << _vport; 214 | ss << _name; 215 | return ss.str(); 216 | } 217 | std::string gamsName(); 218 | 219 | std::string _realName; 220 | int _subIter; 221 | int _size; 222 | }; 223 | 224 | class SbPDG_Output : public SbPDG_IO { 225 | public: 226 | void printGraphviz(std::ostream& os); 227 | void printDirectAssignments(std::ostream& os, std::string dfg_name); 228 | void printEmuDFG(std::ostream& os, std::string dfg_name, std::string* realName, int* iter, std::vector* output_sizes); 229 | 230 | std::string name() { 231 | std::stringstream ss; 232 | ss << _name << ":"; 233 | ss << "O" << _vport; 234 | ss << _name; 235 | return ss.str(); 236 | } 237 | std::string gamsName(); 238 | 239 | //returns the instruction producing the 240 | //value to this output node 241 | //Returns NULL if the producing instruction is an input! 242 | SbPDG_Inst* out_inst() { 243 | return dynamic_cast(_ops[0]->def()); 244 | } 245 | 246 | //retrieve the value of the def 247 | uint64_t retrieve() { 248 | assert(_ops.size()==1); 249 | return _ops[0]->def()->get_value(); 250 | } 251 | 252 | std::string _realName; 253 | int _subIter; 254 | int _size; 255 | }; 256 | 257 | //vector class 258 | class SbPDG_Vec { 259 | public: 260 | SbPDG_Vec(std::string name, int id) : _name(name), _ID(id) { 261 | _locMap.resize(1); //set up default loc map 262 | _locMap[0].push_back(0); 263 | } 264 | 265 | void setLocMap(std::vector >& vec) { _locMap=vec;} 266 | std::vector >& locMap() {return _locMap;} 267 | 268 | int id() {return _ID;} 269 | 270 | virtual std::string gamsName() = 0; 271 | virtual std::string name() {return _name;} 272 | 273 | protected: 274 | std::string _name; 275 | std::vector> _locMap; 276 | int _ID; 277 | }; 278 | 279 | class SbPDG_VecInput : public SbPDG_Vec { 280 | public: 281 | 282 | SbPDG_VecInput(std::string name, int id) : SbPDG_Vec(name,id) {} 283 | 284 | virtual std::string gamsName() { 285 | std::stringstream ss; 286 | ss << "IPV_" << _name ; 287 | return ss.str(); 288 | } 289 | 290 | void addInput(SbPDG_Input* in) { _inputs.push_back(in); } 291 | std::vector::iterator input_begin() {return _inputs.begin();} 292 | std::vector::iterator input_end() {return _inputs.end();} 293 | unsigned num_inputs() const {return _inputs.size();} 294 | 295 | SbPDG_Input* getInput(int i) {return _inputs[i];} 296 | 297 | /*bool operator < (const SbPDG_VecInput& s) const 298 | { 299 | return (this->num_inputs() > s.num_inputs()); 300 | }*/ 301 | 302 | private: 303 | std::vector _inputs; 304 | }; 305 | 306 | 307 | class SbPDG_VecOutput : public SbPDG_Vec { 308 | public: 309 | 310 | SbPDG_VecOutput(std::string name, int id) : SbPDG_Vec(name,id) {} 311 | 312 | virtual std::string gamsName() { 313 | std::stringstream ss; 314 | 
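 //Output vector ports are named OPV_<name> in the generated GAMS model;
 //input vector ports use the IPV_ prefix (see SbPDG_VecInput above).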
ss << "OPV_" << _name ; 315 | return ss.str(); 316 | } 317 | 318 | void addOutput(SbPDG_Output* out) { _outputs.push_back(out); } 319 | std::vector::iterator output_begin() {return _outputs.begin();} 320 | std::vector::iterator output_end() {return _outputs.end();} 321 | unsigned num_outputs() const {return _outputs.size();} 322 | 323 | SbPDG_Output* getOutput(int i) {return _outputs[i];} 324 | 325 | /*bool operator < (const SbPDG_VecOutput& s) const 326 | { 327 | return (this->num_outputs() > s.num_outputs()); 328 | }*/ 329 | 330 | private: 331 | std::vector _outputs; 332 | }; 333 | 334 | 335 | class SbPDG { 336 | public: 337 | SbPDG(); 338 | SbPDG(std::string filename); 339 | 340 | ~SbPDG(){ 341 | } 342 | 343 | void printGraphviz(std::ostream& os); 344 | void printEmuDFG(std::ostream& os, std::string dfg_name); 345 | void printGraphviz(const char *fname) { 346 | std::ofstream os(fname); 347 | assert(os.good()); 348 | printGraphviz(os); 349 | } 350 | 351 | void printGams(std::ostream& os, std::unordered_map&, 352 | std::unordered_map&, 353 | std::unordered_map&); 354 | 355 | void printPortCompatibilityWith(std::ostream& os, SB_CONFIG::SbModel* sbModel); 356 | 357 | 358 | 359 | SbPDG_Inst* CreateInst() { 360 | return new SbPDG_Inst(); 361 | } 362 | 363 | void addInst(SbPDG_Inst* inst) { 364 | _insts.push_back(inst); 365 | _nodes.push_back(inst);} 366 | 367 | //Just for adding single input without keeping track of name/sym-table 368 | void addInput(SbPDG_Input* input) { 369 | _inputs.push_back(input); 370 | _nodes.push_back(input); 371 | } 372 | 373 | void addOutput(SbPDG_Output* output) { 374 | _outputs.push_back(output); 375 | _nodes.push_back(output); 376 | } 377 | 378 | void addScalarInput(std::string name, std::map& syms) { 379 | SbPDG_VecInput* vec_input = new SbPDG_VecInput(name, _vecInputs.size()); 380 | insert_vec_in(vec_input); 381 | 382 | SbPDG_Input* pdg_in = new SbPDG_Input(); //new input nodes 383 | syms[name]=pdg_in; 384 | pdg_in->setName(name); 385 | pdg_in->setVPort(_vecInputs.size()); 386 | pdg_in->setScalar(); 387 | addInput(pdg_in); 388 | vec_input->addInput(pdg_in); 389 | } 390 | 391 | //scalar output node 392 | void addScalarOutput(std::string name, std::map& syms) { 393 | 394 | SbPDG_Node* out_node = syms[name]; 395 | if(out_node==NULL) { 396 | std::cerr << "Could not find" + name + "\n"; 397 | assert("0"); 398 | } 399 | 400 | //new vector output 401 | SbPDG_VecOutput* vec_output = new SbPDG_VecOutput(name,_vecOutputs.size()); 402 | insert_vec_out(vec_output); 403 | 404 | SbPDG_Output* pdg_out = new SbPDG_Output(); 405 | std::string out_name=name+"_out"; 406 | syms[out_name]=pdg_out; 407 | pdg_out->setName(out_name); 408 | pdg_out->setVPort(_vecOutputs.size()); 409 | pdg_out->setScalar(); 410 | addOutput(pdg_out); 411 | vec_output->addOutput(pdg_out); //its own vector of out nodes 412 | 413 | connect(out_node, pdg_out,0,SbPDG_Edge::data); 414 | } 415 | 416 | 417 | //Need to confirm the functionality here 418 | void addVecOutput(std::string name, 419 | std::vector >& pm, 420 | std::map& syms ) { 421 | 422 | SbPDG_VecOutput* vec_output = new SbPDG_VecOutput(name,_vecOutputs.size()); 423 | vec_output->setLocMap(pm); 424 | insert_vec_out(vec_output); 425 | 426 | int entries = pm.size(); 427 | //std::cout << "entries: " << entries << "\n"; 428 | 429 | for(int i = 0; i < entries; ++i) { 430 | std::stringstream ss; 431 | ss << name << i; 432 | //std::cout << "name: " << name << "\n"; 433 | std::string dep_name = ss.str(); 434 | 435 | SbPDG_Node* out_node = syms[dep_name]; 
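 //Each lane of the vector output must already be in the symbol table under the
 //name <vec><i> (e.g. O0, O1, ...); a missing entry is a DFG authoring error.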
436 | if(out_node==NULL) { 437 | std::cerr << "Could not find \"" + dep_name + "\"\n"; 438 | assert(0); 439 | } 440 | 441 | SbPDG_Output* pdg_out = new SbPDG_Output(); 442 | std::string out_name = dep_name + "_out"; 443 | syms[out_name]=pdg_out; 444 | pdg_out->setName(out_name); 445 | pdg_out->setVPort(_vecOutputs.size()); 446 | addOutput(pdg_out); 447 | vec_output->addOutput(pdg_out); 448 | 449 | connect(out_node, pdg_out,0,SbPDG_Edge::data); 450 | } 451 | 452 | 453 | //assert(0 && "addVecOutput not implemented"); 454 | } 455 | 456 | void addVecInput(std::string name, 457 | std::vector >& pm, 458 | std::map& syms ) { 459 | 460 | SbPDG_VecInput* vec_input = new SbPDG_VecInput(name,_vecInputs.size()); 461 | vec_input->setLocMap(pm); 462 | insert_vec_in(vec_input); 463 | 464 | //number of vector entries -- each vector element is a input node 465 | int entries = pm.size(); 466 | //std::cout << "entries: " << entries << "\n"; 467 | 468 | for(int i = 0; i < entries; ++i) { 469 | std::stringstream ss; 470 | ss << name << i; //Vector input names: A0, A1 471 | //std::cout << "name: " << name << "\n"; 472 | SbPDG_Input* pdg_in = new SbPDG_Input(); 473 | std::string name = ss.str(); 474 | syms[name]=pdg_in; 475 | pdg_in->setName(name); 476 | pdg_in->setVPort(_vecInputs.size()); 477 | addInput(pdg_in); 478 | vec_input->addInput(pdg_in); 479 | } 480 | } 481 | 482 | void parse_and_add_vec(std::string name, std::string line, 483 | std::map& syms ,bool input); 484 | 485 | SbPDG_Edge* connect(SbPDG_Node* orig, SbPDG_Node* dest,int slot,SbPDG_Edge::EdgeType etype); 486 | 487 | void parse_and_add_inst(std::string var_out, std::string opcode, 488 | std::map& syms, 489 | std::vector inc_nodes); 490 | 491 | typedef std::vector::const_iterator const_node_iterator; 492 | typedef std::vector::const_iterator const_inst_iterator; 493 | typedef std::vector::const_iterator const_input_iterator; 494 | typedef std::vector::const_iterator const_output_iterator; 495 | typedef std::vector::const_iterator const_edge_iterator; 496 | 497 | const_inst_iterator inst_begin() {return _insts.begin();} 498 | const_inst_iterator inst_end() {return _insts.end();} 499 | int num_insts() {return _insts.size();} 500 | 501 | const_input_iterator input_begin() {return _inputs.begin();} 502 | const_input_iterator input_end() {return _inputs.end();} 503 | 504 | const_output_iterator output_begin() {return _outputs.begin();} 505 | const_output_iterator output_end() {return _outputs.end();} 506 | 507 | int num_nodes() {return _nodes.size();} 508 | 509 | int num_vec_input() {return _vecInputs.size();} 510 | int num_vec_output() {return _vecOutputs.size();} 511 | 512 | void insert_vec_in(SbPDG_VecInput* in) {_vecInputs.push_back(in);} 513 | void insert_vec_out(SbPDG_VecOutput* out) {_vecOutputs.push_back(out);} 514 | 515 | SbPDG_VecInput* vec_in(int i) {return _vecInputs[i];} 516 | SbPDG_VecOutput* vec_out(int i) {return _vecOutputs[i];} 517 | void sort_vec_in() { 518 | sort(_vecInputs.begin(), _vecInputs.end(),[](SbPDG_VecInput*& left, SbPDG_VecInput*& right){ 519 | return left->num_inputs() > right->num_inputs(); 520 | }); 521 | } 522 | 523 | void sort_vec_out() { 524 | sort(_vecOutputs.begin(), _vecOutputs.end(),[](SbPDG_VecOutput*& left, SbPDG_VecOutput*& right){ 525 | return left->num_outputs() > right->num_outputs(); 526 | }); 527 | } 528 | void compute(bool print, bool verif); 529 | 530 | private: 531 | std::vector _nodes; 532 | 533 | //redundant storage: 534 | std::vector _insts; 535 | std::vector _inputs; 536 | std::vector _outputs; 
537 | 538 | std::vector _orderedInsts; 539 | 540 | 541 | std::vector _vecInputs; 542 | std::vector _vecOutputs; 543 | 544 | std::vector _edges; 545 | 546 | int span; 547 | int work; 548 | }; 549 | 550 | #endif 551 | -------------------------------------------------------------------------------- /workloads/diannao/Makefile: -------------------------------------------------------------------------------- 1 | ifndef SS_TOOLS 2 | $(error SS_TOOLS is undefined) 3 | endif 4 | 5 | CPP=g++ 6 | 7 | OPT?=-O3 8 | CFLAGS=$(OPT) --std=c++11 -g -ggdb -gdwarf-3 9 | MODULE := conv1p conv2p conv3p conv4p pool1p pool3p pool5p class1p class3p conv5 conv5p conv1sb conv2sb conv3sb conv4sb class1sb class3sb 10 | #SRC := $(MODULE:=.cpp) 11 | #OBJ := $(MODULE:=.o) 12 | 13 | .PHONY: all clean 14 | 15 | INCLUDES = -I$(SS_TOOLS)/include/softbrain-lib 16 | LIBS = $(INCLUDES) -L$(SS_TOOLS)/lib -lsoftbrain-emu 17 | 18 | 19 | all: $(MODULE) 20 | 21 | HEADERS=dnn.hpp 22 | 23 | 24 | CONV_DFGS=red32to1sig.dfg red16to1sig.dfg red8to1sig.dfg 25 | CONV_DFG_HEADERS=$(CONV_DFGS:.dfg=.h) 26 | 27 | CLASS_DFGS=red32to1sig.dfg 28 | CLASS_DFG_HEADERS=$(CLASS_DFGS:.dfg=.h) 29 | 30 | #pool2x2avg.dfg test.dfg pool_simple.dfg 31 | POOL_DFGS=pool4x4l2avg.dfg pool2x2l4avg.dfg 32 | POOL_DFG_HEADERS=$(POOL_DFGS:.dfg=.h) 33 | 34 | DFG_HEADERS=$(sort $(CLASS_DFG_HEADERS) $(POOL_DFG_HEADERS) $(CONV_DFG_HEADERS)) 35 | 36 | SB_CONFIG=$(SS_TOOLS)/configs/diannao_simd64.sbmodel 37 | #SB_CONFIG=$(SS_TOOLS)/../softbrain-compiler/softbrain-config/configs/diannao_simd64_half.sbmodel 38 | 39 | 40 | $(DFG_HEADERS): %.h: %.dfg 41 | $(SS_TOOLS)/bin/sb_dfg_emu $(SB_CONFIG) $< 42 | #$(SS_TOOLS)/bin/sb_sched $(SB_CONFIG) $< 43 | 44 | #conv1 Nx=500, Ny=375, Kx=9, Ky=9, Ni=32, No=48, priv=False 45 | #conv2 Nx=200, Ny=200, Kx=18, Ky=18, Ni=8, No=8, priv=True 46 | #conv3 Nx=32, Ny=32, Kx=4, Ky=4, Ni=108, No=200, priv=False 47 | #conv4 Nx=32, Ny=32, Kx=7, Ky=7, Ni=16, No=512, priv=False 48 | #conv5 Nx=256, Ny=256, Kx=11, Ky=11, Ni=256, No=384, priv=True 49 | # 50 | #pool1 Nx=492, Ny=367, Kx=2, Ky=2, Ni=12, 51 | #pool3 Nx=32, Ny=32, Kx=4, Ky=4, Ni=100, 52 | #pool5 Nx=256, Ny=256, Kx=2, Ky=2, Ni=256, 53 | # 54 | #class1 Ni=200, No=100, 55 | #class3 Ni=960, No=20, 56 | 57 | #padded versions 58 | 59 | TF=-DTn=16 -DTi=16 -DTii=32 -DTnn=32 -DTx=16 -DTy=16 60 | 61 | #Tii not in convolution 62 | conv1p: convolution.cpp $(HEADERS) 63 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=10 -DNy=25 -DKx=9 -DKy=9 -DNi=32 -DNn=64 -DSHARED=1 -DTnn=64 -DTn=64 -DTx=10 -DTy=25 -DTi=32 64 | #$(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNx=500 -DNy=375 -DKx=9 -DKy=9 -DNi=32 -DNn=64 -DSHARED=1 -DTnn=64 -DTn=32 -DTx=10 -DTy=25 -DTi=32 65 | 66 | conv2p: convolution.cpp $(HEADERS) 67 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=16 -DNy=16 -DKx=18 -DKy=18 -DNi=8 -DNn=8 -DSHARED=0 -DTii=8 -DTi=8 -DTnn=8 -DTn=8 -DTx=4 -DTy=16 68 | #$(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNx=200 -DNy=200 -DKx=18 -DKy=18 -DNi=16 -DNn=16 -DSHARED=0 -DTii=16 -DTi=16 -DTnn=16 -DTn=16 -DTx=4 -DTy=20 69 | 70 | conv3p: convolution.cpp $(HEADERS) 71 | $(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNx=32 -DNy=32 -DKx=4 -DKy=4 -DNi=128 -DNn=224 -DSHARED=1 -DTi=64 -DTx=4 72 | 73 | conv4p: convolution.cpp $(HEADERS) 74 | $(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNx=32 -DNy=32 -DKx=7 -DKy=7 -DNi=16 -DNn=512 -DSHARED=1 -DTnn=64 -DTn=64 -DTx=8 75 | 76 | conv5p: convolution.cpp $(HEADERS) 77 | #$(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNx=8 -DNy=8 -DKx=11 -DKy=11 -DNi=256 -DNn=384 -DSHARED=0 -DTnn=64 -DTi=64 -DTx=8 -DTy=8 78 | $(CPP) 
$^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=2 -DNy=2 -DKx=11 -DKy=11 -DNi=256 -DNn=64 -DSHARED=0 -DTnn=64 -DTi=32 -DTx=2 -DTy=2 -DTn=64 -DTii=32 -DTnn=64 -DSB 79 | # $(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNx=256 -DNy=256 -DKx=11 -DKy=11 -DNi=256 -DNn=384 -DSHARED=0 -DTnn=64 -DTi=64 -DTx=8 -DTy=8 80 | 81 | pool1p: pooling.cpp $(HEADERS) 82 | $(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNx=492 -DNy=368 -DKx=2 -DKy=2 -DNi=16 -DTii=16 -DTx=2 -DTy=2 83 | 84 | pool3p: pooling.cpp $(HEADERS) 85 | $(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNx=32 -DNy=32 -DKx=4 -DKy=4 -DNi=128 -DTii=64 -DTi=32 -DTx=2 -DTy=2 86 | 87 | pool5p: pooling.cpp $(HEADERS) 88 | $(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNx=256 -DNy=256 -DKx=2 -DKy=2 -DNi=256 -DTii=256 -DTi=32 -DTx=4 -DTy=4 89 | 90 | class1p: classifier.cpp $(HEADERS) 91 | $(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNi=960 -DNn=32 -DTii=192 -DTi=192 -DTx=2 -DTy=4 92 | 93 | 94 | class3p: classifier.cpp $(HEADERS) 95 | $(CPP) $^ $(LIB) $(CFLAGS) $(TF) -o $@ -DNi=224 -DNn=128 -DTii=32 -DTi=32 -DTx=2 -DTy=4 96 | 97 | 98 | # --------------------------------------------------------------------------------------------------------- 99 | 100 | conv1sb: convolution.cpp $(CONV_DFG_HEADERS) $(HEADERS) 101 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=10 -DNy=25 -DKx=9 -DKy=9 -DNi=32 -DNn=64 -DSHARED=1 -DTnn=64 -DTn=64 -DTx=10 -DTy=25 -DTi=32 -DSB 102 | # $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=500 -DNy=375 -DKx=9 -DKy=9 -DNi=32 -DNn=64 -DSHARED=1 -DTnn=64 -DTn=32 -DTx=10 -DTy=25 -DTi=32 103 | 104 | 105 | conv2sb: convolution.cpp $(CONV_DFG_HEADERS) $(HEADERS) 106 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=16 -DNy=16 -DKx=18 -DKy=18 -DNi=8 -DNn=8 -DSHARED=0 -DTii=8 -DTi=8 -DTnn=8 -DTn=8 -DTx=4 -DTy=16 -DSB 107 | # $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=200 -DNy=200 -DKx=18 -DKy=18 -DNi=16 -DNn=16 -DSHARED=0 -DTii=16 -DTi=16 -DTnn=16 -DTn=16 -DTx=4 -DTy=20 108 | 109 | 110 | conv3sb: convolution.cpp $(CONV_DFG_HEADERS) $(HEADERS) 111 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=4 -DNy=4 -DKx=4 -DKy=4 -DNi=128 -DNn=64 -DSHARED=1 -DTi=32 -DTx=4 -DTy=4 -DTn=64 -DTii=32 -DTnn=64 -DSB #Nx=32, Ny=32, Nn=224 112 | # $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=32 -DNy=32 -DKx=4 -DKy=4 -DNi=128 -DNn=224 -DSHARED=1 -DTi=32 -DTx=4 -DTy=4 -DSB 113 | 114 | 115 | conv0sb: convolution.cpp $(CONV_DFG_HEADERS) $(HEADERS) 116 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=2 -DNy=2 -DKx=2 -DKy=2 -DNi=32 -DNn=32 -DSHARED=1 -DTi=32 -DTx=1 -DTy=1 -DTn=32 -DTii=32 -DTnn=32 -DSB 117 | 118 | 119 | conv4sb: convolution.cpp $(CONV_DFG_HEADERS) $(HEADERS) 120 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=8 -DNy=8 -DKx=7 -DKy=7 -DNi=16 -DNn=512 -DSHARED=1 -DTnn=64 -DTn=64 -DTx=8 -DTy=8 -DTii=16 -DSB 121 | # $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=32 -DNy=32 -DKx=7 -DKy=7 -DNi=16 -DNn=512 -DSHARED=1 -DTnn=64 -DTn=64 -DTx=8 -DSB 122 | 123 | 124 | conv5sb: convolution.cpp $(CONV_DFG_HEADERS) $(HEADERS) 125 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=2 -DNy=2 -DKx=11 -DKy=11 -DNi=256 -DNn=64 -DSHARED=0 -DTnn=64 -DTi=32 -DTx=2 -DTy=2 -DTn=64 -DTii=32 -DTnn=64 -DSB 126 | # $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=8 -DNy=8 -DKx=11 -DKy=11 -DNi=256 -DNn=384 -DSHARED=0 -DTnn=64 -DTi=64 -DTx=8 -DTy=8 -DSB 127 | 128 | 129 | 130 | 131 | pool1sb: pooling.cpp $(POOL_DFG_HEADERS) $(HEADERS) 132 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=492 -DNy=368 -DKx=2 -DKy=2 -DNi=16 -DTii=16 -DTx=2 -DTy=2 -DSB 133 | #$(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=512 -DNy=368 -DKx=2 -DKy=2 -DNi=16 -DTii=16 
-DTx=16 -DTy=16 -DSB 134 | 135 | pool3sb: pooling.cpp $(POOL_DFG_HEADERS) $(HEADERS) 136 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=32 -DNy=32 -DKx=4 -DKy=4 -DNi=128 -DTii=64 -DTi=32 -DTx=16 -DTy=4 -DSB 137 | 138 | pool5sb: pooling.cpp $(HEADERS) $(POOL_DFG_HEADERS) 139 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNx=256 -DNy=256 -DKx=2 -DKy=2 -DNi=256 -DTii=256 -DTi=32 -DTx=16 -DTy=8 -DSB 140 | 141 | class1sb: classifier.cpp $(CLASS_DFG_HEADERS) $(HEADERS) 142 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNi=960 -DNn=20 -DTii=32 -DTi=32 -DTx=2 -DTy=4 -DSB 143 | 144 | class3sb: classifier.cpp $(CLASS_DFG_HEADERS) $(HEADERS) 145 | $(CPP) $^ $(LIBS) $(CFLAGS) $(TF) -o $@ -DNi=224 -DNn=128 -DTii=32 -DTi=32 -DTx=2 -DTy=4 -DSB 146 | 147 | 148 | # -------------------------------------------------------------------------------------------------------------------------------- 149 | 150 | conv1: convolution.cpp $(HEADERS) 151 | $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNx=500 -DNy=375 -DKx=9 -DKy=9 -DNi=32 -DNn=48 -DSHARED=1 -DTnn=64 -DTn=32 -DTx=10 -DTy=25 -DTi=32 152 | 153 | conv2: convolution.cpp $(HEADERS) 154 | $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNx=200 -DNy=200 -DKx=18 -DKy=18 -DNi=8 -DNn=8 -DSHARED=0 -DTnn=8 -DTn=8 -DTi=8 -DTx=16 -DTy=16 155 | 156 | conv3: convolution.cpp $(HEADERS) 157 | $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNx=32 -DNy=32 -DKx=4 -DKy=4 -DNi=108 -DNn=200 -DSHARED=1 -DTnn=20 -DTn=10 -DTi=20 -DTx=16 -DTy=16 158 | 159 | conv4: convolution.cpp $(HEADERS) 160 | $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNx=32 -DNy=32 -DKx=7 -DKy=7 -DNi=16 -DNn=512 -DSHARED=1 -DTnn=64 -DTn=16 -DTi=16 -DTx=16 -DTy=16 161 | 162 | conv5: convolution.cpp $(HEADERS) 163 | $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNx=8 -DNy=8 -DKx=11 -DKy=11 -DNi=256 -DNn=384 -DSHARED=0 -DTnn=64 -DTn=16 -DTi=16 -DTx=16 -DTy=16 164 | # $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNx=256 -DNy=256 -DKx=11 -DKy=11 -DNi=256 -DNn=384 -DSHARED=0 -DTnn=64 -DTn=16 -DTi=16 -DTx=16 -DTy=16 165 | 166 | 167 | pool1: pooling.cpp $(HEADERS) 168 | $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNx=492 -DNy=367 -DKx=2 -DKy=2 -DNi=12 -DTii=12 -DTi=12 -DTx=16 -DTy=16 169 | 170 | pool3: pooling.cpp $(HEADERS) 171 | $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNx=32 -DNy=32 -DKx=4 -DKy=4 -DNi=100 -DTii=50 -DTi=25 -DTx=16 -DTy=16 172 | 173 | pool5: pooling.cpp $(HEADERS) 174 | $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNx=256 -DNy=256 -DKx=2 -DKy=2 -DNi=256 -DTii=64 -DTi=16 -DTx=16 -DTy=16 175 | 176 | 177 | class1: classifier.cpp $(HEADERS) 178 | $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNi=960 -DNn=20 -DTii=80 -DTi=20 -DTnn=20 -DTn=20 179 | 180 | 181 | class3: classifier.cpp $(HEADERS) 182 | $(CPP) $^ $(LIB) $(CFLAGS) -o $@ -DNi=200 -DNn=100 -DTii=40 -DTi=20 -DTnn=40 -DTn=20 183 | 184 | 185 | clean: 186 | @rm -f $(MODULE) convolution pooling classifier $(DFG_HEADERS) 187 | 188 | -------------------------------------------------------------------------------- /workloads/diannao/classifier.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "dnn.hpp" 3 | #include "softbrain.hpp" 4 | #include 5 | 6 | #if SB 7 | #include "red32to1sig.h" 8 | #endif 9 | 10 | #include "sim_timing.h" 11 | 12 | 13 | 14 | using namespace std; 15 | 16 | // Problem Size 17 | //#define Nn 100 // Number of Output Layers 18 | //#define Ni 200 // Number of Input Layers 19 | 20 | #ifndef Nn 21 | #define Nn 128 // Number of Output Layers 22 | #define Ni 224 // Number of Input Layers 23 | #endif 24 | 25 | #ifndef Tii 26 | // Tiling Sizes 27 | #define Tnn 32 28 | #define Tii 32 29 | 
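 //Tnn/Tii are the outer tile sizes over output/input neurons and Tn/Ti the inner
 //tiles (see classifier_layer_blocked below); the Makefile overrides all four per
 //benchmark with -DTnn=... -DTii=... etc.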
//#define Tn 5 30 | //#define Ti 25 31 | #define Tn 16 32 | #define Ti 16 33 | #endif 34 | 35 | //Arrays: 36 | VTYPE synapse[Nn][Ni] __attribute__((aligned(64))); 37 | VTYPE neuron_i[Ni] __attribute__((aligned(64))); 38 | VTYPE neuron_n[Nn] __attribute__((aligned(64))), neuron_n2[Nn] __attribute__((aligned(64))); 39 | 40 | 41 | int classifier_layer(VTYPE (&synapse)[Nn][Ni], VTYPE (&neuron_i)[Ni], VTYPE (&neuron_n)[Nn]) { 42 | int total_calc=0; 43 | for (int n = 0; n < Nn; n++) { 44 | VTYPE temp=0; 45 | for (int i = 0; i < Ni; i++) { 46 | temp += synapse[n][i] * neuron_i[i]; 47 | } 48 | neuron_n[n] = sigmoid(temp); 49 | } 50 | return total_calc; 51 | } 52 | 53 | #if SB 54 | 55 | // CGRA Pipe 56 | #define PIPEWIDTH 8 // adders at mouth of 1 CGRA pipe 57 | #define PIPEDEPTH 32 // approx. depth of CGRA pipeline 58 | int classifier_layer_sb(VTYPE (&synapse)[Nn][Ni], VTYPE (&neuron_i)[Ni], VTYPE (&neuron_n)[Nn]) { 59 | // Fits in scratchpad? (should be true for our benches) 60 | if(Ni > SCRATCHSIZE){ 61 | cout << "Error: inputs do not fit in scratch for classifier layer" << endl; 62 | return -1; 63 | } 64 | 65 | // Handle class3, Nn = 32 -- not sure what this does 66 | int pipedepth = PIPEDEPTH; 67 | if(Nn < PIPEDEPTH){ 68 | pipedepth = Nn; 69 | } 70 | 71 | // Stream in CGRA config (do this somewhere else?) 72 | SB_CONFIG(red32to1sig_config, red32to1sig_size); 73 | 74 | // Stream in inputs to scratch 75 | SB_DMA_SCRATCH_LOAD(&neuron_i, sizeof(VTYPE)*4, sizeof(VTYPE)*4, Ni/4, 0); 76 | SB_WAIT_ALL(); 77 | 78 | for(int n = 0; n < Nn; n += pipedepth){ 79 | SB_CONST(P_red32to1sig_acc, 0, pipedepth); 80 | 81 | for(int i = 0; i < Ni; i+= PIPEWIDTH*4){ 82 | // Enable sigmoid on final itr 83 | if(i + PIPEWIDTH*4 < Ni){ 84 | SB_CONST(P_red32to1sig_pred, 0, pipedepth); 85 | SB_RECURRENCE(P_red32to1sig_out, P_red32to1sig_acc, pipedepth); 86 | } else { 87 | SB_CONST(P_red32to1sig_pred, 1, pipedepth); 88 | } 89 | 90 | SB_DMA_READ(&synapse[n][i], sizeof(VTYPE)*Ni, 4*sizeof(VTYPE)*PIPEWIDTH, pipedepth, P_red32to1sig_S); //Read Synapses 91 | SB_SCR_PORT_STREAM(i*sizeof(VTYPE), 0, 4*sizeof(VTYPE)*PIPEWIDTH, pipedepth, P_red32to1sig_N); //Read Neurons 92 | } 93 | 94 | // write completed outputs out to memory 95 | SB_DMA_WRITE_SHF16(P_red32to1sig_out, 4*sizeof(VTYPE), 4*sizeof(VTYPE), pipedepth/4, &neuron_n[n]); 96 | } 97 | 98 | SB_WAIT_ALL(); 99 | 100 | return 0; 101 | } 102 | #endif 103 | 104 | void fill_classifier(VTYPE (&synapse)[Nn][Ni], VTYPE (&neuron_i)[Ni], 105 | VTYPE (&neuron_n)[Nn], VTYPE (&neuron_n2)[Nn]) { 106 | for(int n = 0; n < Nn; ++n) { 107 | for(int i = 0; i < Ni; ++i) { 108 | synapse[n][i] = rand()%(1+n/4); //n*Ni+i; 109 | } 110 | } 111 | for(int i = 0; i < Ni; ++i) { 112 | neuron_i[i] = rand()%16; //i; 113 | } 114 | for(int n = 0; n < Nn; ++n) { 115 | neuron_n[n] = 0; //i; 116 | neuron_n2[n] = 0; //i; 117 | } 118 | } 119 | 120 | int classifier_layer_blocked(VTYPE (&synapse)[Nn][Ni], VTYPE (&neuron_i)[Ni], 121 | VTYPE (&neuron_n)[Nn]) { 122 | int total_calc=0; 123 | VTYPE sum[Nn]={0}; 124 | for (int nnn = 0; nnn < Nn; nnn += Tnn) { // tiling for output neurons; 125 | for (int iii = 0; iii < Ni; iii += Tii) { // tiling for input neurons; 126 | for (int nn = nnn; nn < nnn + Tnn; nn += Tn) { 127 | for (int ii = iii; ii < iii + Tii; ii += Ti) { 128 | //total_calc++; 129 | 130 | // — Original code — 131 | for (int n = nn; n < nn + Tn; n++) { 132 | VTYPE sum_sc=0; 133 | for (int i = ii; i < ii + Ti; i++) { 134 | sum_sc += (synapse[n][i] * neuron_i[i]); 135 | //sum_sc += synapse[n][i] * i; 136 | } 137 
| sum[n]+=sum_sc; 138 | } 139 | } 140 | } 141 | } 142 | for (int nn = nnn; nn < nnn + Tnn; nn++) { 143 | neuron_n[nn] = sigmoid(sum[nn]); 144 | } 145 | } 146 | return total_calc; 147 | } 148 | 149 | int main(int argc, char** argv) { 150 | fill_classifier(synapse,neuron_i,neuron_n,neuron_n2); 151 | 152 | if(argc==3) { 153 | 154 | } else if(argc==2) { 155 | begin_roi(); 156 | #ifdef SB 157 | int calc = classifier_layer_sb(synapse,neuron_i,neuron_n); 158 | #else 159 | int calc = classifier_layer_blocked(synapse,neuron_i,neuron_n); 160 | #endif 161 | end_roi(); 162 | 163 | if(calc > 0) { 164 | cout << "calc: " << calc << "\n"; 165 | } 166 | //cout << "Perf Run Complete\n"; 167 | } else { 168 | int calc = classifier_layer(synapse,neuron_i,neuron_n); 169 | 170 | begin_roi(); 171 | #ifdef SB 172 | int calc2 = classifier_layer_sb(synapse,neuron_i,neuron_n2); 173 | #else 174 | int calc2 = classifier_layer_blocked(synapse,neuron_i,neuron_n2); 175 | #endif 176 | end_roi(); 177 | 178 | cout << "C1: " << calc << " C2: " << calc2 << "\n"; 179 | 180 | compare(neuron_n,neuron_n2,Nn); 181 | 182 | cout << "mults: " << Nn*Ni << " sigmoids: " << Nn << "\n"; 183 | } 184 | sb_stats(); 185 | } 186 | 187 | -------------------------------------------------------------------------------- /workloads/diannao/convolution_old.cpp: -------------------------------------------------------------------------------- 1 | #include <iostream> 2 | #include <string> 3 | #include "dnn.hpp" 4 | 5 | using namespace std; 6 | 7 | #ifndef SHARED 8 | #define SHARED 1 9 | #endif 10 | 11 | #ifndef Ny 12 | //Problem Size 13 | #define Ny 32 14 | #define Nx 32 15 | 16 | #define Kx 4 17 | #define Ky 4 18 | //#define Ni 108 19 | //#define Nn 200 20 | 21 | #define Ni 112 22 | #define Nn 224 23 | #endif 24 | 25 | //slide increment 26 | #ifndef Sy 27 | #define Sy 1 28 | #define Sx 1 29 | #endif 30 | 31 | #ifndef Tnn 32 | //Tiling Sizes 33 | #define Tnn 32 34 | //#define Tn 25 35 | //#define Ti 16 36 | #define Tn 16 37 | #define Ti 16 38 | 39 | #define Ty 8 40 | #define Tx 8 41 | #endif 42 | 43 | #define NYPAD (Ny+Ky) 44 | #define NXPAD (Nx+Kx) 45 | 46 | #define NYSCL (Ny/Sy) 47 | #define NXSCL (Nx/Sx) 48 | 49 | 50 | //Arrays: 51 | #if SHARED == 1 52 | #define SYNAPSE_SIZE (1L*Ky*Kx*Nn*Ni) 53 | #else 54 | #define SYNAPSE_SIZE (1L*NYSCL*NXSCL*Ky*Kx*Nn*Ni) 55 | #endif 56 | 57 | #if SHARED == 1 58 | VTYPE (*synapse)[Ky][Kx][Nn][Ni]; 59 | #else 60 | VTYPE (*synapse)[NYSCL][NXSCL][Ky][Kx][Nn][Ni]; 61 | #endif 62 | 63 | //VTYPE neuron_i[NYPAD][NXPAD][Ni]; 64 | //VTYPE neuron_n[NYSCL][NXSCL][Nn]={0}, neuron_n2[NYSCL][NXSCL][Nn]={0}; 65 | 66 | VTYPE (*neuron_i)[NYPAD][NXPAD][Ni]; 67 | VTYPE (*neuron_n)[NYSCL][NXSCL][Nn]; 68 | VTYPE (*neuron_n2)[NYSCL][NXSCL][Nn]; 69 | 70 | 71 | void fill_convolution_shared_simple(VTYPE (&synapse)[Ky][Kx][Nn][Ni], 72 | VTYPE (&neuron_i)[NYPAD][NXPAD][Ni]) { 73 | for(int yy = 0; yy < Ky; ++yy) { 74 | for(int xx = 0; xx < Kx; ++xx) { 75 | for(int nn = 0; nn < Nn; ++nn) { 76 | for(int ni = 0; ni < Ni; ++ni) { 77 | synapse[yy][xx][nn][ni] = 2; 78 | } } } } 79 | for(int yy = 0; yy < NYPAD; ++yy) { 80 | for(int xx = 0; xx < NXPAD; ++xx) { 81 | for(int ni = 0; ni < Ni; ++ni) { 82 | neuron_i[yy][xx][ni] = 1; 83 | } } } 84 | 85 | } 86 | 87 | void fill_convolution_private(VTYPE (&synapse)[NYSCL][NXSCL][Ky][Kx][Nn][Ni], 88 | VTYPE (&neuron_i)[NYPAD][NXPAD][Ni]) { 89 | for(int yout = 0; yout < NYSCL; ++yout) { 90 | for(int xout = 0; xout < NXSCL; ++xout) { 91 | for(int yy = 0; yy < Ky; ++yy) { 92 | for(int xx = 0; xx < Kx; ++xx) { 93 | for(int nn = 0; nn
< Nn; ++nn) { 94 | for(int ni = 0; ni < Ni; ++ni) { 95 | synapse[yout][xout][yy][xx][nn][ni] = 2; 96 | } } } } } } 97 | for(int yy = 0; yy < NYPAD; ++yy) { 98 | for(int xx = 0; xx < NXPAD; ++xx) { 99 | for(int ni = 0; ni < Ni; ++ni) { 100 | neuron_i[yy][xx][ni] = 1; 101 | } } } 102 | 103 | } 104 | 105 | 106 | void fill_convolution_shared(VTYPE (&synapse)[Ky][Kx][Nn][Ni], 107 | VTYPE (&neuron_i)[NYPAD][NXPAD][Ni]) { 108 | int total1=0,total2=0; 109 | for(int yy = 0; yy < Ky; ++yy) { 110 | for(int xx = 0; xx < Kx; ++xx) { 111 | for(int nn = 0; nn < Nn; ++nn) { 112 | for(int ni = 0; ni < Ni; ++ni) { 113 | synapse[yy][xx][nn][ni] = total1; 114 | total1+=1; 115 | } 116 | } 117 | } 118 | } 119 | for(int yy = 0; yy < NYPAD; ++yy) { 120 | for(int xx = 0; xx < NXPAD; ++xx) { 121 | for(int ni = 0; ni < Ni; ++ni) { 122 | neuron_i[yy][xx][ni] = total2; 123 | total2+=2; 124 | } 125 | } 126 | } 127 | } 128 | 129 | 130 | 131 | std::pair<int,int> convolution_layer_blocked( 132 | #if SHARED == 1 133 | VTYPE (&synapse)[Ky][Kx][Nn][Ni], 134 | #else 135 | VTYPE (&synapse)[NYSCL][NXSCL][Ky][Kx][Nn][Ni], 136 | #endif 137 | VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 138 | VTYPE (&neuron_n)[NYSCL][NXSCL][Nn]) { 139 | int c1=0,c2=0; 140 | VTYPE sum[Nn]={0}; 141 | 142 | for (int yy = 0; yy < Ny; yy += Ty) { 143 | for (int xx = 0; xx < Ny; xx += Tx) { 144 | for (int nnn = 0; nnn < Nn; nnn += Tnn) { 145 | int yout = yy/Sy; 146 | for (int y = yy; y < yy + Ty; y += Sy) { // tiling for y; 147 | int xout = xx/Sx; 148 | 149 | for (int x = xx; x < xx + Tx; x += Sx) { // tiling for x; 150 | 151 | //LOAD_SCRATCH -- read cube from larger 3D cube to compact 3D cube 152 | //start_addr: neuron[y][x][i] 153 | //stride_size: Ni*Kx 154 | //stride: Ni*Nx 155 | //num_strides: ky 156 | 157 | for (int nn = nnn; nn < nnn + Tnn; nn += Tn) { 158 | for (int n = nn; n < nn + Tn; n++) { 159 | sum[n] = 0; 160 | } 161 | 162 | for (int ky = 0; ky < Ky; ky++) { // sliding window; 163 | for (int kx = 0; kx < Kx; kx++) { 164 | 165 | int ii = 0; 166 | VTYPE sum_sc; 167 | 168 | for (; ii < Ni -Ti+1; ii += Ti) { 169 | //SCRATCH -> PORT 170 | // addr: neuron_i[ky + y][kx + x][ii] 171 | // stride_len: Tn*2 172 | // num_strides: 8 173 | 174 | //DMA -> PORT 175 | // addr: synapse[ky][kx][n][ii] 176 | // stride_len: Tn*2 177 | // stride_dist: 178 | // num_strides:8 179 | 180 | //***** 181 | for (int n = nn; n < nn + Tn; n++) { 182 | sum_sc=0; 183 | for (int i = ii; i < ii + Ti; i++) { 184 | #if SHARED == 1 // version with shared kernels 185 | VTYPE sv = synapse[ky][kx][n][i]; 186 | VTYPE nv = neuron_i[ky + y][kx + x][i]; 187 | #else // version with private kernels 188 | VTYPE sv = synapse[yout][xout][ky][kx][n][i]; 189 | VTYPE nv = neuron_i[ky + y][kx + x][i]; 190 | #endif 191 | sum_sc+=(sv*nv)>>1; 192 | } 193 | sum[n]+=sum_sc; 194 | } 195 | //**** 196 | } 197 | } 198 | } 199 | 200 | //sigmoid 201 | for (int n = nn; n < nn + Tn; n++) { 202 | neuron_n[yout][xout][n] = sigmoid(sum[n]); 203 | //c2++; 204 | } 205 | } 206 | xout++; 207 | } 208 | yout++; 209 | } 210 | } 211 | } 212 | } 213 | return make_pair(c1,c2); 214 | } 215 | 216 | /* 217 | * MM convolution layer implemented for softbrain 218 | * Sb config constants (SCRATCHSIZE, NUMPIPES, etc) in softbrain.hpp 219 | * Code assumes NUMPIPES = 2 220 | * Safe because inputs % PIPEDEPTH = 0 (for PIPEDEPTH = 32) 221 | * This version does not tile inputs.
If the inputs don't fit in the 222 | * scratchpad, it invokes convolution_layer_sb_tiled 223 | * NOTE THE CHANGED ARRAY DIMS FOR SYNAPSE 224 | */ 225 | int convolution_layer_sb( 226 | #if SHARED == 1 227 | VTYPE (&synapse)[Ky][Kx][Nn][Ni], // MM CHANGED ORDERING OF ALL INPUTS 228 | #else 229 | VTYPE (&synapse)[NYSCL][NXSCL][Ky][Kx][Nn][Ni], 230 | #endif 231 | VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 232 | VTYPE (&neuron_n)[NYSCL][NXSCL][Nn]) { 233 | 234 | // Stream in CGRA config 235 | int *cgra_config; 236 | int cgra_config_sz; 237 | DMA_SB_CONFIG(cgra_config, cgra_config_sz); 238 | 239 | if(Kx * Ky * Ni > SCRATCHSIZE){ // input neurons don't fit in scratch 240 | convolution_layer_sb_tiled(synapse, neuron_i, neuron_n); return 0; // call tiled version and return 241 | } 242 | 243 | VTYPE neuron_i_scratch[Ky][Kx][Ni]; 244 | // neuron_i_scratch is placed at SCRATCHSTART in the scratchpad address space 245 | int yout = 0; 246 | for (int y = 0; y < Ny; y += Sy) { 247 | int xout = 0; 248 | for (int x = 0; x < Nx; x += Sx) { 249 | IC_DMA_SCRATCH_LOAD(&neuron_i[y][x][0], sizeof(VTYPE) * Ni * Nx, sizeof(VTYPE) * Ni * Kx, Ky, &neuron_i_scratch[0][0][0]); 250 | for(int n = 0; n < Nn; n += 2*PIPEDEPTH){ // each pipe does PIPEDEPTH output layers 251 | IC_CONST(INPUTPRED0, 0, Ni*Kx*Ky - 1); 252 | IC_CONST(INPUTPRED1, 0, Ni*Kx*Ky - 1); 253 | for(int ky = 0; ky < Ky; ++ky){ // Spin through windows... 254 | for(int kx = 0; kx < Kx; ++kx){ 255 | for(int i = 0; i < Ni; i+=PIPEWIDTH){ // ...and input layers 256 | for(int nn = 0; nn < PIPEDEPTH; ++nn){ // Both pipes get PIPEDEPTH copies of same neurons 257 | IC_SCRATCH_READ(&neuron_i_scratch[ky][kx][i], PIPEWIDTH, INPUTNEURON0); 258 | IC_SCRATCH_READ(&neuron_i_scratch[ky][kx][i], PIPEWIDTH, INPUTNEURON1); 259 | } 260 | 261 | // Each entry gets weights for different output layers 262 | #if SHARED == 1 263 | IC_DMA_READ(&synapse[ky][kx][n][i], sizeof(VTYPE)*Ni, sizeof(VTYPE)*PIPEWIDTH, PIPEDEPTH, INPUTWEIGHT0); 264 | IC_DMA_READ(&synapse[ky][kx][n+PIPEDEPTH][i], sizeof(VTYPE)*Ni, sizeof(VTYPE)*PIPEWIDTH, PIPEDEPTH, INPUTWEIGHT1); 265 | #else 266 | IC_DMA_READ(&synapse[yout][xout][ky][kx][n][i], sizeof(VTYPE)*Ni, sizeof(VTYPE)*PIPEWIDTH, PIPEDEPTH, INPUTWEIGHT0); 267 | IC_DMA_READ(&synapse[yout][xout][ky][kx][n+PIPEDEPTH][i], sizeof(VTYPE)*Ni, sizeof(VTYPE)*PIPEWIDTH, PIPEDEPTH, INPUTWEIGHT1); 268 | #endif 269 | 270 | if(ky + 1 < Ky || kx + 1 < Kx || i + PIPEWIDTH < Ni){ // don't recurse on last pass 271 | OC_RECURRENCE(OUTPUT0, INPUTACC0, PIPEDEPTH); // until input stack complete 272 | OC_RECURRENCE(OUTPUT1, INPUTACC1, PIPEDEPTH); 273 | } 274 | if((kx + 1 == Kx) && (ky + 1 == Ky) && (i + PIPEWIDTH >= Ni - 1)){ // sigmoid -- before last step of last tile 275 | IC_CONST(INPUTPRED0, 1, PIPEDEPTH); 276 | IC_CONST(INPUTPRED1, 1, PIPEDEPTH); 277 | } 278 | } 279 | } 280 | } 281 | // Write completed input stacks out to mem 282 | OC_DMA_WRITE(OUTPUT0, sizeof(VTYPE), sizeof(VTYPE), PIPEDEPTH, &neuron_n[yout][xout][n]); 283 | OC_DMA_WRITE(OUTPUT1, sizeof(VTYPE), sizeof(VTYPE), PIPEDEPTH, &neuron_n[yout][xout][n+PIPEDEPTH]); 284 | } 285 | xout++; 286 | } 287 | yout++; 288 | } 289 | 290 | return 0; 291 | } 292 | 293 | // The full input stack won't fit in the scratchpad at once, 294 | // so we must tile it into chunks that do.
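As a rough illustration of the tile-size arithmetic the tiled version below relies on (an editorial sketch, not part of convolution_old.cpp; it assumes SCRATCHSIZE = 2048 as defined in softbrain.hpp and PIPEWIDTH = 8 as defined in classifier.cpp):

// Hypothetical standalone helper mirroring the `ti` computation in convolution_layer_sb_tiled:
// how many input layers fit in the scratchpad per tile.
static int scratch_tile_layers(int scratch_elems, int kx, int ky, int pipewidth) {
  int ti = scratch_elems / (kx * ky);  // layers per tile that fit, rounded down
  ti -= ti % pipewidth;                // keep ti a multiple of the pipe width for easy chunking
  return ti;
}
// With the defaults here: 2048 / (4*4) = 128, already a multiple of 8, so ti = 128,
// which covers all Ni = 112 input layers in one tile. (In fact, for these sizes
// Kx*Ky*Ni = 1792 <= SCRATCHSIZE, so convolution_layer_sb above never takes the tiled path.)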
295 | std::pair<int,int> convolution_layer_sb_tiled( 296 | #if SHARED == 1 297 | VTYPE (&synapse)[Ky][Kx][Nn][Ni], 298 | #else 299 | VTYPE (&synapse)[NYSCL][NXSCL][Ky][Kx][Nn][Ni], 300 | #endif 301 | VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 302 | VTYPE (&neuron_n)[NYSCL][NXSCL][Nn]) { 303 | 304 | // Most places in the code, Ni is replaced with ti-excess (number of input layers in current tile) 305 | int ti = SCRATCHSIZE / (Kx*Ky); // tile size: # of input layers that fit in scratch, round down 306 | if(ti % PIPEWIDTH != 0){ // Make sure ti%pipewidth = 0 for easy chunking later 307 | ti = ti - (ti % PIPEWIDTH); 308 | } 309 | int excess; 310 | 311 | VTYPE neuron_i_scratch[Ky][Kx][ti]; 312 | // neuron_i_scratch is placed at SCRATCHSTART in the scratchpad address space 313 | 314 | // Outer tiled loop 315 | for(int ii = 0; ii < Ni - 1 + ti; ii += ti){ 316 | // deal with overflow (Ni%ti != 0) 317 | if(ii >= Ni){ 318 | excess = ii - (Ni - 1); 319 | } else { 320 | excess = 0; 321 | } 322 | int yout = 0; 323 | for (int y = 0; y < Ny; y += Sy) { 324 | int xout = 0; 325 | for (int x = 0; x < Nx; x += Sx) { 326 | IC_DMA_SCRATCH_LOAD(&neuron_i[y][x][ii], sizeof(VTYPE)*Ni*Nx, sizeof(VTYPE)*(ti-excess)*Kx, Ky, neuron_i_scratch); 327 | for(int n = 0; n < Nn; n += 2*PIPEDEPTH){ // each pipe does PIPEDEPTH output layers 328 | IC_CONST(INPUTPRED0, 0, (ti-excess)*Kx*Ky - 1); 329 | IC_CONST(INPUTPRED1, 0, (ti-excess)*Kx*Ky - 1); 330 | 331 | // If not first ii itr, load output acc. from memory 332 | if(ii != 0){ 333 | IC_DMA_READ(&neuron_n[yout][xout][n], sizeof(VTYPE) * PIPEDEPTH, sizeof(VTYPE)*PIPEDEPTH, 1, INPUTACC0); 334 | IC_DMA_READ(&neuron_n[yout][xout][n+PIPEDEPTH], sizeof(VTYPE) * PIPEDEPTH, sizeof(VTYPE)*PIPEDEPTH, 1, INPUTACC1); 335 | } 336 | 337 | for(int ky = 0; ky < Ky; ++ky){ // Spin through windows... 338 | for(int kx = 0; kx < Kx; ++kx){ 339 | for(int i = ii; i < ii+(ti-excess); i+=PIPEWIDTH){ // ...and input layers 340 | for(int nn = 0; nn < PIPEDEPTH; ++nn){ // Both pipes get PIPEDEPTH copies of same neurons 341 | IC_SCRATCH_READ(&neuron_i_scratch[ky][kx][i], PIPEWIDTH, INPUTNEURON0); 342 | IC_SCRATCH_READ(&neuron_i_scratch[ky][kx][i], PIPEWIDTH, INPUTNEURON1); 343 | } 344 | 345 | // Each entry gets weights for different output layers 346 | #if SHARED == 1 347 | IC_DMA_READ(&synapse[ky][kx][n][i], sizeof(VTYPE)*Ni, sizeof(VTYPE)*PIPEWIDTH, PIPEDEPTH, INPUTWEIGHT0); 348 | IC_DMA_READ(&synapse[ky][kx][n+PIPEDEPTH][i], sizeof(VTYPE)*Ni, sizeof(VTYPE)*PIPEWIDTH, PIPEDEPTH, INPUTWEIGHT1); 349 | #else 350 | IC_DMA_READ(&synapse[yout][xout][ky][kx][n][i], sizeof(VTYPE)*Ni, sizeof(VTYPE)*PIPEWIDTH, PIPEDEPTH, INPUTWEIGHT0); 351 | IC_DMA_READ(&synapse[yout][xout][ky][kx][n+PIPEDEPTH][i], sizeof(VTYPE)*Ni, 352 | sizeof(VTYPE)*PIPEWIDTH, PIPEDEPTH, INPUTWEIGHT1); 353 | #endif 354 | 355 | if(ky + 1 < Ky || kx + 1 < Kx || i + PIPEWIDTH < (ti-excess)){ // don't recurse on last pass 356 | OC_RECURRENCE(OUTPUT0, INPUTACC0, PIPEDEPTH); // until input stack complete 357 | OC_RECURRENCE(OUTPUT1, INPUTACC1, PIPEDEPTH); 358 | } 359 | if((kx + 1 == Kx) && (ky + 1 == Ky) && (i + PIPEWIDTH >= (ti-excess) - 1)){ // sigmoid -- before last step of last tile 360 | IC_CONST(INPUTPRED0, 1, PIPEDEPTH); 361 | IC_CONST(INPUTPRED1, 1, PIPEDEPTH); 362 | } 363 | } 364 | } 365 | } 366 | // Write partial output stacks out to mem 367 | OC_DMA_WRITE(OUTPUT0, sizeof(VTYPE), sizeof(VTYPE), PIPEDEPTH, &neuron_n[yout][xout][n]); 368 | OC_DMA_WRITE(OUTPUT1, sizeof(VTYPE), sizeof(VTYPE), PIPEDEPTH, &neuron_n[yout][xout][n+PIPEDEPTH]); 369 | } 370 | xout++; 371 | } 372 | yout++; 373 | } 374
| } 375 | 376 | return make_pair(0,0); 377 | } 378 | 379 | std::pair<int,int> convolution_layer( 380 | #if SHARED == 1 381 | VTYPE (&synapse)[Ky][Kx][Nn][Ni], 382 | #else 383 | VTYPE (&synapse)[NYSCL][NXSCL][Ky][Kx][Nn][Ni], 384 | #endif 385 | VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 386 | VTYPE (&neuron_n)[NYSCL][NXSCL][Nn]) { 387 | int c1=0,c2=0; 388 | VTYPE sum[Nn]={0}; 389 | 390 | // — Original code — (excluding nn, ii loops) 391 | int yout = 0; 392 | for (int y = 0; y < Ny; y += Sy) { // tiling for y; 393 | int xout = 0; 394 | for (int x = 0; x < Ny; x += Sx) { // tiling for x; 395 | for (int nn = 0; nn < Nn; nn += Tn) { 396 | for (int n = nn; n < nn + Tn; n++) { 397 | sum[n]=0; 398 | } 399 | 400 | // sliding window; 401 | for (int ky = 0; ky < Ky; ky++) 402 | for (int kx = 0; kx < Kx; kx++) 403 | for (int n = nn; n < nn + Tn; n++) 404 | for (int i = 0; i < Ni; i++) { 405 | #if SHARED == 1 // version with shared kernels 406 | VTYPE sv = synapse[ky][kx][n][i]; 407 | VTYPE nv = neuron_i[ky + y][kx + x][i]; 408 | #else // version with private kernels 409 | VTYPE sv = synapse[yout][xout][ky][kx][n][i]; 410 | VTYPE nv = neuron_i[ky + y][kx + x][i]; 411 | #endif 412 | sum[n]+=(sv*nv)>>1; 413 | } 414 | //sigmoid 415 | for (int n = nn; n < nn + Tn; n++) { 416 | neuron_n[yout][xout][n] = sigmoid(sum[n]); 417 | c2++; 418 | } 419 | } 420 | xout++; 421 | } 422 | yout++; 423 | } 424 | return make_pair(c1,c2); 425 | } 426 | 427 | int main(const int argc, const char** argv) { 428 | 429 | #if SHARED == 1 430 | synapse = (VTYPE (*)[Ky][Kx][Nn][Ni]) malloc(SYNAPSE_SIZE*sizeof(VTYPE)); 431 | #else 432 | synapse = (VTYPE (*)[NYSCL][NXSCL][Ky][Kx][Nn][Ni]) malloc(SYNAPSE_SIZE*sizeof(VTYPE)); 433 | #endif 434 | 435 | neuron_i = (VTYPE (*)[NYPAD][NXPAD][Ni])malloc(NYPAD*NXPAD*Ni*sizeof(VTYPE)); 436 | neuron_n = (VTYPE (*)[NYSCL][NXSCL][Nn])malloc(NYSCL*NXSCL*Nn*sizeof(VTYPE)); 437 | neuron_n2 = (VTYPE (*)[NYSCL][NXSCL][Nn])malloc(NYSCL*NXSCL*Nn*sizeof(VTYPE)); 438 | 439 | #if SHARED == 1 440 | fill_convolution_shared_simple(*synapse,*neuron_i); 441 | #else 442 | fill_convolution_private(*synapse,*neuron_i); 443 | #endif 444 | 445 | begin_roi(); 446 | if(argc==3) { 447 | 448 | // } else if(argc==2 && string(argv[1])=="perf") { 449 | } else if(argc==2) { 450 | auto calc = convolution_layer_blocked(*synapse,*neuron_i,*neuron_n); 451 | //cout << "Perf Run Complete\n"; 452 | } else { 453 | cout << "argc: " << argc << "\n"; 454 | 455 | auto calc = convolution_layer_blocked(*synapse,*neuron_i,*neuron_n); 456 | auto calc2 = convolution_layer(*synapse,*neuron_i,*neuron_n2); 457 | if(calc.first!=0) { 458 | cout << "blocks=" << calc.first << "\n"; 459 | } 460 | compare((VTYPE*)*neuron_n,(VTYPE*)*neuron_n2,NYSCL*NXSCL*Nn); 461 | int n_outputs= Ny/Sy * Ny/Sx * Nn; 462 | cout << "mults: " << n_outputs*Ni*Kx*Ky << " sigmoids: " << n_outputs << "\n"; 463 | cout << "argc: " << argc << "\n"; 464 | } 465 | end_roi(); 466 | 467 | //cout << "mult-block: " << calc.first << " sigmoid-block: " << calc.second << "\n"; 468 | //cout << "mult-orig: " << calc2.first << " sigmoid-orig: " << calc2.second << "\n"; 469 | } 470 | 471 | -------------------------------------------------------------------------------- /workloads/diannao/dnn.hpp: -------------------------------------------------------------------------------- 1 | #ifndef DNN_H 2 | #define DNN_H 3 | 4 | #define M_REPEAT_4(X) X X X X 5 | #define M_REPEAT_8(X) M_REPEAT_4(X) M_REPEAT_4(X) 6 | #define M_REPEAT_16(X) M_REPEAT_8(X) M_REPEAT_8(X) 7 | #define M_REPEAT_32(X) M_REPEAT_16(X) M_REPEAT_16(X) 8 | 9
#include <iostream> 10 | 11 | //#define VTYPE uint32_t 12 | #define VTYPE uint16_t 13 | 14 | 15 | //static __inline__ uint64_t rdtsc(void) { 16 | // unsigned a, d; 17 | // //asm("cpuid"); 18 | // //asm volatile("rdtsc" : "=a" (a), "=d" (d)); 19 | // 20 | // return (((uint64_t)a) | (((uint64_t)d) << 32)); 21 | //} 22 | // 23 | //uint64_t ticks; 24 | //__attribute__ ((noinline)) void begin_roi() { 25 | // ticks=rdtsc(); 26 | //} 27 | //__attribute__ ((noinline)) void end_roi() { 28 | // ticks=(rdtsc()-ticks); 29 | // std::cout << "ticks: " << ticks << "\n"; 30 | //} 31 | 32 | //VTYPE a[16]; 33 | //VTYPE b[16]; 34 | // 35 | VTYPE sigmoid(VTYPE i) { 36 | // return a[i&0xF]*i+b[i&0xF]; 37 | return i*1024/(1024+i); 38 | return i; 39 | } 40 | 41 | 42 | 43 | void compare_short(VTYPE* neuron1, VTYPE* neuron2, int size) { 44 | bool error = false; 45 | for(int i = 0; i < size; ++i) { 46 | if(neuron1[i] != neuron2[i]) { 47 | printf("%d: %d %d\n",i,neuron1[i],neuron2[i]); 48 | error=true; 49 | //std::cout << i << " " << neuron1[i] << ":" << neuron2[i] << "\n"; 50 | } 51 | } 52 | if(error) { 53 | std::cout << "ERROR: Results DO NOT Match\n"; 54 | } else { 55 | std::cout << "Results Match\n"; 56 | } 57 | } 58 | 59 | void compare(VTYPE* neuron1, VTYPE* neuron2, int size) { 60 | bool error = false; 61 | for(int i = 0; i < size; ++i) { 62 | if(neuron1[i] != neuron2[i]) { 63 | error = true; 64 | break; 65 | } 66 | } 67 | if(error) { 68 | for(int i = 0; i < size; ++i) { 69 | std::cout << i << " " << neuron1[i] << ":" << neuron2[i]; 70 | if(neuron1[i] != neuron2[i]) { 71 | std::cout << " \t\tERROR"; 72 | } 73 | std::cout << "\n"; 74 | } 75 | } else { 76 | std::cout << "results match\n"; 77 | } 78 | } 79 | 80 | void* aligned_malloc(uint64_t align, uint64_t bytes) { 81 | size_t mask = (align-1)^((size_t)-1); 82 | char* ptr = (((char*)malloc(bytes+align)) + align); 83 | ptr = (char*) (((size_t)ptr) & mask); 84 | 85 | //touch each page to bring into OS!
-- yes this takes a long time 86 | //never mind that, touch each cache line to bring into l2 87 | for(int i = 0; i < bytes; i+=32) { 88 | ptr[i]=0; 89 | } 90 | return (void*) ptr; 91 | } 92 | 93 | #endif 94 | -------------------------------------------------------------------------------- /workloads/diannao/pool2x2avg.dfg: -------------------------------------------------------------------------------- 1 | Input: R0 2 | Input: R1 3 | Input: R2 4 | Input: R3 5 | Input: R4 6 | 7 | InputVec: prev [0, 1, 2, 3] 8 | 9 | vsum0 =Add16x4(R0, R1) 10 | vsum1 =Add16x4(R1, R2) 11 | vsum2 =Add16x4(R2, R3) 12 | vsum3 =Add16x4(R3, R4) 13 | 14 | H0 =HAdd16x4(R0, vsum0) 15 | H1 =HAdd16x4(R1, vsum1) 16 | H2 =HAdd16x4(R2, vsum2) 17 | H3 =HAdd16x4(R3, vsum3) 18 | 19 | O0=RShf16x4(H0) 20 | O1=RShf16x4(H1) 21 | O2=RShf16x4(H2) 22 | O3=RShf16x4(H3) 23 | 24 | Output: O0 25 | Output: O1 26 | Output: O2 27 | Output: O3 28 | 29 | -------------------------------------------------------------------------------- /workloads/diannao/pool2x2l4avg.dfg: -------------------------------------------------------------------------------- 1 | Input: R0 2 | Input: R1 3 | Input: R2 4 | Input: R3 5 | Input: R4 6 | 7 | InputVec: P [0, 1, 2, 3] 8 | 9 | I0 =Add16x4(R0, R1) 10 | I1 =Add16x4(R1, R2) 11 | I2 =Add16x4(R2, R3) 12 | I3 =Add16x4(R3, R4) 13 | 14 | H0 =Add16x4(P0, I0) 15 | H1 =Add16x4(P1, I1) 16 | H2 =Add16x4(P2, I2) 17 | H3 =Add16x4(P3, I3) 18 | 19 | O0=RShf2_16x4(H0) 20 | O1=RShf2_16x4(H1) 21 | O2=RShf2_16x4(H2) 22 | O3=RShf2_16x4(H3) 23 | 24 | OutputVec: I [0, 1, 2, 3] 25 | 26 | Output: O0 27 | Output: O1 28 | Output: O2 29 | Output: O3 30 | -------------------------------------------------------------------------------- /workloads/diannao/pool4x4l2avg.dfg: -------------------------------------------------------------------------------- 1 | Input: R0 2 | Input: R1 3 | Input: R2 4 | Input: R3 5 | Input: R4 6 | 7 | InputVec: Xa [0, 1] 8 | InputVec: Xb [0, 1] 9 | InputVec: Xc [0, 1] 10 | 11 | R12=Add16x4(R1, R2) 12 | R123=Add16x4(R12, R3) 13 | R0123=Add16x4(R0,R123) 14 | R1234=Add16x4(R123, R4) 15 | 16 | Xd0=R0123 17 | Xd1=R1234 18 | 19 | Xcd0=Add16x4(Xc0,Xd0) 20 | Xcd1=Add16x4(Xc1,Xd1) 21 | 22 | Xab0=Add16x4(Xa0,Xb0) 23 | Xab1=Add16x4(Xa1,Xb1) 24 | 25 | O0_p=Add16x4(Xab0,Xcd0) 26 | O1_p=Add16x4(Xab1,Xcd1) 27 | 28 | O0=RShf4_16x4(O0_p) 29 | O1=RShf4_16x4(O1_p) 30 | 31 | Oa0=Xb0 32 | Oa1=Xb1 33 | Ob0=Xc0 34 | Ob1=Xc1 35 | Oc0=R0123 36 | Oc1=R1234 37 | 38 | OutputVec: Oa [0, 1] 39 | OutputVec: Ob [0, 1] 40 | OutputVec: Oc [0, 1] 41 | 42 | Output: O0 43 | Output: O1 44 | -------------------------------------------------------------------------------- /workloads/diannao/pooling.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "dnn.hpp" 4 | #include 5 | #include 6 | 7 | 8 | using namespace std; 9 | 10 | #if SB 11 | #include "pool2x2l4avg.h" 12 | #include "pool4x4l2avg.h" 13 | #endif 14 | 15 | #include "sim_timing.h" 16 | 17 | #define AVG 1 18 | 19 | //Problem Size 20 | #ifndef Ny //if Ny is undefined, then assume nothing is defined 21 | #define Ny 32 22 | #define Nx 32 23 | 24 | #define Kx 4 25 | #define Ky 4 26 | //#define Ni 100 //Input Layers == Ouptut Layers 27 | #define Ni 128 28 | #endif 29 | 30 | //slide increment 31 | #ifndef Sy 32 | #define Sx 1 33 | #define Sy 1 34 | #endif 35 | 36 | #ifndef Tii //Tiling Sizes: 37 | #define Tii 64 38 | #define Ti 16 39 | #define Ty 16 40 | #define Tx 16 41 | #endif 42 | 43 | #define NYPAD (Ny+Ky) 44 | #define NXPAD 
(Nx+Kx) 45 | 46 | #define NYSCL (Ny/Sy) 47 | #define NXSCL (Nx/Sx) 48 | 49 | 50 | void fill_pooling(VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 51 | VTYPE (&neuron_n1)[NYSCL][NXSCL][Ni], 52 | VTYPE (&neuron_n2)[NYSCL][NXSCL][Ni]) { 53 | int total=0; 54 | for(int yy = 0; yy < NYPAD; ++yy) { 55 | for(int xx = 0; xx < NXPAD; ++xx) { 56 | for(int ni = 0; ni < Ni; ++ni) { 57 | //neuron_i[yy][xx][ni] = xx+yy+ni; 58 | neuron_i[yy][xx][ni] = rand() &0x3FFF; 59 | 60 | //neuron_i[yy][xx][ni] = 1; 61 | } 62 | } 63 | } 64 | //takes too long.... 65 | for(int yy = 0; yy < NYSCL; ++yy) { 66 | for(int xx = 0; xx < NXSCL; ++xx) { 67 | for(int ni = 0; ni < Ni; ++ni) { 68 | neuron_n1[yy][xx][ni] = 0; 69 | neuron_n2[yy][xx][ni] = 0; 70 | } 71 | } 72 | } 73 | } 74 | 75 | int pooling_layer_blocked(VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 76 | VTYPE (&neuron_n)[NYSCL][NXSCL][Ni]) { 77 | int c=0; 78 | 79 | VTYPE value[Ni]={0}; 80 | for (int yy = 0; yy < Ny; yy += Ty) { 81 | for (int xx = 0; xx < Nx; xx += Tx) { 82 | for (int iii = 0; iii < Ni; iii += Tii) { 83 | // — Original code — (excluding ii loop) 84 | int yout = yy/Sy; 85 | for (int y = yy; y < yy + Ty; y += Sy) { 86 | int xout = xx/Sx; 87 | for (int x = xx; x < xx + Tx; x += Sx) { 88 | 89 | for (int ii = iii; ii < iii + Tii; ii += Ti) { 90 | for (int i = ii; i < ii + Ti; i++) { 91 | value[i] = 0; 92 | } 93 | 94 | for (int ky = 0; ky < Ky; ky++) { 95 | for (int kx = 0; kx < Kx; kx++) { 96 | //c++; 97 | for (int i = ii; i < ii + Ti; i++) { 98 | #ifdef AVG 99 | value[i] += neuron_i[ky + y][kx + x][i]; 100 | #else 101 | value[i] = max(value[i], neuron_i[ky + y][kx + x][i]); 102 | #endif 103 | } 104 | } 105 | } 106 | 107 | for (int i = ii; i < ii + Ti; i++) { 108 | #ifdef AVG 109 | neuron_n[yout][xout][i] = value[i] / (Kx * Ky); 110 | #else 111 | neuron_n[yout][xout][i] = value[i]; 112 | #endif 113 | } 114 | } 115 | xout++; 116 | } 117 | yout++; 118 | } 119 | } 120 | } 121 | } 122 | return c; 123 | } 124 | 125 | void pooling_layer(VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 126 | VTYPE (&neuron_n)[NYSCL][NXSCL][Ni]) { 127 | VTYPE value[Ni]={0}; 128 | // — Original code — 129 | int yout = 0; 130 | for (int y = 0; y < Ny; y += Sy) { 131 | int xout = 0; 132 | for (int x = 0; x < Nx; x += Sx) { 133 | for (int i = 0; i < Ni; i++) { 134 | value[i]=0; 135 | } 136 | 137 | for (int ky = 0; ky < Ky; ky++) { 138 | for (int kx = 0; kx < Kx; kx++) { 139 | for (int i = 0; i < Ni; i++) { 140 | #ifdef AVG 141 | value[i] += neuron_i[ky + y][kx + x][i]; 142 | #else 143 | value[i] = max(value[i], neuron_i[ky + y][kx + x][i]); 144 | #endif 145 | } 146 | } 147 | } 148 | 149 | for (int i = 0; i < Ni; i++) { 150 | #ifdef AVG 151 | neuron_n[yout][xout][i] = value[i] / (Kx * Ky); 152 | #else 153 | neuron_n[yout][xout][i] = value[i]; 154 | #endif 155 | } 156 | xout++; 157 | } 158 | yout++; 159 | } 160 | } 161 | 162 | #if SB 163 | 164 | int pooling_layer_blocked_sb_4x4_sx1_sy1(VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 165 | VTYPE (&neuron_n)[NYSCL][NXSCL][Ni]) { 166 | int c=0; 167 | 168 | int pipedepth=16; 169 | int pipedepth_bytes=(pipedepth*8); 170 | 171 | SB_CONFIG(pool4x4l2avg_config, pool4x4l2avg_size); 172 | 173 | VTYPE value[Ni]={0}; 174 | for (int yy = 0; yy < Ny; yy += Ty) { 175 | for (int xx = 0; xx < Nx; xx += Tx) { 176 | //cout << dec << "\n yy: " << yy << " xx: " << xx << "\n"; 177 | 178 | 179 | for (int iii = 0; iii < Ni; iii += pipedepth*4) { 180 | // — Original code — (excluding ii loop) 181 | int yout = yy/Sy; 182 | 183 | for (int y = yy; y < yy + Ty; y += 2) { // two rows at a time 184 | 
//int xout = xx/Sx; 185 | //upper -- xx + Tx 186 | //lower - xx 187 | 188 | //cout << dec << "\n yy: " << yy << " xx: " << xx << " iii: " << iii << "\n"; 189 | 190 | //First three loops produce garbage 191 | 192 | SB_CONST(P_pool4x4l2avg_Xa, 0, 2*pipedepth*1); //Initialize garbage inputs 193 | SB_CONST(P_pool4x4l2avg_Xb, 0, 2*pipedepth*1); 194 | SB_CONST(P_pool4x4l2avg_Xc, 0, 2*pipedepth*1); 195 | 196 | SB_DMA_READ(&neuron_i[y+0][xx][iii],Ni*sizeof(VTYPE),pipedepth_bytes,Tx+3,P_pool4x4l2avg_R0); 197 | SB_DMA_READ(&neuron_i[y+1][xx][iii],Ni*sizeof(VTYPE),pipedepth_bytes,Tx+3,P_pool4x4l2avg_R1); 198 | SB_DMA_READ(&neuron_i[y+2][xx][iii],Ni*sizeof(VTYPE),pipedepth_bytes,Tx+3,P_pool4x4l2avg_R2); 199 | SB_DMA_READ(&neuron_i[y+3][xx][iii],Ni*sizeof(VTYPE),pipedepth_bytes,Tx+3,P_pool4x4l2avg_R3); 200 | SB_DMA_READ(&neuron_i[y+4][xx][iii],Ni*sizeof(VTYPE),pipedepth_bytes,Tx+3,P_pool4x4l2avg_R4); 201 | 202 | //each rec has a slightly different number of iterations 203 | SB_RECURRENCE(P_pool4x4l2avg_Oa,P_pool4x4l2avg_Xa,2*pipedepth*(Tx+2)); 204 | SB_RECURRENCE(P_pool4x4l2avg_Ob,P_pool4x4l2avg_Xb,2*pipedepth*(Tx+2)); 205 | SB_RECURRENCE(P_pool4x4l2avg_Oc,P_pool4x4l2avg_Xc,2*pipedepth*(Tx+2)); 206 | 207 | SB_GARBAGE(P_pool4x4l2avg_O0,pipedepth*3); 208 | SB_GARBAGE(P_pool4x4l2avg_O1,pipedepth*3); 209 | 210 | SB_DMA_WRITE(P_pool4x4l2avg_O0,Ni*sizeof(VTYPE),pipedepth_bytes,Tx,&neuron_n[y+0][xx][iii]); 211 | SB_DMA_WRITE(P_pool4x4l2avg_O1,Ni*sizeof(VTYPE),pipedepth_bytes,Tx,&neuron_n[y+1][xx][iii]); 212 | 213 | SB_GARBAGE(P_pool4x4l2avg_Oa,2*pipedepth*1); 214 | SB_GARBAGE(P_pool4x4l2avg_Ob,2*pipedepth*1); 215 | SB_GARBAGE(P_pool4x4l2avg_Oc,2*pipedepth*1); 216 | } 217 | } 218 | } 219 | } 220 | SB_WAIT_ALL(); 221 | return c; 222 | } 223 | 224 | 225 | int pooling_layer_blocked_sb_2x2_sx1_sy1(VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 226 | VTYPE (&neuron_n)[NYSCL][NXSCL][Ni]) { 227 | int c=0; 228 | 229 | int pipedepth=16; 230 | int pipedepth_bytes=(pipedepth*8); 231 | 232 | SB_CONFIG(pool2x2l4avg_config, pool2x2l4avg_size); 233 | 234 | for (int yy = 0; yy < Ny; yy += Ty) { 235 | for (int xx = 0; xx < Nx; xx += Tx) { 236 | for (int iii = 0; iii < Ni; iii += pipedepth*4) { 237 | // — Original code — (excluding ii loop) 238 | int yout = yy/Sy; 239 | for (int y = yy; y < yy + Ty; y += 4) { // two rows at a time 240 | //int xout = xx/Sx; 241 | //upper -- xx + Tx 242 | //lower - xx 243 | 244 | //cout << dec << "\n yy: " << yy << " xx: " << xx << " iii: " << iii << "\n"; 245 | 246 | //First three loops produce garbage 247 | 248 | SB_CONST(P_pool2x2l4avg_P, 0, 4*pipedepth*1); //Initialize garbage inputs 249 | 250 | SB_DMA_READ(&neuron_i[y+0][xx][iii],Ni*sizeof(VTYPE),pipedepth_bytes,Tx+1,P_pool2x2l4avg_R0); 251 | SB_DMA_READ(&neuron_i[y+1][xx][iii],Ni*sizeof(VTYPE),pipedepth_bytes,Tx+1,P_pool2x2l4avg_R1); 252 | SB_DMA_READ(&neuron_i[y+2][xx][iii],Ni*sizeof(VTYPE),pipedepth_bytes,Tx+1,P_pool2x2l4avg_R2); 253 | SB_DMA_READ(&neuron_i[y+3][xx][iii],Ni*sizeof(VTYPE),pipedepth_bytes,Tx+1,P_pool2x2l4avg_R3); 254 | SB_DMA_READ(&neuron_i[y+4][xx][iii],Ni*sizeof(VTYPE),pipedepth_bytes,Tx+1,P_pool2x2l4avg_R4); 255 | 256 | SB_RECURRENCE(P_pool2x2l4avg_I,P_pool2x2l4avg_P,4*pipedepth*(Tx)); 257 | 258 | SB_GARBAGE(P_pool2x2l4avg_O0,pipedepth*1); 259 | SB_GARBAGE(P_pool2x2l4avg_O1,pipedepth*1); 260 | SB_GARBAGE(P_pool2x2l4avg_O2,pipedepth*1); 261 | SB_GARBAGE(P_pool2x2l4avg_O3,pipedepth*1); 262 | 263 | SB_DMA_WRITE(P_pool2x2l4avg_O0,Ni*sizeof(VTYPE),pipedepth_bytes,Tx,&neuron_n[y+0][xx][iii]); 264 | 
SB_DMA_WRITE(P_pool2x2l4avg_O1,Ni*sizeof(VTYPE),pipedepth_bytes,Tx,&neuron_n[y+1][xx][iii]); 265 | SB_DMA_WRITE(P_pool2x2l4avg_O2,Ni*sizeof(VTYPE),pipedepth_bytes,Tx,&neuron_n[y+2][xx][iii]); 266 | SB_DMA_WRITE(P_pool2x2l4avg_O3,Ni*sizeof(VTYPE),pipedepth_bytes,Tx,&neuron_n[y+3][xx][iii]); 267 | 268 | SB_GARBAGE(P_pool2x2l4avg_I,4*pipedepth*1); 269 | 270 | } 271 | } 272 | } 273 | } 274 | SB_WAIT_ALL(); 275 | return c; 276 | } 277 | 278 | int pooling_layer_blocked_sb_2x2_sx1_sy1_full_ni(VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 279 | VTYPE (&neuron_n)[NYSCL][NXSCL][Ni]) { 280 | int c=0; 281 | 282 | int pipedepth=16; 283 | int pipedepth_bytes=(pipedepth*8); 284 | int DP_WIDTH=8; 285 | 286 | SB_CONFIG(pool2x2l4avg_config, pool2x2l4avg_size); 287 | 288 | for (int yy = 0; yy < Ny; yy += Ty) { 289 | for (int xx = 0; xx < Nx; xx += Tx) { 290 | for (int y = yy; y < yy + Ty; y += 4) { // two rows at a time 291 | //cout << dec << "\n yy: " << yy << " xx: " << xx << "\n"; 292 | 293 | //First three loops produce garbage 294 | 295 | int ni_elem = Ni*sizeof(VTYPE)/DP_WIDTH; 296 | SB_CONST(P_pool2x2l4avg_P, 0, 4*ni_elem); 297 | 298 | SB_DMA_READ(&neuron_i[y+0][xx][0],Ni*sizeof(VTYPE),Ni*sizeof(VTYPE),Tx+1,P_pool2x2l4avg_R0); 299 | SB_DMA_READ(&neuron_i[y+1][xx][0],Ni*sizeof(VTYPE),Ni*sizeof(VTYPE),Tx+1,P_pool2x2l4avg_R1); 300 | SB_DMA_READ(&neuron_i[y+2][xx][0],Ni*sizeof(VTYPE),Ni*sizeof(VTYPE),Tx+1,P_pool2x2l4avg_R2); 301 | SB_DMA_READ(&neuron_i[y+3][xx][0],Ni*sizeof(VTYPE),Ni*sizeof(VTYPE),Tx+1,P_pool2x2l4avg_R3); 302 | SB_DMA_READ(&neuron_i[y+4][xx][0],Ni*sizeof(VTYPE),Ni*sizeof(VTYPE),Tx+1,P_pool2x2l4avg_R4); 303 | 304 | SB_RECURRENCE(P_pool2x2l4avg_I,P_pool2x2l4avg_P,4*ni_elem*Tx); 305 | 306 | SB_GARBAGE(P_pool2x2l4avg_O0,ni_elem); 307 | SB_GARBAGE(P_pool2x2l4avg_O1,ni_elem); 308 | SB_GARBAGE(P_pool2x2l4avg_O2,ni_elem); 309 | SB_GARBAGE(P_pool2x2l4avg_O3,ni_elem); 310 | 311 | SB_DMA_WRITE(P_pool2x2l4avg_O0,Ni*sizeof(VTYPE),Ni*sizeof(VTYPE),Tx,&neuron_n[y+0][xx][0]); 312 | SB_DMA_WRITE(P_pool2x2l4avg_O1,Ni*sizeof(VTYPE),Ni*sizeof(VTYPE),Tx,&neuron_n[y+1][xx][0]); 313 | SB_DMA_WRITE(P_pool2x2l4avg_O2,Ni*sizeof(VTYPE),Ni*sizeof(VTYPE),Tx,&neuron_n[y+2][xx][0]); 314 | SB_DMA_WRITE(P_pool2x2l4avg_O3,Ni*sizeof(VTYPE),Ni*sizeof(VTYPE),Tx,&neuron_n[y+3][xx][0]); 315 | 316 | SB_GARBAGE(P_pool2x2l4avg_I,4*ni_elem); 317 | } 318 | } 319 | } 320 | SB_WAIT_ALL(); 321 | return c; 322 | } 323 | #endif 324 | 325 | int test_layer(VTYPE (&neuron_i)[NYPAD][NXPAD][Ni], 326 | VTYPE (&neuron_n)[NYSCL][NXSCL][Ni]) { 327 | begin_roi(); 328 | #ifdef SB 329 | #if Kx == 4 330 | pooling_layer_blocked_sb_4x4_sx1_sy1(neuron_i,neuron_n); 331 | #elif Kx == 2 332 | #if Ni < 64 333 | pooling_layer_blocked_sb_2x2_sx1_sy1_full_ni(neuron_i,neuron_n); 334 | #else 335 | pooling_layer_blocked_sb_2x2_sx1_sy1(neuron_i,neuron_n); 336 | #endif 337 | #else 338 | #error "Kx must be 2 or 4" 339 | #endif 340 | #else 341 | pooling_layer_blocked(neuron_i,neuron_n); 342 | #endif 343 | end_roi(); 344 | } 345 | 346 | 347 | int main(int argc, char** argv) { 348 | //Arrays: 349 | //VTYPE neuron_i[NYPAD][NXPAD][Ni]; 350 | //VTYPE neuron_n[NYSCL][NXSCL][Ni]; 351 | //VTYPE neuron_n2[NYSCL][NXSCL][Ni]; 352 | 353 | VTYPE (*neuron_i)[NYPAD][NXPAD][Ni]; 354 | VTYPE (*neuron_n)[NYSCL][NXSCL][Ni]; 355 | VTYPE (*neuron_n2)[NYSCL][NXSCL][Ni]; 356 | 357 | //cout << "allocating memory\n"; 358 | neuron_i = (VTYPE (*)[NYPAD][NXPAD][Ni])aligned_malloc(64,NYPAD*NXPAD*Ni*sizeof(VTYPE)+64); 359 | neuron_n = (VTYPE 
(*)[NYSCL][NXSCL][Ni])aligned_malloc(64,NYSCL*NXSCL*Ni*sizeof(VTYPE)+64); 360 | neuron_n2 = (VTYPE (*)[NYSCL][NXSCL][Ni])aligned_malloc(64,NYSCL*NXSCL*Ni*sizeof(VTYPE)+64); 361 | 362 | //cout << "NYSCL: " << NYSCL << "\n"; 363 | //cout << "NXSCL: " << NXSCL << "\n"; 364 | //cout << "Ni: " << Ni << "\n"; 365 | 366 | //cout << "bound i\t" << hex << &(*neuron_i)[0][0][0] << " to " << &(*neuron_i)[NYPAD-1][NXPAD-1][Ni-1] << "\n"; 367 | //cout << "bound n1\t" << hex << &(*neuron_n)[0][0][0] << " to " << &(*neuron_n)[NYSCL-1][NXSCL-1][Ni-1] << "\n"; 368 | //cout << "bound n2\t" << hex << &(*neuron_n2)[0][0][0] << " to " << &(*neuron_n2)[NYSCL-1][NXSCL-1][Ni-1] << "\n"; 369 | 370 | //cout << "0,0,1\t" << hex << &(*neuron_n2)[0][0][1] << "\n"; 371 | //cout << "0,1,0\t" << hex << &(*neuron_n2)[0][1][0] << "\n"; 372 | //cout << "1,0,0\t" << hex << &(*neuron_n2)[1][0][0] << "\n"; 373 | 374 | //cout << "isize: " << NYPAD*NXPAD*Ni*sizeof(VTYPE) << "\n"; 375 | //cout << "nsize: " << NYSCL*NXSCL*Ni*sizeof(VTYPE) << "\n"; 376 | 377 | 378 | if(argc==3) { 379 | 380 | //cout << "Did nothing\n"; 381 | 382 | // } else if(argc==2 && string(argv[1])=="perf") { 383 | } else if(argc==2) { 384 | test_layer(*neuron_i,*neuron_n); 385 | //cout << "Perf Run Complete\n"; 386 | } else { 387 | cout << "initializing arrays\n"; 388 | fill_pooling(*neuron_i,*neuron_n,*neuron_n2); 389 | cout << "starting computation\n"; 390 | 391 | int calc = 0; 392 | pooling_layer(*neuron_i,*neuron_n); 393 | test_layer(*neuron_i,*neuron_n2); 394 | cout << "computation complete!\n"; 395 | 396 | if(calc > 0) { 397 | cout << "calc: " << calc << "\n"; 398 | } 399 | compare_short((VTYPE*)*neuron_n,(VTYPE*)*neuron_n2,NYSCL*NXSCL*Ni); 400 | cout << "adds: " << NYSCL*NXSCL*Ni*Ky*Kx << "\n"; 401 | cout << "argc:" << argc << "\n"; 402 | // cout << "mult-block: " << calc.first << " sigmoid-block: " << calc.second << "\n"; 403 | // cout << "mult-orig: " << calc2.first << " sigmoid-orig: " << calc2.second << "\n"; 404 | // 405 | // int n_outputs= Ny/Sy * Nx/Sx * Nn; 406 | // cout << "mult-correct: " << n_outputs*Ni*Kx*Ky 407 | // << " sigmoid-correct: " << n_outputs << "\n"; 408 | } 409 | sb_stats(); 410 | 411 | } 412 | 413 | -------------------------------------------------------------------------------- /workloads/diannao/red16to1sig.dfg: -------------------------------------------------------------------------------- 1 | InputVec: N [0, 1, 2, 3] 2 | InputVec: S [0, 1, 2, 3, 4, 5, 6, 7] 3 | InputVec: acc [0, 1] 4 | Input: pred 5 | 6 | #compute lanes "A" and "B" 7 | 8 | AM0 =Mul16x4(N0, S0) 9 | AM1 =Mul16x4(N1, S1) 10 | AM2 =Mul16x4(N2, S2) 11 | AM3 =Mul16x4(N3, S3) 12 | 13 | AS0 =Add16x4(AM0, AM1) 14 | AS1 =Add16x4(AM2, AM3) 15 | 16 | AS2 =Add16x4(AS0, AS1) 17 | 18 | AR = Red16x4(AS2, acc0) 19 | 20 | out0 = Sig16(AR, pred) 21 | 22 | 23 | 24 | BM0 =Mul16x4(N0, S4) 25 | BM1 =Mul16x4(N1, S5) 26 | BM2 =Mul16x4(N2, S6) 27 | BM3 =Mul16x4(N3, S7) 28 | 29 | BS0 =Add16x4(BM0, BM1) 30 | BS1 =Add16x4(BM2, BM3) 31 | 32 | BS2 =Add16x4(BS0, BS1) 33 | 34 | BR = Red16x4(BS2, acc1) 35 | 36 | out1 = Sig16(BR, pred) 37 | 38 | OutputVec: out [0, 1] 39 | 40 | -------------------------------------------------------------------------------- /workloads/diannao/red16to1sigx2.dfg: -------------------------------------------------------------------------------- 1 | InputVec: NA [0, 1, 2, 3] 2 | InputVec: SA [0, 1, 2, 3] 3 | InputVec: NB [0, 1, 2, 3] 4 | InputVec: SB [0, 1, 2, 3] 5 | 6 | InputVec: acc [0, 1] 7 | InputVec: pred [0, 1] 8 | 9 | #compute lanes "A" and "B" 10 | 11 | 
MA0 =Mul16x4(NA0, SA0) 12 | MA1 =Mul16x4(NA1, SA1) 13 | MA2 =Mul16x4(NA2, SA2) 14 | MA3 =Mul16x4(NA3, SA3) 15 | 16 | SA0 =Add16x4(MA0, MA1) 17 | SA1 =Add16x4(MA2, MA3) 18 | 19 | SA2 =Add16x4(SA0, SA1) 20 | 21 | RA = Red16x4(SA2, acc0) 22 | 23 | out0 = Sig16(RA,pred0) 24 | 25 | 26 | 27 | MB0 =Mul16x4(NB0, SB0) 28 | MB1 =Mul16x4(NB1, SB1) 29 | MB2 =Mul16x4(NB2, SB2) 30 | MB3 =Mul16x4(NB3, SB3) 31 | 32 | SB0 =Add16x4(MB0, MB1) 33 | SB1 =Add16x4(MB2, MB3) 34 | 35 | SB2 =Add16x4(SB0, SB1) 36 | 37 | RB = Red16x4(SB2, acc1) 38 | 39 | out1 = Sig16(RB,pred1) 40 | 41 | OutputVec: out [0, 1] 42 | 43 | 44 | -------------------------------------------------------------------------------- /workloads/diannao/red32to1sig.dfg: -------------------------------------------------------------------------------- 1 | InputVec: N [0, 1, 2, 3, 4, 5, 6, 7] # 8Wide 1Deep 2 | InputVec: S [0, 1, 2, 3, 4, 5, 6, 7] # 8Wide 1Deep 3 | Input: acc 4 | Input: pred 5 | 6 | M0 =Mul16x4(N0, S0) 7 | M1 =Mul16x4(N1, S1) 8 | M2 =Mul16x4(N2, S2) 9 | M3 =Mul16x4(N3, S3) 10 | M4 =Mul16x4(N4, S4) 11 | M5 =Mul16x4(N5, S5) 12 | M6 =Mul16x4(N6, S6) 13 | M7 =Mul16x4(N7, S7) 14 | 15 | A0 =Add16x4(M0, M1) 16 | A1 =Add16x4(M2, M3) 17 | A2 =Add16x4(M4, M5) 18 | A3 =Add16x4(M6, M7) 19 | 20 | A8 =Add16x4(A0, A1) 21 | A9 =Add16x4(A2, A3) 22 | 23 | A10 = Add16x4(A8, A9) 24 | 25 | R = Red16x4(A10, acc) 26 | 27 | out=Sig16(R, pred) 28 | 29 | Output: out 30 | 31 | 32 | -------------------------------------------------------------------------------- /workloads/diannao/red8to1sig.dfg: -------------------------------------------------------------------------------- 1 | InputVec: N [0, 1] 2 | InputVec: S [0, 1, 2, 3] 3 | InputVec: acc [0, 1] 4 | Input: pred 5 | 6 | #compute lanes "A" and "B" 7 | 8 | AM0 =Mul16x4(N0, S0) 9 | AM1 =Mul16x4(N1, S1) 10 | 11 | AS0 =Add16x4(AM0, AM1) 12 | 13 | AR = Red16x4(AS0, acc0) 14 | 15 | out0 = Sig16(AR, pred) 16 | 17 | 18 | 19 | BM0 =Mul16x4(N0, S2) 20 | BM1 =Mul16x4(N1, S3) 21 | 22 | BS0 =Add16x4(BM0, BM1) 23 | 24 | BR = Red16x4(BS0, acc1) 25 | 26 | out1 = Sig16(BR, pred) 27 | 28 | OutputVec: out [0, 1] 29 | 30 | -------------------------------------------------------------------------------- /workloads/diannao/run-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | args="" 4 | #args="no run" 5 | #args="perf" 6 | 7 | for i in *[0-9]p *[0-9]sb; do 8 | echo -n "./$i $args " 9 | ./$i $args 10 | # ticks=`echo $out | cut -d: -f2` 11 | # echo $ticks 12 | done 13 | -------------------------------------------------------------------------------- /workloads/diannao/sim_timing.h: -------------------------------------------------------------------------------- 1 | #ifdef __x86_64__ 2 | static __inline__ uint64_t rdtsc(void) { 3 | unsigned a, d; 4 | //asm("cpuid"); 5 | asm volatile("rdtsc" : "=a" (a), "=d" (d)); 6 | 7 | return (((uint64_t)a) | (((uint64_t)d) << 32)); 8 | } 9 | 10 | static uint64_t ticks; 11 | __attribute__ ((noinline)) void begin_roi() { 12 | ticks=rdtsc(); 13 | } 14 | __attribute__ ((noinline)) void end_roi() { 15 | ticks=(rdtsc()-ticks); 16 | printf("ticks: %ld\n",ticks); 17 | } 18 | __attribute__ ((noinline)) static void sb_stats() { 19 | } 20 | __attribute__ ((noinline)) static void sb_verify() { 21 | } 22 | 23 | #else 24 | __attribute__ ((noinline)) static void begin_roi() { 25 | __asm__ __volatile__("add x0, x0, 1"); \ 26 | } 27 | __attribute__ ((noinline)) static void end_roi() { 28 | __asm__ __volatile__("add x0, x0, 2"); \ 29 | } 30 | 
__attribute__ ((noinline)) static void sb_stats() { 31 | __asm__ __volatile__("add x0, x0, 3"); \ 32 | } 33 | __attribute__ ((noinline)) static void sb_verify() { 34 | __asm__ __volatile__("add x0, x0, 4"); \ 35 | } 36 | 37 | #endif 38 | 39 | -------------------------------------------------------------------------------- /workloads/diannao/softbrain.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | * MM Headerfile for softbrain parameters relevant to DNN 3 | */ 4 | 5 | // Scratchpad info 6 | #define SCRATCHSIZE 2048 // number of dnn elts that fit in scratch pad 7 | #define SCRATCHSTART 0 // logical address for start of scratch 8 | 9 | // Virtual port interfaces 10 | // input ports for pipe0 11 | #define INPUTNEURON0 0 // Wide & deep 12 | #define INPUTWEIGHT0 1 // Wide & deep 13 | #define INPUTACC0 2 // Deep 14 | #define INPUTPRED0 3 // Deep 15 | // input ports for pipe1 16 | #define INPUTNEURON1 4 // Wide & deep 17 | #define INPUTWEIGHT1 5 // Wide & deep 18 | #define INPUTACC1 6 // Deep 19 | #define INPUTPRED1 7 // Deep 20 | 21 | // output ports 22 | #define OUTPUT0 8 // Output for pipe 0: Deep (pipe is 16:1 reduce) 23 | #define OUTPUT1 9 // Output for pipe 1: Deep 24 | 25 | --------------------------------------------------------------------------------
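The scratchpad constants above are element counts, not byte counts: the workloads compare layer sizes against SCRATCHSIZE directly (classifier.cpp checks Ni, convolution_old.cpp checks Kx*Ky*Ni). A minimal sketch of that convention, assuming VTYPE is uint16_t as in dnn.hpp (the helper and names below are illustrative only, not part of the repo):

// Editorial sketch -- mirrors the scratchpad-fit checks used by the diannao workloads.
#include <cstdint>

#define SB_SCRATCH_ELEMS 2048            // SCRATCHSIZE in softbrain.hpp, counted in elements
typedef uint16_t sb_vtype;               // VTYPE in dnn.hpp

static bool fits_in_scratch(long elems) {
  return elems <= SB_SCRATCH_ELEMS;      // compare element counts, as the workloads do
}

// e.g. the shared-kernel convolution input stack: 4*4*112 = 1792 elements (3584 bytes),
// which fits; the scratchpad's byte capacity is 2048 * sizeof(sb_vtype) = 4096 bytes.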